From f751faecc68733b740bc695623bf67ebdec2654a Mon Sep 17 00:00:00 2001 From: iequidoo Date: Tue, 10 Dec 2024 16:04:07 -0300 Subject: [PATCH] fix: Render "message" parts in multipart messages' HTML (#4462) This fixes the HTML display of messages containing forwarded messages. Before, forwarded messages weren't rendered in HTML, and if a forwarded message was long and therefore truncated in the chat, its full text could only be seen in the "Message Info". In #4462 it was suggested to display "Show Full Message..." for each truncated message part and save to `msgs.mime_headers` only the corresponding part, but this would be quite a large change and refactoring. It may also be good that we currently save the full message structure to `msgs.mime_headers`, so I'd suggest not changing this for now. --- src/html.rs | 78 +++++++++++++++++++++++++++------------- src/receive_imf/tests.rs | 51 ++++++++++++++++++-------- 2 files changed, 89 insertions(+), 40 deletions(-) diff --git a/src/html.rs b/src/html.rs index e32ee43575..c58f099c83 100644 --- a/src/html.rs +++ b/src/html.rs @@ -7,6 +7,8 @@ //! `MsgId.get_html()` will return HTML - //! this allows nice quoting, handling linebreaks properly etc.
+use std::mem; + use anyhow::{Context as _, Result}; use base64::Engine as _; use lettre_email::mime::Mime; @@ -77,21 +79,26 @@ fn get_mime_multipart_type(ctype: &ParsedContentType) -> MimeMultipartType { struct HtmlMsgParser { pub html: String, pub plain: Option, + pub(crate) msg_html: String, } impl HtmlMsgParser { /// Function takes a raw mime-message string, /// searches for the main-text part /// and returns that as parser.html - pub async fn from_bytes(context: &Context, rawmime: &[u8]) -> Result<Self> { + pub async fn from_bytes<'a>( + context: &Context, + rawmime: &'a [u8], + ) -> Result<(Self, mailparse::ParsedMail<'a>)> { let mut parser = HtmlMsgParser { html: "".to_string(), plain: None, + msg_html: "".to_string(), }; - let parsedmail = mailparse::parse_mail(rawmime)?; + let parsedmail = mailparse::parse_mail(rawmime).context("Failed to parse mail")?; - parser.collect_texts_recursive(&parsedmail).await?; + parser.collect_texts_recursive(context, &parsedmail).await?; if parser.html.is_empty() { if let Some(plain) = &parser.plain { @@ -100,8 +107,8 @@ impl HtmlMsgParser { } else { parser.cid_to_data_recursive(context, &parsedmail).await?; } - - Ok(parser) + parser.html += &mem::take(&mut parser.msg_html); + Ok((parser, parsedmail)) } /// Function iterates over all mime-parts @@ -114,12 +121,13 @@ impl HtmlMsgParser { /// therefore we use the first one. async fn collect_texts_recursive<'a>( &'a mut self, + context: &'a Context, mail: &'a mailparse::ParsedMail<'a>, ) -> Result<()> { match get_mime_multipart_type(&mail.ctype) { MimeMultipartType::Multiple => { for cur_data in &mail.subparts { - Box::pin(self.collect_texts_recursive(cur_data)).await? + Box::pin(self.collect_texts_recursive(context, cur_data)).await? 
} Ok(()) } @@ -128,8 +136,35 @@ impl HtmlMsgParser { if raw.is_empty() { return Ok(()); } - let mail = mailparse::parse_mail(&raw).context("failed to parse mail")?; - Box::pin(self.collect_texts_recursive(&mail)).await + let (parser, mail) = Box::pin(HtmlMsgParser::from_bytes(context, &raw)).await?; + if !parser.html.is_empty() { + let mut text = "\r\n\r\n".to_string(); + for h in mail.headers { + let key = h.get_key(); + if matches!( + key.to_lowercase().as_str(), + "date" + | "from" + | "sender" + | "reply-to" + | "to" + | "cc" + | "bcc" + | "subject" + ) { + text += &format!("{key}: {}\r\n", h.get_value()); + } + } + text += "\r\n"; + self.msg_html += &PlainText { + text, + flowed: false, + delsp: false, + } + .to_html(); + self.msg_html += &parser.html; + } + Ok(()) } MimeMultipartType::Single => { let mimetype = mail.ctype.mimetype.parse::<Mime>()?; @@ -175,14 +210,7 @@ impl HtmlMsgParser { } Ok(()) } - MimeMultipartType::Message => { - let raw = mail.get_body_raw()?; - if raw.is_empty() { - return Ok(()); - } - let mail = mailparse::parse_mail(&raw).context("failed to parse mail")?; - Box::pin(self.cid_to_data_recursive(context, &mail)).await - } + MimeMultipartType::Message => Ok(()), MimeMultipartType::Single => { let mimetype = mail.ctype.mimetype.parse::<Mime>()?; if mimetype.type_() == mime::IMAGE { @@ -240,7 +268,7 @@ impl MsgId { warn!(context, "get_html: parser error: {:#}", err); Ok(None) } - Ok(parser) => Ok(Some(parser.html)), + Ok((parser, _)) => Ok(Some(parser.html)), } } else { warn!(context, "get_html: no mime for {}", self); @@ -274,7 +302,7 @@ mod tests { async fn test_htmlparse_plain_unspecified() { let t = TestContext::new().await; let raw = include_bytes!("../test-data/message/text_plain_unspecified.eml"); - let parser = HtmlMsgParser::from_bytes(&t.ctx, raw).await.unwrap(); + let (parser, _) = HtmlMsgParser::from_bytes(&t.ctx, raw).await.unwrap(); assert_eq!( parser.html, r#"<!DOCTYPE html> @@ -292,7 +320,7 @@ This message does not have 
Content-Type nor Subject.<br/> async fn test_htmlparse_plain_iso88591() { let t = TestContext::new().await; let raw = include_bytes!("../test-data/message/text_plain_iso88591.eml"); - let parser = HtmlMsgParser::from_bytes(&t.ctx, raw).await.unwrap(); + let (parser, _) = HtmlMsgParser::from_bytes(&t.ctx, raw).await.unwrap(); assert_eq!( parser.html, r#"<!DOCTYPE html> @@ -310,7 +338,7 @@ message with a non-UTF-8 encoding: äöüßÄÖÜ<br/> async fn test_htmlparse_plain_flowed() { let t = TestContext::new().await; let raw = include_bytes!("../test-data/message/text_plain_flowed.eml"); - let parser = HtmlMsgParser::from_bytes(&t.ctx, raw).await.unwrap(); + let (parser, _) = HtmlMsgParser::from_bytes(&t.ctx, raw).await.unwrap(); assert!(parser.plain.unwrap().flowed); assert_eq!( parser.html, @@ -332,7 +360,7 @@ and will be wrapped as usual.<br/> async fn test_htmlparse_alt_plain() { let t = TestContext::new().await; let raw = include_bytes!("../test-data/message/text_alt_plain.eml"); - let parser = HtmlMsgParser::from_bytes(&t.ctx, raw).await.unwrap(); + let (parser, _) = HtmlMsgParser::from_bytes(&t.ctx, raw).await.unwrap(); assert_eq!( parser.html, r#"<!DOCTYPE html> @@ -353,7 +381,7 @@ test some special html-characters as &lt; &gt; and &amp; but also &quot; and &#x async fn test_htmlparse_html() { let t = TestContext::new().await; let raw = include_bytes!("../test-data/message/text_html.eml"); - let parser = HtmlMsgParser::from_bytes(&t.ctx, raw).await.unwrap(); + let (parser, _) = HtmlMsgParser::from_bytes(&t.ctx, raw).await.unwrap(); // on windows, `\r\n` linends are returned from mimeparser, // however, rust multiline-strings use just `\n`; @@ -371,7 +399,7 @@ test some special html-characters as &lt; &gt; and &amp; but also &quot; and &#x async fn test_htmlparse_alt_html() { let t = TestContext::new().await; let raw = include_bytes!("../test-data/message/text_alt_html.eml"); - let parser = HtmlMsgParser::from_bytes(&t.ctx, raw).await.unwrap(); + let (parser, _) = 
HtmlMsgParser::from_bytes(&t.ctx, raw).await.unwrap(); assert_eq!( parser.html.replace('\r', ""), // see comment in test_htmlparse_html() r##"<html> @@ -386,7 +414,7 @@ test some special html-characters as &lt; &gt; and &amp; but also &quot; and &#x async fn test_htmlparse_alt_plain_html() { let t = TestContext::new().await; let raw = include_bytes!("../test-data/message/text_alt_plain_html.eml"); - let parser = HtmlMsgParser::from_bytes(&t.ctx, raw).await.unwrap(); + let (parser, _) = HtmlMsgParser::from_bytes(&t.ctx, raw).await.unwrap(); assert_eq!( parser.html.replace('\r', ""), // see comment in test_htmlparse_html() r##"<html> @@ -411,7 +439,7 @@ test some special html-characters as &lt; &gt; and &amp; but also &quot; and &#x assert!(test.find("data:").is_none()); // parsing converts cid: to data: - let parser = HtmlMsgParser::from_bytes(&t.ctx, raw).await.unwrap(); + let (parser, _) = HtmlMsgParser::from_bytes(&t.ctx, raw).await.unwrap(); assert!(parser.html.contains("<html>")); assert!(!parser.html.contains("Content-Id:")); assert!(parser.html.contains("data:image/jpeg;base64,/9j/4AAQ")); diff --git a/src/receive_imf/tests.rs b/src/receive_imf/tests.rs index a9e9dfabf4..0bb83ceb95 100644 --- a/src/receive_imf/tests.rs +++ b/src/receive_imf/tests.rs @@ -3834,30 +3834,51 @@ async fn test_big_forwarded_with_big_attachment() -> Result<()> { let raw = include_bytes!("../../test-data/message/big_forwarded_with_big_attachment.eml"); let rcvd = receive_imf(t, raw, false).await?.unwrap(); assert_eq!(rcvd.msg_ids.len(), 3); + let msg = Message::load_from_db(t, rcvd.msg_ids[0]).await?; assert_eq!(msg.get_viewtype(), Viewtype::Text); assert_eq!(msg.get_text(), "Hello!"); - // Wrong: the second bubble's text is truncated, but "Show Full Message..." is going to be shown - // in the first message bubble in the UIs. - assert_eq!( - msg.id - .get_html(t) - .await? 
- .unwrap() - .matches("Hello!") - .count(), - 1 - ); + assert!(!msg.has_html()); + let msg = Message::load_from_db(t, rcvd.msg_ids[1]).await?; assert_eq!(msg.get_viewtype(), Viewtype::Text); - assert!(msg.get_text().starts_with("this text with 42 chars is just repeated.")); + assert!(msg + .get_text() + .starts_with("this text with 42 chars is just repeated.")); assert!(msg.get_text().ends_with("[...]")); - // Wrong: the text is truncated, but it's not possible to see the full text in HTML. assert!(!msg.has_html()); + let msg = Message::load_from_db(t, rcvd.msg_ids[2]).await?; assert_eq!(msg.get_viewtype(), Viewtype::File); - assert!(!msg.has_html()); - + assert!(msg.has_html()); + let html = msg.id.get_html(t).await?.unwrap(); + let tail = html + .split_once("Hello!") + .unwrap() + .1 + .split_once("From: AAA") + .unwrap() + .1 + .split_once("aaa@example.org") + .unwrap() + .1 + .split_once("To: Alice") + .unwrap() + .1 + .split_once("alice@example.org") + .unwrap() + .1 + .split_once("Subject: Some subject") + .unwrap() + .1 + .split_once("Date: Fri, 2 Jun 2023 12:29:17 +0000") + .unwrap() + .1; + assert_eq!( + tail.matches("this text with 42 chars is just repeated.") + .count(), + 128 + ); Ok(()) }