From f60b689b031505874c25c630e810f85370631a3a Mon Sep 17 00:00:00 2001 From: Winni Neessen Date: Fri, 13 Oct 2023 15:06:28 +0200 Subject: [PATCH] Refactor EML file parsing and header extraction We can now parse simple mails (multipart is not working yet). The existing implementation was made more efficient by refactoring the EML file parsing and header extraction mechanism. Added 'strings' and 'bytes' packages to facilitate these changes. Previously, headers and body were parsed separately, which was unnecessarily complex and increased the chance of errors. Now, with the updated function 'readEML' and the new helper function 'parseEMLBodyParts', we are able to parse headers and body together, which not only simplifies the code but also increases its reliability. Specifically, 'bytes.Buffer' now helps us capture the body while parsing, which removes the need for separate handling. Additionally, the 'charset' parameter of the Content-Type header and the body's content type are also accounted for in the new implementation, enhancing the completeness of information extracted from EML files. 
--- eml.go | 60 ++++++++++++++++++++++++++++++++++++++++------------------ 1 file changed, 42 insertions(+), 18 deletions(-) diff --git a/eml.go b/eml.go index ea1eaf5..59bb0d3 100644 --- a/eml.go +++ b/eml.go @@ -1,11 +1,13 @@ package mail import ( + "bytes" "errors" "fmt" "mime" nm "net/mail" "os" + "strings" ) // EMLToMsg will open an parse a .eml file at a provided file path and return a @@ -18,44 +20,40 @@ func EMLToMsg(fp string) (*Msg, error) { mimever: Mime10, } - pm, err := readEML(fp) + pm, mbbuf, err := readEML(fp) if err != nil || pm == nil { return m, fmt.Errorf("failed to parse EML file: %w", err) } - // Parse the header if err := parseEMLHeaders(&pm.Header, m); err != nil { return m, fmt.Errorf("failed to parse EML headers: %w", err) } - - // Extract the transfer encoding of the body - mi, ar, err := mime.ParseMediaType(pm.Header.Get(HeaderContentType.String())) - if err != nil { - return m, fmt.Errorf("failed to extract content type: %w", err) + if err := parseEMLBodyParts(pm, mbbuf, m); err != nil { + return m, fmt.Errorf("failed to parse EML body parts: %w", err) } - if v, ok := ar["charset"]; ok { - m.SetCharset(Charset(v)) - } - fmt.Printf("Encoding: %s\n", mi) - fmt.Printf("Params: %+v\n", ar) return m, nil } // readEML opens an EML file and uses net/mail to parse the header and body -func readEML(fp string) (*nm.Message, error) { +func readEML(fp string) (*nm.Message, *bytes.Buffer, error) { fh, err := os.Open(fp) if err != nil { - return nil, fmt.Errorf("failed to open EML file: %w", err) + return nil, nil, fmt.Errorf("failed to open EML file: %w", err) } defer func() { _ = fh.Close() }() pm, err := nm.ReadMessage(fh) if err != nil { - return pm, fmt.Errorf("failed to parse EML: %w", err) + return pm, nil, fmt.Errorf("failed to parse EML: %w", err) } - return pm, nil + + buf := bytes.Buffer{} + if _, err = buf.ReadFrom(pm.Body); err != nil { + return nil, nil, err + } + return pm, &buf, nil } // parseEMLHeaders will check the EML headers for 
the most common headers and set the @@ -64,8 +62,8 @@ func parseEMLHeaders(mh *nm.Header, m *Msg) error { commonHeaders := []Header{ HeaderContentType, HeaderImportance, HeaderInReplyTo, HeaderListUnsubscribe, HeaderListUnsubscribePost, HeaderMessageID, HeaderMIMEVersion, HeaderOrganization, - HeaderPrecedence, HeaderPriority, HeaderSubject, HeaderUserAgent, HeaderXMailer, - HeaderXMSMailPriority, HeaderXPriority, + HeaderPrecedence, HeaderPriority, HeaderReferences, HeaderSubject, HeaderUserAgent, + HeaderXMailer, HeaderXMSMailPriority, HeaderXPriority, } // Extract address headers @@ -118,3 +116,29 @@ func parseEMLHeaders(mh *nm.Header, m *Msg) error { return nil } + +// parseEMLBodyParts ... +func parseEMLBodyParts(pm *nm.Message, mbbuf *bytes.Buffer, m *Msg) error { + // Extract the transfer encoding of the body + mt, par, err := mime.ParseMediaType(pm.Header.Get(HeaderContentType.String())) + if err != nil { + return fmt.Errorf("failed to extract content type: %w", err) + } + if v, ok := par["charset"]; ok { + m.SetCharset(Charset(v)) + } + + if cte := pm.Header.Get(HeaderContentTransferEnc.String()); cte != "" { + switch strings.ToLower(cte) { + case NoEncoding.String(): + m.SetEncoding(NoEncoding) + } + } + + switch strings.ToLower(mt) { + case TypeTextPlain.String(): + m.SetBodyString(TypeTextPlain, mbbuf.String()) + default: + } + return nil +}