mirror of
https://github.com/wneessen/go-mail.git
synced 2024-11-22 05:40:50 +01:00
Refactor EML file parsing and header extraction
We can now parse simple mails (multipart is not working yet). The existing implementation was made more efficient by refactoring the EML file parsing and header extraction mechanism. Added the 'strings' and 'bytes' packages to facilitate these changes. Previously, headers and body were parsed separately, which was unnecessarily complex and increased the chance of errors. Now, with the new function 'readEML' and the helper function 'parseEMLBodyParts', we are able to parse headers and body together, which not only simplifies the code but also increases its reliability. Specifically, a 'bytes.Buffer' now captures the body while parsing, which removes the need for separate handling. Additionally, details such as the 'charset' parameter of the Content-Type header and the body type are now accounted for in the new implementation, enhancing the completeness of the information extracted from EML files.
This commit is contained in:
parent
3d50370a4c
commit
f60b689b03
1 changed files with 42 additions and 18 deletions
60
eml.go
60
eml.go
|
@ -1,11 +1,13 @@
|
|||
package mail
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"errors"
|
||||
"fmt"
|
||||
"mime"
|
||||
nm "net/mail"
|
||||
"os"
|
||||
"strings"
|
||||
)
|
||||
|
||||
// EMLToMsg will open and parse a .eml file at a provided file path and return a
|
||||
|
@ -18,44 +20,40 @@ func EMLToMsg(fp string) (*Msg, error) {
|
|||
mimever: Mime10,
|
||||
}
|
||||
|
||||
pm, err := readEML(fp)
|
||||
pm, mbbuf, err := readEML(fp)
|
||||
if err != nil || pm == nil {
|
||||
return m, fmt.Errorf("failed to parse EML file: %w", err)
|
||||
}
|
||||
|
||||
// Parse the header
|
||||
if err := parseEMLHeaders(&pm.Header, m); err != nil {
|
||||
return m, fmt.Errorf("failed to parse EML headers: %w", err)
|
||||
}
|
||||
|
||||
// Extract the transfer encoding of the body
|
||||
mi, ar, err := mime.ParseMediaType(pm.Header.Get(HeaderContentType.String()))
|
||||
if err != nil {
|
||||
return m, fmt.Errorf("failed to extract content type: %w", err)
|
||||
if err := parseEMLBodyParts(pm, mbbuf, m); err != nil {
|
||||
return m, fmt.Errorf("failed to parse EML body parts: %w", err)
|
||||
}
|
||||
if v, ok := ar["charset"]; ok {
|
||||
m.SetCharset(Charset(v))
|
||||
}
|
||||
fmt.Printf("Encoding: %s\n", mi)
|
||||
fmt.Printf("Params: %+v\n", ar)
|
||||
|
||||
return m, nil
|
||||
}
|
||||
|
||||
// readEML opens an EML file and uses net/mail to parse the header and body
|
||||
func readEML(fp string) (*nm.Message, error) {
|
||||
func readEML(fp string) (*nm.Message, *bytes.Buffer, error) {
|
||||
fh, err := os.Open(fp)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to open EML file: %w", err)
|
||||
return nil, nil, fmt.Errorf("failed to open EML file: %w", err)
|
||||
}
|
||||
defer func() {
|
||||
_ = fh.Close()
|
||||
}()
|
||||
pm, err := nm.ReadMessage(fh)
|
||||
if err != nil {
|
||||
return pm, fmt.Errorf("failed to parse EML: %w", err)
|
||||
return pm, nil, fmt.Errorf("failed to parse EML: %w", err)
|
||||
}
|
||||
return pm, nil
|
||||
|
||||
buf := bytes.Buffer{}
|
||||
if _, err = buf.ReadFrom(pm.Body); err != nil {
|
||||
return nil, nil, err
|
||||
}
|
||||
return pm, &buf, nil
|
||||
}
|
||||
|
||||
// parseEMLHeaders will check the EML headers for the most common headers and set the
|
||||
|
@ -64,8 +62,8 @@ func parseEMLHeaders(mh *nm.Header, m *Msg) error {
|
|||
commonHeaders := []Header{
|
||||
HeaderContentType, HeaderImportance, HeaderInReplyTo, HeaderListUnsubscribe,
|
||||
HeaderListUnsubscribePost, HeaderMessageID, HeaderMIMEVersion, HeaderOrganization,
|
||||
HeaderPrecedence, HeaderPriority, HeaderSubject, HeaderUserAgent, HeaderXMailer,
|
||||
HeaderXMSMailPriority, HeaderXPriority,
|
||||
HeaderPrecedence, HeaderPriority, HeaderReferences, HeaderSubject, HeaderUserAgent,
|
||||
HeaderXMailer, HeaderXMSMailPriority, HeaderXPriority,
|
||||
}
|
||||
|
||||
// Extract address headers
|
||||
|
@ -118,3 +116,29 @@ func parseEMLHeaders(mh *nm.Header, m *Msg) error {
|
|||
|
||||
return nil
|
||||
}
|
||||
|
||||
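// parseEMLBodyParts applies the Content-Type charset, a recognized Content-Transfer-Encoding and, for text/plain mails, the buffered body to the given Msg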
|
||||
func parseEMLBodyParts(pm *nm.Message, mbbuf *bytes.Buffer, m *Msg) error {
|
||||
// Extract the media type and its parameters (e.g. charset) from the Content-Type header
|
||||
mt, par, err := mime.ParseMediaType(pm.Header.Get(HeaderContentType.String()))
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to extract content type: %w", err)
|
||||
}
|
||||
if v, ok := par["charset"]; ok {
|
||||
m.SetCharset(Charset(v))
|
||||
}
|
||||
|
||||
if cte := pm.Header.Get(HeaderContentTransferEnc.String()); cte != "" {
|
||||
switch strings.ToLower(cte) {
|
||||
case NoEncoding.String():
|
||||
m.SetEncoding(NoEncoding)
|
||||
}
|
||||
}
|
||||
|
||||
switch strings.ToLower(mt) {
|
||||
case TypeTextPlain.String():
|
||||
m.SetBodyString(TypeTextPlain, mbbuf.String())
|
||||
default:
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue