go-mail/eml.go
Winni Neessen f60b689b03
Refactor EML file parsing and header extraction
We can now parse simple mails (multipart is not working yet). The existing implementation was made more efficient by refactoring the EML file parsing and header extraction mechanism. Added 'strings' and 'bytes' packages to facilitate these changes. Previously, headers and body were parsed separately which was unnecessarily complex and increased the chance of errors. Now, with the new function 'readEML' and the helper function 'parseEMLBodyParts', we are able to parse headers and body together which not only simplifies the code but also increases its reliability. Specifically, 'bytes.Buffer' now helps us capture the body while parsing, which removes the need for separate handling. Additionally, certain headers like 'charset' and body types are also accounted for in the new implementation, enhancing the completeness of information extracted from EML files.
2023-10-13 15:06:28 +02:00

144 lines
3.5 KiB
Go

package mail
import (
"bytes"
"errors"
"fmt"
"mime"
nm "net/mail"
"os"
"strings"
)
// EMLToMsg will open and parse a .eml file at a provided file path and return a
// pre-filled Msg pointer.
//
// The file is read via readEML, its headers are applied to the Msg by
// parseEMLHeaders and its body by parseEMLBodyParts. On failure the Msg
// populated so far is returned together with a wrapped error that names the
// step that failed.
func EMLToMsg(fp string) (*Msg, error) {
	m := &Msg{
		addrHeader:    make(map[AddrHeader][]*nm.Address),
		genHeader:     make(map[Header][]string),
		preformHeader: make(map[Header]string),
		mimever:       Mime10,
	}

	pm, mbbuf, err := readEML(fp)
	if err != nil {
		return m, fmt.Errorf("failed to parse EML file: %w", err)
	}
	// Handled separately from the error case: wrapping a nil error with %w
	// would render as "%!w(<nil>)" in the message
	if pm == nil {
		return m, errors.New("failed to parse EML file: no message returned")
	}

	if err := parseEMLHeaders(&pm.Header, m); err != nil {
		return m, fmt.Errorf("failed to parse EML headers: %w", err)
	}
	if err := parseEMLBodyParts(pm, mbbuf, m); err != nil {
		return m, fmt.Errorf("failed to parse EML body parts: %w", err)
	}

	return m, nil
}
// readEML reads the EML file at the given path, lets net/mail split it into
// header and body, and returns the parsed message together with a buffer
// holding everything that was read from the message body
func readEML(fp string) (*nm.Message, *bytes.Buffer, error) {
	file, openErr := os.Open(fp)
	if openErr != nil {
		return nil, nil, fmt.Errorf("failed to open EML file: %w", openErr)
	}
	// The file is only read from, so an error on close is deliberately ignored
	defer func() { _ = file.Close() }()

	msg, parseErr := nm.ReadMessage(file)
	if parseErr != nil {
		return msg, nil, fmt.Errorf("failed to parse EML: %w", parseErr)
	}

	// Drain the body reader so the caller gets the body as a reusable buffer
	body := new(bytes.Buffer)
	if _, readErr := body.ReadFrom(msg.Body); readErr != nil {
		return nil, nil, readErr
	}
	return msg, body, nil
}
// parseEMLHeaders will check the EML headers for the most common headers and set the
// according settings in the Msg.
//
// It applies the "From:" header, then the "To:", "Cc:" and "Bcc:" address lists, then
// the date and finally a fixed list of common generic headers. Headers that are absent
// or empty are skipped. If the date header is not present, m.SetDate() is called
// instead. The first header that fails to parse aborts processing and is named in the
// returned error.
func parseEMLHeaders(mh *nm.Header, m *Msg) error {
	commonHeaders := []Header{
		HeaderContentType, HeaderImportance, HeaderInReplyTo, HeaderListUnsubscribe,
		HeaderListUnsubscribePost, HeaderMessageID, HeaderMIMEVersion, HeaderOrganization,
		HeaderPrecedence, HeaderPriority, HeaderReferences, HeaderSubject, HeaderUserAgent,
		HeaderXMailer, HeaderXMSMailPriority, HeaderXPriority,
	}

	// Extract address headers
	if v := mh.Get(HeaderFrom.String()); v != "" {
		if err := m.From(v); err != nil {
			return fmt.Errorf(`failed to parse "From:" header: %w`, err)
		}
	}

	// A slice instead of a map keeps the processing order fixed, so the same
	// broken input always reports the same header
	addrSetters := []struct {
		header AddrHeader
		set    func(...string) error
	}{
		{HeaderTo, m.To},
		{HeaderCc, m.Cc},
		{HeaderBcc, m.Bcc},
	}
	for _, as := range addrSetters {
		name := as.header.String()
		v := mh.Get(name)
		if v == "" {
			continue
		}
		pal, err := nm.ParseAddressList(v)
		if err != nil {
			return fmt.Errorf(`failed to parse address list of "%s:" header: %w`, name, err)
		}
		als := make([]string, 0, len(pal))
		for _, a := range pal {
			als = append(als, a.String())
		}
		if err := as.set(als...); err != nil {
			return fmt.Errorf(`failed to parse "%s:" header: %w`, name, err)
		}
	}

	// Extract date from message
	d, err := mh.Date()
	switch {
	case err == nil:
		m.SetDateWithValue(d)
	case errors.Is(err, nm.ErrHeaderNotPresent):
		m.SetDate()
	default:
		return fmt.Errorf("failed to parse EML date: %w", err)
	}

	// Extract common headers
	for _, h := range commonHeaders {
		if v := mh.Get(h.String()); v != "" {
			m.SetGenHeader(h, v)
		}
	}

	return nil
}
// parseEMLBodyParts applies the body related information of the parsed EML to the Msg.
//
// It reads the media type and the optional "charset" parameter from the Content-Type
// header, the transfer encoding from the Content-Transfer-Encoding header and, for
// text/plain messages, stores the content of mbbuf as the body of m. Other media types
// (e.g. multipart) are not handled yet and leave the body untouched.
//
// An error is returned if a Content-Type header is present but cannot be parsed.
func parseEMLBodyParts(pm *nm.Message, mbbuf *bytes.Buffer, m *Msg) error {
	// RFC 2045 defines text/plain as the default for messages without a
	// Content-Type header, so a missing header is not an error. The charset of
	// m is left unchanged in that case.
	mt := TypeTextPlain.String()
	var par map[string]string
	if ct := pm.Header.Get(HeaderContentType.String()); ct != "" {
		var err error
		mt, par, err = mime.ParseMediaType(ct)
		if err != nil {
			return fmt.Errorf("failed to extract content type: %w", err)
		}
	}

	// Reading from a nil map is safe and simply reports the key as missing
	if v, ok := par["charset"]; ok {
		m.SetCharset(Charset(v))
	}

	// Extract the transfer encoding of the body
	if cte := pm.Header.Get(HeaderContentTransferEnc.String()); cte != "" {
		switch strings.ToLower(cte) {
		case NoEncoding.String():
			m.SetEncoding(NoEncoding)
		}
	}

	switch strings.ToLower(mt) {
	case TypeTextPlain.String():
		m.SetBodyString(TypeTextPlain, mbbuf.String())
	default:
	}

	return nil
}