mirror of
https://github.com/golang/go.git
synced 2025-12-08 06:10:04 +00:00
When the name of an Address contains non-ASCII characters, Address.String() used mime.QEncoding to encode the name. However certain characters are forbidden when an encoded-word is in a phrase context (see RFC 2047 section 5.3) and these characters are not encoded by mime.QEncoding. In this case we now use mime.BEncoding (base64 encoding) so that forbidden characters are also encoded. Fixes #11292 Change-Id: I52db98b41ece439295e97d7e94c8190426f499c2 Reviewed-on: https://go-review.googlesource.com/16012 Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org> Run-TryBot: Brad Fitzpatrick <bradfitz@golang.org> TryBot-Result: Gobot Gobot <gobot@golang.org>
590 lines
14 KiB
Go
590 lines
14 KiB
Go
// Copyright 2011 The Go Authors. All rights reserved.
|
|
// Use of this source code is governed by a BSD-style
|
|
// license that can be found in the LICENSE file.
|
|
|
|
/*
|
|
Package mail implements parsing of mail messages.
|
|
|
|
For the most part, this package follows the syntax as specified by RFC 5322.
|
|
Notable divergences:
|
|
* Obsolete address formats are not parsed, including addresses with
|
|
embedded route information.
|
|
* Group addresses are not parsed.
|
|
* The full range of spacing (the CFWS syntax element) is not supported,
|
|
such as breaking addresses across lines.
|
|
*/
|
|
package mail
|
|
|
|
import (
|
|
"bufio"
|
|
"bytes"
|
|
"errors"
|
|
"fmt"
|
|
"io"
|
|
"log"
|
|
"mime"
|
|
"net/textproto"
|
|
"strings"
|
|
"time"
|
|
)
|
|
|
|
// debug is the package-level switch for parser tracing. While it is
// false, every debug.Printf call in this file is a no-op.
var debug = debugT(false)

// debugT is a boolean that doubles as a conditional logger.
type debugT bool

// Printf hands format and args to log.Printf when d is true and does
// nothing otherwise.
func (d debugT) Printf(format string, args ...interface{}) {
	if !d {
		return
	}
	log.Printf(format, args...)
}
|
|
|
|
// A Message represents a parsed mail message.
// ReadMessage fills Header with the parsed header block and sets Body to
// the buffered reader it used for parsing, so Body yields whatever input
// follows the header.
type Message struct {
	Header Header
	Body   io.Reader
}
|
|
|
|
// ReadMessage reads a message from r.
|
|
// The headers are parsed, and the body of the message will be available
|
|
// for reading from r.
|
|
func ReadMessage(r io.Reader) (msg *Message, err error) {
|
|
tp := textproto.NewReader(bufio.NewReader(r))
|
|
|
|
hdr, err := tp.ReadMIMEHeader()
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
return &Message{
|
|
Header: Header(hdr),
|
|
Body: tp.R,
|
|
}, nil
|
|
}
|
|
|
|
// dateLayouts holds every layout string parseDate hands to time.Parse.
// The layouts are tried in slice order.
var dateLayouts []string

// init fills dateLayouts with the cross product of the optional pieces
// of an RFC 5322 (section 3.3) date-time.
func init() {
	var (
		dows    = [...]string{"", "Mon, "}   // optional day-of-week
		days    = [...]string{"2", "02"}     // day = 1*2DIGIT
		years   = [...]string{"2006", "06"}  // year = 4*DIGIT / 2*DIGIT
		seconds = [...]string{":05", ""}     // optional seconds
		// "-0700 (MST)" is not in RFC 5322, but is common.
		zones = [...]string{"-0700", "MST", "-0700 (MST)"} // zone = (("+" / "-") 4DIGIT) / "GMT" / ...
	)

	// The nesting order decides the order of the generated layouts, and
	// therefore the order in which parseDate tries them.
	for _, dow := range dows {
		for _, day := range days {
			datePrefix := dow + day + " Jan "
			for _, year := range years {
				for _, second := range seconds {
					upToZone := datePrefix + year + " 15:04" + second + " "
					for _, zone := range zones {
						dateLayouts = append(dateLayouts, upToZone+zone)
					}
				}
			}
		}
	}
}
|
|
|
|
func parseDate(date string) (time.Time, error) {
|
|
for _, layout := range dateLayouts {
|
|
t, err := time.Parse(layout, date)
|
|
if err == nil {
|
|
return t, nil
|
|
}
|
|
}
|
|
return time.Time{}, errors.New("mail: header could not be parsed")
|
|
}
|
|
|
|
// A Header represents the key-value pairs in a mail message header.
type Header map[string][]string

// Get returns the first value stored under key, or "" when key has no
// values. The lookup is delegated to textproto.MIMEHeader.Get.
func (h Header) Get(key string) string {
	mimeHeader := textproto.MIMEHeader(h)
	return mimeHeader.Get(key)
}
|
|
|
|
// ErrHeaderNotPresent is returned by Header.Date and Header.AddressList
// when Get yields "" for the requested header field.
var ErrHeaderNotPresent = errors.New("mail: header not in message")
|
|
|
|
// Date parses the Date header field.
|
|
func (h Header) Date() (time.Time, error) {
|
|
hdr := h.Get("Date")
|
|
if hdr == "" {
|
|
return time.Time{}, ErrHeaderNotPresent
|
|
}
|
|
return parseDate(hdr)
|
|
}
|
|
|
|
// AddressList parses the named header field as a list of addresses.
|
|
func (h Header) AddressList(key string) ([]*Address, error) {
|
|
hdr := h.Get(key)
|
|
if hdr == "" {
|
|
return nil, ErrHeaderNotPresent
|
|
}
|
|
return ParseAddressList(hdr)
|
|
}
|
|
|
|
// Address represents a single mail address.
// An address such as "Barry Gibbs <bg@example.com>" is represented
// as Address{Name: "Barry Gibbs", Address: "bg@example.com"}.
type Address struct {
	Name    string // Proper name; may be empty.
	Address string // user@domain; String splits it at the last "@".
}
|
|
|
|
// Parses a single RFC 5322 address, e.g. "Barry Gibbs <bg@example.com>"
|
|
func ParseAddress(address string) (*Address, error) {
|
|
return (&addrParser{s: address}).parseAddress()
|
|
}
|
|
|
|
// ParseAddressList parses the given string as a list of addresses.
|
|
func ParseAddressList(list string) ([]*Address, error) {
|
|
return (&addrParser{s: list}).parseAddressList()
|
|
}
|
|
|
|
// An AddressParser is an RFC 5322 address parser.
type AddressParser struct {
	// WordDecoder optionally specifies a decoder for RFC 2047 encoded-words.
	// When it is nil, the package's built-in decoder is used instead.
	WordDecoder *mime.WordDecoder
}
|
|
|
|
// Parse parses a single RFC 5322 address of the
|
|
// form "Gogh Fir <gf@example.com>" or "foo@example.com".
|
|
func (p *AddressParser) Parse(address string) (*Address, error) {
|
|
return (&addrParser{s: address, dec: p.WordDecoder}).parseAddress()
|
|
}
|
|
|
|
// ParseList parses the given string as a list of comma-separated addresses
|
|
// of the form "Gogh Fir <gf@example.com>" or "foo@example.com".
|
|
func (p *AddressParser) ParseList(list string) ([]*Address, error) {
|
|
return (&addrParser{s: list, dec: p.WordDecoder}).parseAddressList()
|
|
}
|
|
|
|
// String formats the address as a valid RFC 5322 address.
|
|
// If the address's name contains non-ASCII characters
|
|
// the name will be rendered according to RFC 2047.
|
|
func (a *Address) String() string {
|
|
|
|
// Format address local@domain
|
|
at := strings.LastIndex(a.Address, "@")
|
|
var local, domain string
|
|
if at < 0 {
|
|
// This is a malformed address ("@" is required in addr-spec);
|
|
// treat the whole address as local-part.
|
|
local = a.Address
|
|
} else {
|
|
local, domain = a.Address[:at], a.Address[at+1:]
|
|
}
|
|
|
|
// Add quotes if needed
|
|
// TODO: rendering quoted local part and rendering printable name
|
|
// should be merged in helper function.
|
|
quoteLocal := false
|
|
for i := 0; i < len(local); i++ {
|
|
ch := local[i]
|
|
if isAtext(ch, false) {
|
|
continue
|
|
}
|
|
if ch == '.' {
|
|
// Dots are okay if they are surrounded by atext.
|
|
// We only need to check that the previous byte is
|
|
// not a dot, and this isn't the end of the string.
|
|
if i > 0 && local[i-1] != '.' && i < len(local)-1 {
|
|
continue
|
|
}
|
|
}
|
|
quoteLocal = true
|
|
break
|
|
}
|
|
if quoteLocal {
|
|
local = quoteString(local)
|
|
|
|
}
|
|
|
|
s := "<" + local + "@" + domain + ">"
|
|
|
|
if a.Name == "" {
|
|
return s
|
|
}
|
|
|
|
// If every character is printable ASCII, quoting is simple.
|
|
allPrintable := true
|
|
for i := 0; i < len(a.Name); i++ {
|
|
// isWSP here should actually be isFWS,
|
|
// but we don't support folding yet.
|
|
if !isVchar(a.Name[i]) && !isWSP(a.Name[i]) {
|
|
allPrintable = false
|
|
break
|
|
}
|
|
}
|
|
if allPrintable {
|
|
b := bytes.NewBufferString(`"`)
|
|
for i := 0; i < len(a.Name); i++ {
|
|
if !isQtext(a.Name[i]) && !isWSP(a.Name[i]) {
|
|
b.WriteByte('\\')
|
|
}
|
|
b.WriteByte(a.Name[i])
|
|
}
|
|
b.WriteString(`" `)
|
|
b.WriteString(s)
|
|
return b.String()
|
|
}
|
|
|
|
// Text in an encoded-word in a display-name must not contain certain
|
|
// characters like quotes or parentheses (see RFC 2047 section 5.3).
|
|
// When this is the case encode the name using base64 encoding.
|
|
if strings.ContainsAny(a.Name, "\"#$%&'(),.:;<>@[]^`{|}~") {
|
|
return mime.BEncoding.Encode("utf-8", a.Name) + " " + s
|
|
}
|
|
return mime.QEncoding.Encode("utf-8", a.Name) + " " + s
|
|
}
|
|
|
|
// addrParser holds the state of one address parse: the input that has
// not been consumed yet and an optional RFC 2047 word decoder.
type addrParser struct {
	s   string            // remaining input; the consume* methods slice bytes off its front
	dec *mime.WordDecoder // may be nil
}
|
|
|
|
func (p *addrParser) parseAddressList() ([]*Address, error) {
|
|
var list []*Address
|
|
for {
|
|
p.skipSpace()
|
|
addr, err := p.parseAddress()
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
list = append(list, addr)
|
|
|
|
p.skipSpace()
|
|
if p.empty() {
|
|
break
|
|
}
|
|
if !p.consume(',') {
|
|
return nil, errors.New("mail: expected comma")
|
|
}
|
|
}
|
|
return list, nil
|
|
}
|
|
|
|
// parseAddress parses a single RFC 5322 address at the start of p.
|
|
func (p *addrParser) parseAddress() (addr *Address, err error) {
|
|
debug.Printf("parseAddress: %q", p.s)
|
|
p.skipSpace()
|
|
if p.empty() {
|
|
return nil, errors.New("mail: no address")
|
|
}
|
|
|
|
// address = name-addr / addr-spec
|
|
// TODO(dsymonds): Support parsing group address.
|
|
|
|
// addr-spec has a more restricted grammar than name-addr,
|
|
// so try parsing it first, and fallback to name-addr.
|
|
// TODO(dsymonds): Is this really correct?
|
|
spec, err := p.consumeAddrSpec()
|
|
if err == nil {
|
|
return &Address{
|
|
Address: spec,
|
|
}, err
|
|
}
|
|
debug.Printf("parseAddress: not an addr-spec: %v", err)
|
|
debug.Printf("parseAddress: state is now %q", p.s)
|
|
|
|
// display-name
|
|
var displayName string
|
|
if p.peek() != '<' {
|
|
displayName, err = p.consumePhrase()
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
}
|
|
debug.Printf("parseAddress: displayName=%q", displayName)
|
|
|
|
// angle-addr = "<" addr-spec ">"
|
|
p.skipSpace()
|
|
if !p.consume('<') {
|
|
return nil, errors.New("mail: no angle-addr")
|
|
}
|
|
spec, err = p.consumeAddrSpec()
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
if !p.consume('>') {
|
|
return nil, errors.New("mail: unclosed angle-addr")
|
|
}
|
|
debug.Printf("parseAddress: spec=%q", spec)
|
|
|
|
return &Address{
|
|
Name: displayName,
|
|
Address: spec,
|
|
}, nil
|
|
}
|
|
|
|
// consumeAddrSpec parses a single RFC 5322 addr-spec at the start of p.
|
|
func (p *addrParser) consumeAddrSpec() (spec string, err error) {
|
|
debug.Printf("consumeAddrSpec: %q", p.s)
|
|
|
|
orig := *p
|
|
defer func() {
|
|
if err != nil {
|
|
*p = orig
|
|
}
|
|
}()
|
|
|
|
// local-part = dot-atom / quoted-string
|
|
var localPart string
|
|
p.skipSpace()
|
|
if p.empty() {
|
|
return "", errors.New("mail: no addr-spec")
|
|
}
|
|
if p.peek() == '"' {
|
|
// quoted-string
|
|
debug.Printf("consumeAddrSpec: parsing quoted-string")
|
|
localPart, err = p.consumeQuotedString()
|
|
} else {
|
|
// dot-atom
|
|
debug.Printf("consumeAddrSpec: parsing dot-atom")
|
|
localPart, err = p.consumeAtom(true, false)
|
|
}
|
|
if err != nil {
|
|
debug.Printf("consumeAddrSpec: failed: %v", err)
|
|
return "", err
|
|
}
|
|
|
|
if !p.consume('@') {
|
|
return "", errors.New("mail: missing @ in addr-spec")
|
|
}
|
|
|
|
// domain = dot-atom / domain-literal
|
|
var domain string
|
|
p.skipSpace()
|
|
if p.empty() {
|
|
return "", errors.New("mail: no domain in addr-spec")
|
|
}
|
|
// TODO(dsymonds): Handle domain-literal
|
|
domain, err = p.consumeAtom(true, false)
|
|
if err != nil {
|
|
return "", err
|
|
}
|
|
|
|
return localPart + "@" + domain, nil
|
|
}
|
|
|
|
// consumePhrase parses the RFC 5322 phrase at the start of p.
|
|
func (p *addrParser) consumePhrase() (phrase string, err error) {
|
|
debug.Printf("consumePhrase: [%s]", p.s)
|
|
// phrase = 1*word
|
|
var words []string
|
|
for {
|
|
// word = atom / quoted-string
|
|
var word string
|
|
p.skipSpace()
|
|
if p.empty() {
|
|
return "", errors.New("mail: missing phrase")
|
|
}
|
|
if p.peek() == '"' {
|
|
// quoted-string
|
|
word, err = p.consumeQuotedString()
|
|
} else {
|
|
// atom
|
|
// We actually parse dot-atom here to be more permissive
|
|
// than what RFC 5322 specifies.
|
|
word, err = p.consumeAtom(true, true)
|
|
}
|
|
|
|
if err == nil {
|
|
word, err = p.decodeRFC2047Word(word)
|
|
}
|
|
|
|
if err != nil {
|
|
break
|
|
}
|
|
debug.Printf("consumePhrase: consumed %q", word)
|
|
words = append(words, word)
|
|
}
|
|
// Ignore any error if we got at least one word.
|
|
if err != nil && len(words) == 0 {
|
|
debug.Printf("consumePhrase: hit err: %v", err)
|
|
return "", fmt.Errorf("mail: missing word in phrase: %v", err)
|
|
}
|
|
phrase = strings.Join(words, " ")
|
|
return phrase, nil
|
|
}
|
|
|
|
// consumeQuotedString parses the quoted string at the start of p.
|
|
func (p *addrParser) consumeQuotedString() (qs string, err error) {
|
|
// Assume first byte is '"'.
|
|
i := 1
|
|
qsb := make([]byte, 0, 10)
|
|
Loop:
|
|
for {
|
|
if i >= p.len() {
|
|
return "", errors.New("mail: unclosed quoted-string")
|
|
}
|
|
switch c := p.s[i]; {
|
|
case c == '"':
|
|
break Loop
|
|
case c == '\\':
|
|
if i+1 == p.len() {
|
|
return "", errors.New("mail: unclosed quoted-string")
|
|
}
|
|
qsb = append(qsb, p.s[i+1])
|
|
i += 2
|
|
case isQtext(c), c == ' ':
|
|
// qtext (printable US-ASCII excluding " and \), or
|
|
// FWS (almost; we're ignoring CRLF)
|
|
qsb = append(qsb, c)
|
|
i++
|
|
default:
|
|
return "", fmt.Errorf("mail: bad character in quoted-string: %q", c)
|
|
}
|
|
}
|
|
p.s = p.s[i+1:]
|
|
if len(qsb) == 0 {
|
|
return "", errors.New("mail: empty quoted-string")
|
|
}
|
|
return string(qsb), nil
|
|
}
|
|
|
|
// errNonASCII is returned by consumeAtom when the byte at the start of,
// or directly after, an atom is greater than 127.
var errNonASCII = errors.New("mail: unencoded non-ASCII text in address")
|
|
|
|
// consumeAtom parses an RFC 5322 atom at the start of p.
|
|
// If dot is true, consumeAtom parses an RFC 5322 dot-atom instead.
|
|
// If permissive is true, consumeAtom will not fail on
|
|
// leading/trailing/double dots in the atom (see golang.org/issue/4938).
|
|
func (p *addrParser) consumeAtom(dot bool, permissive bool) (atom string, err error) {
|
|
if c := p.peek(); !isAtext(c, false) {
|
|
if c > 127 {
|
|
return "", errNonASCII
|
|
}
|
|
return "", errors.New("mail: invalid string")
|
|
}
|
|
i := 1
|
|
for ; i < p.len() && isAtext(p.s[i], dot); i++ {
|
|
}
|
|
if i < p.len() && p.s[i] > 127 {
|
|
return "", errNonASCII
|
|
}
|
|
atom, p.s = string(p.s[:i]), p.s[i:]
|
|
if !permissive {
|
|
if strings.HasPrefix(atom, ".") {
|
|
return "", errors.New("mail: leading dot in atom")
|
|
}
|
|
if strings.Contains(atom, "..") {
|
|
return "", errors.New("mail: double dot in atom")
|
|
}
|
|
if strings.HasSuffix(atom, ".") {
|
|
return "", errors.New("mail: trailing dot in atom")
|
|
}
|
|
}
|
|
return atom, nil
|
|
}
|
|
|
|
func (p *addrParser) consume(c byte) bool {
|
|
if p.empty() || p.peek() != c {
|
|
return false
|
|
}
|
|
p.s = p.s[1:]
|
|
return true
|
|
}
|
|
|
|
// skipSpace skips the leading space and tab characters.
|
|
func (p *addrParser) skipSpace() {
|
|
p.s = strings.TrimLeft(p.s, " \t")
|
|
}
|
|
|
|
// peek returns the first byte of the remaining input without consuming it.
// It indexes p.s[0] directly, so it must only be called on non-empty input.
func (p *addrParser) peek() byte {
	return p.s[0]
}
|
|
|
|
func (p *addrParser) empty() bool {
|
|
return p.len() == 0
|
|
}
|
|
|
|
// len returns the number of bytes of input that remain to be consumed.
func (p *addrParser) len() int {
	return len(p.s)
}
|
|
|
|
func (p *addrParser) decodeRFC2047Word(s string) (string, error) {
|
|
if p.dec != nil {
|
|
return p.dec.DecodeHeader(s)
|
|
}
|
|
|
|
dec, err := rfc2047Decoder.Decode(s)
|
|
if err == nil {
|
|
return dec, nil
|
|
}
|
|
|
|
if _, ok := err.(charsetError); ok {
|
|
return s, err
|
|
}
|
|
|
|
// Ignore invalid RFC 2047 encoded-word errors.
|
|
return s, nil
|
|
}
|
|
|
|
// rfc2047Decoder is the decoder decodeRFC2047Word falls back to when the
// parser has no custom WordDecoder. Its CharsetReader never converts
// anything: it reports the charset it was asked for as a charsetError.
// NOTE(review): presumably CharsetReader is only consulted for charsets
// mime.WordDecoder does not handle itself; confirm against the mime docs.
var rfc2047Decoder = mime.WordDecoder{
	CharsetReader: func(charset string, input io.Reader) (io.Reader, error) {
		return nil, charsetError(charset)
	},
}
|
|
|
|
// charsetError is an error whose underlying string is the name of a
// charset that could not be decoded.
type charsetError string

// Error implements the error interface, quoting the charset name.
func (e charsetError) Error() string {
	name := string(e)
	return fmt.Sprintf("charset not supported: %q", name)
}
|
|
|
|
// atextChars lists every byte isAtext accepts, apart from the
// optional period.
var atextChars = []byte("ABCDEFGHIJKLMNOPQRSTUVWXYZ" +
	"abcdefghijklmnopqrstuvwxyz" +
	"0123456789" +
	"!#$%&'*+-/=?^_`{|}~")

// isAtext reports whether c is an RFC 5322 atext character.
// If dot is true, period is included.
func isAtext(c byte, dot bool) bool {
	// The period is not in atextChars, so its answer is exactly dot.
	if c == '.' {
		return dot
	}
	return bytes.IndexByte(atextChars, c) != -1
}
|
|
|
|
// isQtext reports whether c is an RFC 5322 qtext character:
// printable US-ASCII other than backslash and double quote.
func isQtext(c byte) bool {
	switch c {
	case '\\', '"':
		return false
	}
	return c >= '!' && c <= '~'
}
|
|
|
|
// quoteString renders a string as a RFC5322 quoted-string.
|
|
func quoteString(s string) string {
|
|
var buf bytes.Buffer
|
|
buf.WriteByte('"')
|
|
for _, c := range s {
|
|
ch := byte(c)
|
|
if isQtext(ch) || isWSP(ch) {
|
|
buf.WriteByte(ch)
|
|
} else if isVchar(ch) {
|
|
buf.WriteByte('\\')
|
|
buf.WriteByte(ch)
|
|
}
|
|
}
|
|
buf.WriteByte('"')
|
|
return buf.String()
|
|
}
|
|
|
|
// isVchar reports whether c is an RFC 5322 VCHAR character, i.e. a
// visible (printing) US-ASCII byte.
func isVchar(c byte) bool {
	return c >= '!' && c <= '~'
}
|
|
|
|
// isWSP reports whether c is a WSP (white space).
// WSP is a space or horizontal tab (RFC5234 Appendix B).
func isWSP(c byte) bool {
	switch c {
	case ' ', '\t':
		return true
	}
	return false
}
|