newline battles continue

2020-01-19 20:41:30 +00:00
parent 98adb53caf
commit 573696774e
1456 changed files with 501133 additions and 6 deletions
--- a/vendor/github.com/mmcdole/gofeed/internal/shared/charsetconv.go
+++ b/vendor/github.com/mmcdole/gofeed/internal/shared/charsetconv.go
@@ -0,0 +1,19 @@
+package shared
+
+import (
+	"io"
+
+	"golang.org/x/net/html/charset"
+)
+
+// NewReaderLabel wraps input in the reader that charset.NewReaderLabel
+// builds for the given charset label. If the charset package reports an
+// error, that error is returned unchanged together with a nil reader.
+func NewReaderLabel(label string, input io.Reader) (io.Reader, error) {
+	decoded, err := charset.NewReaderLabel(label, input)
+	if err == nil {
+		// The decoded stream is returned as-is. Passing it through
+		// NewXMLSanitizerReader is currently disabled (the call was
+		// commented out).
+		return decoded, nil
+	}
+	return nil, err
+}
--- a/vendor/github.com/mmcdole/gofeed/internal/shared/dateparser.go
+++ b/vendor/github.com/mmcdole/gofeed/internal/shared/dateparser.go
@@ -0,0 +1,219 @@
+package shared
+
+import (
+	"fmt"
+	"strings"
+	"time"
+)
+
+// dateFormats lists the time layouts that ParseDate tries first, in the
+// order given here, using time.Parse. The list was taken from
+// github.com/mjibson/goread.
+var dateFormats = []string{
+	time.RFC822,  // RSS
+	time.RFC822Z, // RSS
+	time.RFC3339, // Atom
+	time.UnixDate,
+	time.RubyDate,
+	time.RFC850,
+	time.RFC1123Z,
+	time.RFC1123,
+	time.ANSIC,
+	"Mon, January 2 2006 15:04:05 -0700",
+	"Mon, Jan 2 2006 15:04:05 -700",
+	"Mon, Jan 2 2006 15:04:05 -0700",
+	"Mon Jan 2 15:04 2006",
+	"Mon Jan 02, 2006 3:04 pm",
+	"Mon Jan 02 2006 15:04:05 -0700",
+	"Monday, January 2, 2006 03:04 PM",
+	"Monday, January 2, 2006",
+	"Monday, January 02, 2006",
+	"Monday, 2 January 2006 15:04:05 -0700",
+	"Monday, 2 Jan 2006 15:04:05 -0700",
+	"Monday, 02 January 2006 15:04:05 -0700",
+	"Monday, 02 January 2006 15:04:05",
+	"Mon, 2 January 2006, 15:04 -0700",
+	"Mon, 2 January 2006 15:04:05 -0700",
+	"Mon, 2 January 2006",
+	"Mon, 2 Jan 2006 3:04:05 PM -0700",
+	"Mon, 2 Jan 2006 15:4:5 -0700 GMT",
+	"Mon, 2, Jan 2006 15:4",
+	"Mon, 2 Jan 2006, 15:04 -0700",
+	"Mon, 2 Jan 2006 15:04 -0700",
+	"Mon, 2 Jan 2006 15:04:05 UT",
+	"Mon, 2 Jan 2006 15:04:05 -0700 MST",
+	"Mon, 2 Jan 2006 15:04:05-0700",
+	"Mon, 2 Jan 2006 15:04:05 -0700",
+	"Mon, 2 Jan 2006 15:04:05",
+	"Mon, 2 Jan 2006 15:04",
+	"Mon,2 Jan 2006",
+	"Mon, 2 Jan 2006",
+	"Mon, 2 Jan 06 15:04:05 -0700",
+	"Mon, 2006-01-02 15:04",
+	"Mon, 02 January 2006",
+	"Mon, 02 Jan 2006 15 -0700",
+	"Mon, 02 Jan 2006 15:04 -0700",
+	"Mon, 02 Jan 2006 15:04:05 Z",
+	"Mon, 02 Jan 2006 15:04:05 UT",
+	"Mon, 02 Jan 2006 15:04:05 MST-07:00",
+	"Mon, 02 Jan 2006 15:04:05 MST -0700",
+	"Mon, 02 Jan 2006 15:04:05 GMT-0700",
+	"Mon,02 Jan 2006 15:04:05 -0700",
+	"Mon, 02 Jan 2006 15:04:05 -0700",
+	"Mon, 02 Jan 2006 15:04:05 -07:00",
+	"Mon, 02 Jan 2006 15:04:05 --0700",
+	"Mon 02 Jan 2006 15:04:05 -0700",
+	"Mon, 02 Jan 2006 15:04:05 -07",
+	"Mon, 02 Jan 2006 15:04:05 00",
+	"Mon, 02 Jan 2006 15:04:05",
+	"Mon, 02 Jan 2006",
+	"January 2, 2006 3:04 PM",
+	"January 2, 2006, 3:04 p.m.",
+	"January 2, 2006 15:04:05",
+	"January 2, 2006 03:04 PM",
+	"January 2, 2006",
+	"January 02, 2006 15:04",
+	"January 02, 2006 03:04 PM",
+	"January 02, 2006",
+	"Jan 2, 2006 3:04:05 PM",
+	"Jan 2, 2006",
+	"Jan 02 2006 03:04:05PM",
+	"Jan 02, 2006",
+	"6/1/2 15:04",
+	"6-1-2 15:04",
+	"2 January 2006 15:04:05 -0700",
+	"2 January 2006",
+	"2 Jan 2006 15:04:05 Z",
+	"2 Jan 2006 15:04:05 -0700",
+	"2 Jan 2006",
+	"2.1.2006 15:04:05",
+	"2/1/2006",
+	"2-1-2006",
+	"2006 January 02",
+	"2006-1-2T15:04:05Z",
+	"2006-1-2 15:04:05",
+	"2006-1-2",
+	"2006-1-02T15:04:05Z",
+	"2006-01-02T15:04Z",
+	"2006-01-02T15:04-07:00",
+	"2006-01-02T15:04:05Z",
+	"2006-01-02T15:04:05-07:00:00",
+	"2006-01-02T15:04:05:-0700",
+	"2006-01-02T15:04:05-0700",
+	"2006-01-02T15:04:05-07:00",
+	"2006-01-02T15:04:05 -0700",
+	"2006-01-02T15:04:05:00",
+	"2006-01-02T15:04:05",
+	"2006-01-02 at 15:04:05",
+	"2006-01-02 15:04:05Z",
+	"2006-01-02 15:04:05-0700",
+	"2006-01-02 15:04:05-07:00",
+	"2006-01-02 15:04:05 -0700",
+	"2006-01-02 15:04",
+	"2006-01-02 00:00:00.0 15:04:05.0 -0700",
+	"2006/01/02",
+	"2006-01-02",
+	"15:04 02.01.2006 -0700",
+	"1/2/2006 3:04:05 PM",
+	"1/2/2006",
+	"06/1/2 15:04",
+	"06-1-2 15:04",
+	"02 Monday, Jan 2006 15:04",
+	"02 Jan 2006 15:04:05 UT",
+	"02 Jan 2006 15:04:05 -0700",
+	"02 Jan 2006 15:04:05",
+	"02 Jan 2006",
+	"02.01.2006 15:04:05",
+	"02/01/2006 15:04:05",
+	"02.01.2006 15:04",
+	"02/01/2006 - 15:04",
+	"02.01.2006 -0700",
+	"02/01/2006",
+	"02-01-2006",
+	"01/02/2006 3:04 PM",
+	"01/02/2006 - 15:04",
+	"01/02/2006",
+	"01-02-2006",
+}
+
+// dateFormatsWithNamedZone lists layouts that end in a zone abbreviation
+// (the "MST" layout element). Named zone cannot be consistently loaded, so
+// ParseDate handles these layouts separately from dateFormats: it first
+// matches the layout and then tries to load the zone by name.
+var dateFormatsWithNamedZone = []string{
+	"Mon, January 02, 2006, 15:04:05 MST",
+	"Mon, January 02, 2006 15:04:05 MST",
+	"Mon, Jan 2, 2006 15:04 MST",
+	"Mon, Jan 2 2006 15:04 MST",
+	"Mon, Jan 2, 2006 15:04:05 MST",
+	"Mon Jan 2 15:04:05 2006 MST",
+	"Mon, Jan 02,2006 15:04:05 MST",
+	"Monday, January 2, 2006 15:04:05 MST",
+	"Monday, 2 January 2006 15:04:05 MST",
+	"Monday, 2 Jan 2006 15:04:05 MST",
+	"Monday, 02 January 2006 15:04:05 MST",
+	"Mon, 2 January 2006 15:04 MST",
+	"Mon, 2 January 2006, 15:04:05 MST",
+	"Mon, 2 January 2006 15:04:05 MST",
+	"Mon, 2 Jan 2006 15:4:5 MST",
+	"Mon, 2 Jan 2006 15:04 MST",
+	"Mon, 2 Jan 2006 15:04:05MST",
+	"Mon, 2 Jan 2006 15:04:05 MST",
+	"Mon 2 Jan 2006 15:04:05 MST",
+	"mon,2 Jan 2006 15:04:05 MST",
+	"Mon, 2 Jan 15:04:05 MST",
+	"Mon, 2 Jan 06 15:04:05 MST",
+	// The hour element is "15"; "14" would be read as a month digit
+	// followed by a minute digit and never match a real timestamp.
+	"Mon,02 January 2006 15:04:05 MST",
+	"Mon, 02 Jan 2006 3:04:05 PM MST",
+	"Mon,02 Jan 2006 15:04 MST",
+	"Mon, 02 Jan 2006 15:04 MST",
+	"Mon, 02 Jan 2006, 15:04:05 MST",
+	"Mon, 02 Jan 2006 15:04:05MST",
+	"Mon, 02 Jan 2006 15:04:05 MST",
+	"Mon , 02 Jan 2006 15:04:05 MST",
+	"Mon, 02 Jan 06 15:04:05 MST",
+	"January 2, 2006 15:04:05 MST",
+	"January 02, 2006 15:04:05 MST",
+	"Jan 2, 2006 3:04:05 PM MST",
+	"Jan 2, 2006 15:04:05 MST",
+	"2 January 2006 15:04:05 MST",
+	"2 Jan 2006 15:04:05 MST",
+	"2006-01-02 15:04:05 MST",
+	"1/2/2006 3:04:05 PM MST",
+	"1/2/2006 15:04:05 MST",
+	"02 Jan 2006 15:04 MST",
+	"02 Jan 2006 15:04:05 MST",
+	"02/01/2006 15:04 MST",
+	"02-01-2006 15:04:05 MST",
+	"01/02/2006 15:04:05 MST",
+}
+
+// ParseDate parses a given date string using a large
+// list of commonly found feed date formats.
+//
+// Leading and trailing whitespace in ds is ignored. The layouts in
+// dateFormats are tried first; if none matches, the layouts in
+// dateFormatsWithNamedZone are tried. For the latter, the zone name found in
+// the input is looked up with time.LoadLocation and, when that succeeds, the
+// value is re-parsed in that location. When the lookup fails, the result of
+// the plain time.Parse call is returned as-is.
+//
+// An error is returned when ds is empty (after trimming) or when no layout
+// matches; the returned time is the zero value in both cases.
+func ParseDate(ds string) (t time.Time, err error) {
+	d := strings.TrimSpace(ds)
+	if d == "" {
+		return t, fmt.Errorf("Date string is empty")
+	}
+
+	for _, f := range dateFormats {
+		if t, err = time.Parse(f, d); err == nil {
+			return t, nil
+		}
+	}
+
+	for _, f := range dateFormatsWithNamedZone {
+		parsed, parseErr := time.Parse(f, d)
+		if parseErr != nil {
+			continue
+		}
+
+		// This is a format match! Now try to load the timezone name.
+		loc, locErr := time.LoadLocation(parsed.Location().String())
+		if locErr != nil {
+			// We couldn't load the TZ name, so keep what time.Parse
+			// produced for the abbreviation.
+			return parsed, nil
+		}
+
+		// Re-parse the trimmed input (not the raw ds: surrounding
+		// whitespace would make an already matched layout fail here).
+		if inLoc, inLocErr := time.ParseInLocation(f, d, loc); inLocErr == nil {
+			return inLoc, nil
+		}
+		// This should not be reachable: the same layout and input
+		// parsed successfully just above.
+	}
+
+	return time.Time{}, fmt.Errorf("Failed to parse date: %s", ds)
+}
--- a/vendor/github.com/mmcdole/gofeed/internal/shared/extparser.go
+++ b/vendor/github.com/mmcdole/gofeed/internal/shared/extparser.go
@@ -0,0 +1,176 @@
+package shared
+
+import (
+	"strings"
+
+	"github.com/mmcdole/gofeed/extensions"
+	"github.com/mmcdole/goxpp"
+)
+
+// IsExtension reports whether the element the parser is currently on
+// should be treated as an extension element.
+//
+// The element's namespace (whitespace-trimmed) is looked up in p.Spaces.
+// When it is found, the element is an extension unless the mapped prefix is
+// empty or one of "rss", "rdf" or "content". When it is not found, the
+// element is an extension exactly when p.Space is non-empty.
+func IsExtension(p *xpp.XMLPullParser) bool {
+	prefix, known := p.Spaces[strings.TrimSpace(p.Space)]
+	if !known {
+		return p.Space != ""
+	}
+
+	switch prefix {
+	case "", "rss", "rdf", "content":
+		return false
+	default:
+		return true
+	}
+}
+
+// ParseExtension parses the current element of the
+// XMLPullParser as an extension element and updates
+// the extension map.
+//
+// The parsed element is appended to fe[prefix][name], where prefix comes
+// from prefixForNamespace and name is the parser's element name after the
+// element has been consumed. A nil fe is accepted: a new map is allocated
+// and returned. On a parse error the returned map is nil and fe is left
+// untouched.
+func ParseExtension(fe ext.Extensions, p *xpp.XMLPullParser) (ext.Extensions, error) {
+	prefix := prefixForNamespace(p.Space, p)
+
+	result, err := parseExtensionElement(p)
+	if err != nil {
+		return nil, err
+	}
+
+	// Writing to a nil map panics, so make sure there is one to fill.
+	if fe == nil {
+		fe = ext.Extensions{}
+	}
+
+	// Ensure the extension prefix map exists
+	byName, ok := fe[prefix]
+	if !ok {
+		byName = map[string][]ext.Extension{}
+		fe[prefix] = byName
+	}
+
+	// append copes with a missing (nil) slice, so no separate
+	// initialisation of the element slice is needed.
+	byName[p.Name] = append(byName[p.Name], result)
+	return fe, nil
+}
+
+// parseExtensionElement reads the element the parser is positioned on,
+// which must be a start tag, up to and including its matching end tag.
+//
+// The result carries the element name, its attributes keyed by local name,
+// the concatenated and whitespace-trimmed text content, and all child
+// elements (parsed recursively) grouped by child name. On error the
+// partially filled element is returned together with the error.
+func parseExtensionElement(p *xpp.XMLPullParser) (e ext.Extension, err error) {
+	if err = p.Expect(xpp.StartTag, "*"); err != nil {
+		return e, err
+	}
+
+	e.Name = p.Name
+	e.Children = map[string][]ext.Extension{}
+	e.Attrs = map[string]string{}
+
+	// TODO: Alright that we are stripping
+	// namespace information from attributes ?
+	for _, a := range p.Attrs {
+		e.Attrs[a.Name.Local] = a.Value
+	}
+
+content:
+	for {
+		tok, nextErr := p.Next()
+		if nextErr != nil {
+			return e, nextErr
+		}
+
+		switch tok {
+		case xpp.EndTag:
+			break content
+		case xpp.StartTag:
+			child, childErr := parseExtensionElement(p)
+			if childErr != nil {
+				return e, childErr
+			}
+			e.Children[child.Name] = append(e.Children[child.Name], child)
+		case xpp.Text:
+			e.Value += p.Text
+		}
+	}
+
+	e.Value = strings.TrimSpace(e.Value)
+
+	// The end tag that stopped the loop must close this element.
+	if err = p.Expect(xpp.EndTag, e.Name); err != nil {
+		return e, err
+	}
+	return e, nil
+}
+
+// prefixForNamespace picks the prefix under which elements of the given
+// namespace are stored. Lookup order:
+//
+//  1. canonicalNamespaces, so that the canonical prefix wins over the one
+//     defined in the feed;
+//  2. the namespaces the feed itself declared (p.Spaces);
+//  3. the namespace string itself, which is what Go's xml.Decoder.Token()
+//     reports for a namespace that is not defined in the feed.
+func prefixForNamespace(space string, p *xpp.XMLPullParser) string {
+	if canonical, found := canonicalNamespaces[space]; found {
+		return canonical
+	}
+
+	declared, found := p.Spaces[space]
+	if !found {
+		return space
+	}
+	return declared
+}
+
+// canonicalNamespaces maps a namespace URI to its canonical prefix.
+//
+// Namespaces taken from github.com/kurtmckee/feedparser
+// These are used for determining canonical name space prefixes
+// for many of the popular RSS/Atom extensions.
+//
+// These canonical prefixes override any prefixes used in the feed itself
+// (prefixForNamespace consults this map before the feed's own declarations).
+// Several URIs may map to the same prefix.
+var canonicalNamespaces = map[string]string{
+	"http://webns.net/mvcb/":                                         "admin",
+	"http://purl.org/rss/1.0/modules/aggregation/":                   "ag",
+	"http://purl.org/rss/1.0/modules/annotate/":                      "annotate",
+	"http://media.tangent.org/rss/1.0/":                              "audio",
+	"http://backend.userland.com/blogChannelModule":                  "blogChannel",
+	"http://creativecommons.org/ns#license":                          "cc",
+	"http://web.resource.org/cc/":                                    "cc",
+	"http://cyber.law.harvard.edu/rss/creativeCommonsRssModule.html": "creativeCommons",
+	"http://backend.userland.com/creativeCommonsRssModule":           "creativeCommons",
+	"http://purl.org/rss/1.0/modules/company":                        "co",
+	"http://purl.org/rss/1.0/modules/content/":                       "content",
+	"http://my.theinfo.org/changed/1.0/rss/":                         "cp",
+	"http://purl.org/dc/elements/1.1/":                               "dc",
+	"http://purl.org/dc/terms/":                                      "dcterms",
+	"http://purl.org/rss/1.0/modules/email/":                         "email",
+	"http://purl.org/rss/1.0/modules/event/":                         "ev",
+	"http://rssnamespace.org/feedburner/ext/1.0":                     "feedburner",
+	"http://freshmeat.net/rss/fm/":                                   "fm",
+	"http://xmlns.com/foaf/0.1/":                                     "foaf",
+	"http://www.w3.org/2003/01/geo/wgs84_pos#":                       "geo",
+	"http://www.georss.org/georss":                                   "georss",
+	"http://www.opengis.net/gml":                                     "gml",
+	"http://postneo.com/icbm/":                                       "icbm",
+	"http://purl.org/rss/1.0/modules/image/":                         "image",
+	"http://www.itunes.com/DTDs/PodCast-1.0.dtd":                     "itunes",
+	"http://example.com/DTDs/PodCast-1.0.dtd":                        "itunes",
+	"http://purl.org/rss/1.0/modules/link/":                          "l",
+	"http://search.yahoo.com/mrss":                                   "media",
+	"http://search.yahoo.com/mrss/":                                  "media",
+	"http://madskills.com/public/xml/rss/module/pingback/":           "pingback",
+	"http://prismstandard.org/namespaces/1.2/basic/":                 "prism",
+	"http://www.w3.org/1999/02/22-rdf-syntax-ns#":                    "rdf",
+	"http://www.w3.org/2000/01/rdf-schema#":                          "rdfs",
+	"http://purl.org/rss/1.0/modules/reference/":                     "ref",
+	"http://purl.org/rss/1.0/modules/richequiv/":                     "reqv",
+	"http://purl.org/rss/1.0/modules/search/":                        "search",
+	"http://purl.org/rss/1.0/modules/slash/":                         "slash",
+	"http://schemas.xmlsoap.org/soap/envelope/":                      "soap",
+	"http://purl.org/rss/1.0/modules/servicestatus/":                 "ss",
+	"http://hacks.benhammersley.com/rss/streaming/":                  "str",
+	"http://purl.org/rss/1.0/modules/subscription/":                  "sub",
+	"http://purl.org/rss/1.0/modules/syndication/":                   "sy",
+	"http://schemas.pocketsoap.com/rss/myDescModule/":                "szf",
+	"http://purl.org/rss/1.0/modules/taxonomy/":                      "taxo",
+	"http://purl.org/rss/1.0/modules/threading/":                     "thr",
+	"http://purl.org/rss/1.0/modules/textinput/":                     "ti",
+	"http://madskills.com/public/xml/rss/module/trackback/":          "trackback",
+	"http://wellformedweb.org/commentAPI/":                           "wfw",
+	"http://purl.org/rss/1.0/modules/wiki/":                          "wiki",
+	"http://www.w3.org/1999/xhtml":                                   "xhtml",
+	"http://www.w3.org/1999/xlink":                                   "xlink",
+	"http://www.w3.org/XML/1998/namespace":                           "xml",
+	"http://podlove.org/simple-chapters":                             "psc",
+}
--- a/vendor/github.com/mmcdole/gofeed/internal/shared/parseutils.go
+++ b/vendor/github.com/mmcdole/gofeed/internal/shared/parseutils.go
@@ -0,0 +1,194 @@
+package shared
+
+import (
+	"bytes"
+	"errors"
+	"fmt"
+	"regexp"
+	"strconv"
+	"strings"
+
+	xpp "github.com/mmcdole/goxpp"
+)
+
+var (
+	// Patterns used by ParseNameAddress, which tries them in this order:
+	//   emailNameRgx - "address (name)"
+	//   nameEmailRgx - "name (address)"
+	//   nameOnlyRgx  - text containing no '@' and no parentheses
+	//   emailOnlyRgx - text with a single '@' and no parentheses
+	emailNameRgx = regexp.MustCompile(`^([^@]+@[^\s]+)\s+\(([^@]+)\)$`)
+	nameEmailRgx = regexp.MustCompile(`^([^@]+)\s+\(([^@]+@[^)]+)\)$`)
+	nameOnlyRgx  = regexp.MustCompile(`^([^@()]+)$`)
+	emailOnlyRgx = regexp.MustCompile(`^([^@()]+@[^@()]+)$`)
+
+	// Errors returned by DecodeEntities: TruncatedEntity when an '&' is
+	// not followed by a terminating ';', InvalidNumericReference when the
+	// digits of a "&#...;" reference cannot be parsed as a number.
+	TruncatedEntity         = errors.New("truncated entity")
+	InvalidNumericReference = errors.New("invalid numeric reference")
+)
+
+// CDATA_START and CDATA_END are the markers that open and close a CDATA
+// section; ParseText and StripCDATA search for them in element content.
+const CDATA_START = "<![CDATA["
+const CDATA_END = "]]>"
+
+// ParseText decodes the parser's current element and returns its inner XML
+// as text, which lets it handle naked XML text inside an element.
+//
+// The inner XML is whitespace-trimmed. If it contains a CDATA start marker
+// it is passed through StripCDATA; otherwise it is passed through
+// DecodeEntities, whose error (if any) is returned. A failure to decode the
+// element yields an empty string and that error.
+func ParseText(p *xpp.XMLPullParser) (string, error) {
+	var elem struct {
+		Type     string `xml:"type,attr"`
+		InnerXML string `xml:",innerxml"`
+	}
+	if err := p.DecodeElement(&elem); err != nil {
+		return "", err
+	}
+
+	inner := strings.TrimSpace(elem.InnerXML)
+	if !strings.Contains(inner, CDATA_START) {
+		return DecodeEntities(inner)
+	}
+	return StripCDATA(inner), nil
+}
+
+// StripCDATA removes CDATA tags from the string.
+//
+// The content of every CDATA section is copied verbatim. Content outside of
+// CDATA sections (before, between and after them) is passed through
+// DecodeEntities; if that fails for a piece of text, the piece is kept
+// undecoded rather than dropped, since this function has no way to report
+// the error. An opening marker without a matching closing marker is not
+// treated as a section: the remainder of the string, marker included, is
+// handled as ordinary text.
+func StripCDATA(str string) string {
+	var out strings.Builder
+
+	// writeText emits text that lives outside of any CDATA section.
+	writeText := func(s string) {
+		dec, err := DecodeEntities(s)
+		if err != nil {
+			// Best effort: keep the raw text instead of losing it.
+			dec = s
+		}
+		out.WriteString(dec)
+	}
+
+	curr := 0
+	for curr < len(str) {
+		start := indexAt(str, CDATA_START, curr)
+		if start == -1 {
+			writeText(str[curr:])
+			break
+		}
+
+		end := indexAt(str, CDATA_END, start)
+		if end == -1 {
+			writeText(str[curr:])
+			break
+		}
+
+		// Text preceding this section, then the section body itself.
+		writeText(str[curr:start])
+		out.WriteString(str[start+len(CDATA_START) : end])
+
+		// indexAt returns absolute offsets, so the cursor moves to just
+		// past the closing marker (adding curr again would skip data).
+		curr = end + len(CDATA_END)
+	}
+
+	return out.String()
+}
+
+// DecodeEntities replaces XML character and entity references in str with
+// the characters they stand for and returns the result.
+//
+// Supported are decimal ("&#65;") and hexadecimal ("&#x41;") numeric
+// references and the five predefined entities lt, gt, quot, apos and amp.
+//
+// On failure the returned string is empty and the error is
+// TruncatedEntity when an '&' has no terminating ';',
+// InvalidNumericReference when the digits of a numeric reference do not
+// parse, or an "unknown predefined entity" error for any other name.
+func DecodeEntities(str string) (string, error) {
+	var out bytes.Buffer
+	rest := []byte(str)
+
+	for len(rest) > 0 {
+		// Copy everything up to the next reference.
+		amp := bytes.IndexByte(rest, '&')
+		if amp < 0 {
+			out.Write(rest)
+			break
+		}
+		out.Write(rest[:amp])
+		rest = rest[amp+1:]
+
+		// The reference runs up to the next ';' (none is found when
+		// nothing follows the '&').
+		semi := bytes.IndexByte(rest, ';')
+		if semi < 0 {
+			return "", TruncatedEntity
+		}
+		ref := string(rest[:semi])
+		rest = rest[semi+1:]
+
+		if len(ref) > 0 && ref[0] == '#' {
+			// Numerical character reference
+			digits, base := ref[1:], 10
+			if len(digits) > 0 && digits[0] == 'x' {
+				digits, base = digits[1:], 16
+			}
+
+			code, err := strconv.ParseUint(digits, base, 32)
+			if err != nil {
+				return "", InvalidNumericReference
+			}
+			out.WriteRune(rune(code))
+			continue
+		}
+
+		// Predefined entity
+		switch ref {
+		case "lt":
+			out.WriteByte('<')
+		case "gt":
+			out.WriteByte('>')
+		case "quot":
+			out.WriteByte('"')
+		case "apos":
+			out.WriteByte('\'')
+		case "amp":
+			out.WriteByte('&')
+		default:
+			return "", fmt.Errorf("unknown predefined entity &%s;", ref)
+		}
+	}
+
+	return out.String(), nil
+}
+
+// ParseNameAddress splits the name/email strings commonly found in RSS
+// feeds, such as "Example Name (example@site.com)", into name and address.
+//
+// The input is matched against four shapes in order: "address (name)",
+// "name (address)", a bare name and a bare address. The first shape that
+// matches decides the result; a part that is not present is returned as an
+// empty string. Empty or unrecognised input yields two empty strings.
+func ParseNameAddress(nameAddressText string) (name string, address string) {
+	if nameAddressText == "" {
+		return "", ""
+	}
+
+	if m := emailNameRgx.FindStringSubmatch(nameAddressText); m != nil {
+		return m[2], m[1]
+	}
+	if m := nameEmailRgx.FindStringSubmatch(nameAddressText); m != nil {
+		return m[1], m[2]
+	}
+	if m := nameOnlyRgx.FindStringSubmatch(nameAddressText); m != nil {
+		return m[1], ""
+	}
+	if m := emailOnlyRgx.FindStringSubmatch(nameAddressText); m != nil {
+		return "", m[1]
+	}
+	return "", ""
+}
+
+// indexAt returns the index in str of the first occurrence of substr at or
+// after byte offset start, or -1 if there is none. The returned index is
+// relative to the beginning of str, not to start.
+func indexAt(str, substr string, start int) int {
+	rel := strings.Index(str[start:], substr)
+	if rel < 0 {
+		return -1
+	}
+	return start + rel
+}
--- a/vendor/github.com/mmcdole/gofeed/internal/shared/xmlbase.go
+++ b/vendor/github.com/mmcdole/gofeed/internal/shared/xmlbase.go
@@ -0,0 +1,258 @@
+package shared
+
+import (
+	"bytes"
+	"fmt"
+	"golang.org/x/net/html"
+	"net/url"
+	"strings"
+
+	"github.com/mmcdole/goxpp"
+)
+
+var (
+	// htmlURIAttrs is the set of HTML attribute names whose values are
+	// treated as URIs; ResolveHTML resolves the value of an attribute
+	// listed here against the current base.
+	// See https://pythonhosted.org/feedparser/resolving-relative-links.html
+	// To catch every possible URI attribute is non-trivial:
+	// https://stackoverflow.com/questions/2725156/complete-list-of-html-tag-attributes-which-have-a-url-value
+	htmlURIAttrs = map[string]bool{
+		"action":     true,
+		"background": true,
+		"cite":       true,
+		"codebase":   true,
+		"data":       true,
+		"href":       true,
+		"poster":     true,
+		"profile":    true,
+		"scheme":     true,
+		"src":        true,
+		"uri":        true,
+		"usemap":     true,
+	}
+)
+
+// urlStack is a stack of URLs. The most recently pushed element is kept at
+// index 0.
+type urlStack []*url.URL
+
+// push places u on top of the stack.
+func (s *urlStack) push(u *url.URL) {
+	grown := make([]*url.URL, 0, len(*s)+1)
+	grown = append(grown, u)
+	*s = append(grown, *s...)
+}
+
+// pop removes the top element and returns it. It returns nil when the
+// stack (or the receiver) is nil or empty.
+func (s *urlStack) pop() *url.URL {
+	if s == nil || len(*s) == 0 {
+		return nil
+	}
+	head := (*s)[0]
+	*s = (*s)[1:]
+	return head
+}
+
+// top returns the top element without removing it, or nil when the stack
+// (or the receiver) is nil or empty.
+func (s *urlStack) top() *url.URL {
+	if s != nil && len(*s) > 0 {
+		return (*s)[0]
+	}
+	return nil
+}
+
+// XMLBase keeps track of xml:base values while an XML document is walked
+// with NextTag, and resolves relative URLs against the current base.
+//
+// stack holds one resolved base URL per start tag seen by NextTag; the
+// entry is popped again after the corresponding end tag. URIAttrs is the
+// set of attribute names (looked up in lower case) whose values
+// resolveAttrs rewrites to absolute URLs on every start tag.
+type XMLBase struct {
+	stack    urlStack
+	URIAttrs map[string]bool
+}
+
+// FindRoot advances the parser with NextTag until the first StartTag event
+// of the document and returns that event. Any error from NextTag is
+// returned as-is; an EndDocument event seen before a start tag is reported
+// as an error.
+func (b *XMLBase) FindRoot(p *xpp.XMLPullParser) (event xpp.XMLEventType, err error) {
+	for {
+		if event, err = b.NextTag(p); err != nil {
+			return event, err
+		}
+
+		switch event {
+		case xpp.StartTag:
+			return event, nil
+		case xpp.EndDocument:
+			return event, fmt.Errorf("Failed to find root node before document end.")
+		}
+	}
+}
+
+// NextTag consumes tokens until it reaches a StartTag or an EndTag and
+// returns that event. While doing so it maintains the urlStack, so that the
+// top of the stack (accessible through the CurrentBase() and
+// CurrentBaseURL() methods) is the absolute base URI by which relative URIs
+// should be resolved:
+//
+//   - on a StartTag the element's xml:base (or an empty reference when it
+//     has none) is resolved against the current base and pushed, and the
+//     element's URI attributes are resolved;
+//   - the entry is popped on the next call after an EndTag was returned.
+//
+// NextTag is similar to goxpp's NextTag method except that it won't return
+// an error if the next immediate token isn't a Start/EndTag. Instead, it
+// keeps consuming tokens until it hits a Start/EndTag. Reaching EndDocument
+// first is reported as an error.
+func (b *XMLBase) NextTag(p *xpp.XMLPullParser) (event xpp.XMLEventType, err error) {
+	for {
+		// Pop xml:base after each end tag
+		if p.Event == xpp.EndTag {
+			b.pop()
+		}
+
+		if event, err = p.Next(); err != nil {
+			return event, err
+		}
+
+		switch event {
+		case xpp.EndTag:
+			return event, nil
+		case xpp.StartTag:
+			if err = b.push(parseBase(p)); err != nil {
+				return event, err
+			}
+			err = b.resolveAttrs(p)
+			return event, err
+		case xpp.EndDocument:
+			return event, fmt.Errorf("Failed to find NextTag before reaching the end of the document.")
+		}
+	}
+}
+
+// parseBase returns the value of the current element's xml:base attribute,
+// i.e. the attribute with local name "base" in the XML namespace, or the
+// empty string when the element has no such attribute.
+func parseBase(p *xpp.XMLPullParser) string {
+	const xmlNamespace = "http://www.w3.org/XML/1998/namespace"
+
+	for i := range p.Attrs {
+		name := p.Attrs[i].Name
+		if name.Space == xmlNamespace && name.Local == "base" {
+			return p.Attrs[i].Value
+		}
+	}
+	return ""
+}
+
+// push parses base as a URL, resolves it against the current base URL if
+// there is one, and pushes the result onto the stack. It returns the parse
+// error, in which case the stack is left unchanged.
+func (b *XMLBase) push(base string) error {
+	parsed, err := url.Parse(base)
+	if err != nil {
+		return err
+	}
+
+	if parent := b.CurrentBaseURL(); parent != nil {
+		parsed = parent.ResolveReference(parsed)
+	}
+	b.stack.push(parsed)
+	return nil
+}
+
+// pop removes the top base URL from the stack and returns it in string
+// form, or the empty string when the stack was empty.
+func (b *XMLBase) pop() string {
+	if popped := b.stack.pop(); popped != nil {
+		return popped.String()
+	}
+	return ""
+}
+
+// CurrentBaseURL returns the base URL on top of the stack, or nil when the
+// stack is empty. The returned pointer is the stack's own entry, not a copy.
+func (b *XMLBase) CurrentBaseURL() *url.URL {
+	return b.stack.top()
+}
+
+// CurrentBase returns the current base URL in string form, or the empty
+// string when there is no current base.
+func (b *XMLBase) CurrentBase() string {
+	base := b.CurrentBaseURL()
+	if base == nil {
+		return ""
+	}
+	return base.String()
+}
+
+// ResolveURL resolves u as a URL relative to the current base and returns
+// the absolute form. Without a current base, u is returned unchanged. If u
+// cannot be parsed, u is returned together with the parse error.
+//
+// NOTE(review): when u is non-empty and the base path does not end in "/",
+// the slash is appended to the base URL stored on the stack itself, so the
+// change stays visible to later CurrentBase/push calls — confirm this side
+// effect is intended.
+func (b *XMLBase) ResolveURL(u string) (string, error) {
+	if b.CurrentBase() == "" {
+		return u, nil
+	}
+
+	ref, err := url.Parse(u)
+	if err != nil {
+		return u, err
+	}
+
+	base := b.CurrentBaseURL()
+	if u != "" && base.Path != "" && !strings.HasSuffix(base.Path, "/") {
+		// There's no reason someone would use a path in xml:base if they
+		// didn't mean for it to be a directory
+		base.Path += "/"
+	}
+	return base.ResolveReference(ref).String(), nil
+}
+
+// resolveAttrs rewrites, in place, the value of every attribute of the
+// current element whose lower-cased local name is enabled in b.URIAttrs,
+// resolving it according to xml:base via ResolveURL. It stops at and
+// returns the first resolution error.
+func (b *XMLBase) resolveAttrs(p *xpp.XMLPullParser) error {
+	for i := range p.Attrs {
+		attr := &p.Attrs[i]
+		if !b.URIAttrs[strings.ToLower(attr.Name.Local)] {
+			continue
+		}
+
+		resolved, err := b.ResolveURL(attr.Value)
+		if err != nil {
+			return err
+		}
+		attr.Value = resolved
+	}
+	return nil
+}
+
+// ResolveHTML transforms html by resolving any relative URIs in attributes
+// against the current base. Every attribute of every element whose name is
+// listed in htmlURIAttrs is considered; a value that fails to resolve is
+// left as it was.
+//
+// Without a current base the input is returned unchanged. If an error
+// occurs during parsing or serialization, then the original string is
+// returned along with the error.
+func (b *XMLBase) ResolveHTML(relHTML string) (string, error) {
+	if b.CurrentBase() == "" {
+		return relHTML, nil
+	}
+
+	doc, err := html.Parse(strings.NewReader(relHTML))
+	if err != nil {
+		return relHTML, err
+	}
+
+	var visit func(*html.Node)
+
+	// recursively traverse HTML resolving any relative URIs in attributes
+	visit = func(n *html.Node) {
+		if n.Type == html.ElementNode {
+			for i, a := range n.Attr {
+				if !htmlURIAttrs[a.Key] {
+					continue
+				}
+				// Keep scanning after a hit: one element can carry
+				// several URI attributes (e.g. both poster and src).
+				if absVal, err := b.ResolveURL(a.Val); err == nil {
+					n.Attr[i].Val = absVal
+				}
+			}
+		}
+		for c := n.FirstChild; c != nil; c = c.NextSibling {
+			visit(c)
+		}
+	}
+	visit(doc)
+
+	var w bytes.Buffer
+	if err := html.Render(&w, doc); err != nil {
+		return relHTML, err
+	}
+
+	// html.Render() always writes a complete html5 document, so strip the html
+	// and body tags
+	absHTML := w.String()
+	absHTML = strings.TrimPrefix(absHTML, "<html><head></head><body>")
+	absHTML = strings.TrimSuffix(absHTML, "</body></html>")
+
+	return absHTML, nil
+}
--- a/vendor/github.com/mmcdole/gofeed/internal/shared/xmlsanitizer.go
+++ b/vendor/github.com/mmcdole/gofeed/internal/shared/xmlsanitizer.go
@@ -0,0 +1,23 @@
+package shared
+
+import (
+	"io"
+
+	"golang.org/x/text/transform"
+)
+
+// NewXMLSanitizerReader creates an io.Reader that
+// wraps another io.Reader and removes illegal xml
+// characters from the io stream.
+//
+// A rune is kept when it is a tab, line feed or carriage return, or lies in
+// one of the ranges 0x20-0xD7FF, 0xE000-0xFFFD or 0x10000-0x10FFFF; every
+// other rune is removed.
+func NewXMLSanitizerReader(xml io.Reader) io.Reader {
+	// isIllegal reports whether r is outside the set of runes listed above
+	// (the Char production of XML 1.0). The first range ends at 0xD7FF,
+	// directly below the surrogate block.
+	isIllegal := func(r rune) bool {
+		return !(r == 0x09 ||
+			r == 0x0A ||
+			r == 0x0D ||
+			r >= 0x20 && r <= 0xD7FF ||
+			r >= 0xE000 && r <= 0xFFFD ||
+			r >= 0x10000 && r <= 0x10FFFF)
+	}
+	t := transform.Chain(transform.RemoveFunc(isIllegal))
+	return transform.NewReader(xml, t)
+}