minimal vend
This commit is contained in:
19
vendor/github.com/mmcdole/gofeed/internal/shared/charsetconv.go
generated
vendored
Normal file
19
vendor/github.com/mmcdole/gofeed/internal/shared/charsetconv.go
generated
vendored
Normal file
@@ -0,0 +1,19 @@
|
||||
package shared
|
||||
|
||||
import (
|
||||
"io"
|
||||
|
||||
"golang.org/x/net/html/charset"
|
||||
)
|
||||
|
||||
// NewReaderLabel returns an io.Reader that decodes the given input
// stream from the character set named by label (e.g. "iso-8859-1")
// into UTF-8, using golang.org/x/net/html/charset for the lookup.
// An error is returned when the label is not a recognized charset.
func NewReaderLabel(label string, input io.Reader) (io.Reader, error) {
	conv, err := charset.NewReaderLabel(label, input)

	if err != nil {
		return nil, err
	}

	// Wrap the charset decoder reader with a XML sanitizer
	//clean := NewXMLSanitizerReader(conv)
	return conv, nil
}
|
||||
196
vendor/github.com/mmcdole/gofeed/internal/shared/dateparser.go
generated
vendored
Normal file
196
vendor/github.com/mmcdole/gofeed/internal/shared/dateparser.go
generated
vendored
Normal file
@@ -0,0 +1,196 @@
|
||||
package shared
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"strings"
|
||||
"time"
|
||||
)
|
||||
|
||||
// DateFormats taken from github.com/mjibson/goread.
// The formats are attempted in order, so more specific layouts come
// before looser ones.
var dateFormats = []string{
	time.RFC822,  // RSS
	time.RFC822Z, // RSS
	time.RFC3339, // Atom
	time.UnixDate,
	time.RubyDate,
	time.RFC850,
	time.RFC1123Z,
	time.RFC1123,
	time.ANSIC,
	"Mon, January 2 2006 15:04:05 -0700",
	"Mon, January 02, 2006, 15:04:05 MST",
	"Mon, January 02, 2006 15:04:05 MST",
	"Mon, Jan 2, 2006 15:04 MST",
	"Mon, Jan 2 2006 15:04 MST",
	"Mon, Jan 2, 2006 15:04:05 MST",
	"Mon, Jan 2 2006 15:04:05 -700",
	"Mon, Jan 2 2006 15:04:05 -0700",
	"Mon Jan 2 15:04 2006",
	"Mon Jan 2 15:04:05 2006 MST",
	"Mon Jan 02, 2006 3:04 pm",
	"Mon, Jan 02,2006 15:04:05 MST",
	"Mon Jan 02 2006 15:04:05 -0700",
	"Monday, January 2, 2006 15:04:05 MST",
	"Monday, January 2, 2006 03:04 PM",
	"Monday, January 2, 2006",
	"Monday, January 02, 2006",
	"Monday, 2 January 2006 15:04:05 MST",
	"Monday, 2 January 2006 15:04:05 -0700",
	"Monday, 2 Jan 2006 15:04:05 MST",
	"Monday, 2 Jan 2006 15:04:05 -0700",
	"Monday, 02 January 2006 15:04:05 MST",
	"Monday, 02 January 2006 15:04:05 -0700",
	"Monday, 02 January 2006 15:04:05",
	"Mon, 2 January 2006 15:04 MST",
	"Mon, 2 January 2006, 15:04 -0700",
	"Mon, 2 January 2006, 15:04:05 MST",
	"Mon, 2 January 2006 15:04:05 MST",
	"Mon, 2 January 2006 15:04:05 -0700",
	"Mon, 2 January 2006",
	"Mon, 2 Jan 2006 3:04:05 PM -0700",
	"Mon, 2 Jan 2006 15:4:5 MST",
	"Mon, 2 Jan 2006 15:4:5 -0700 GMT",
	"Mon, 2, Jan 2006 15:4",
	"Mon, 2 Jan 2006 15:04 MST",
	"Mon, 2 Jan 2006, 15:04 -0700",
	"Mon, 2 Jan 2006 15:04 -0700",
	"Mon, 2 Jan 2006 15:04:05 UT",
	"Mon, 2 Jan 2006 15:04:05MST",
	"Mon, 2 Jan 2006 15:04:05 MST",
	"Mon 2 Jan 2006 15:04:05 MST",
	"mon,2 Jan 2006 15:04:05 MST",
	"Mon, 2 Jan 2006 15:04:05 -0700 MST",
	"Mon, 2 Jan 2006 15:04:05-0700",
	"Mon, 2 Jan 2006 15:04:05 -0700",
	"Mon, 2 Jan 2006 15:04:05",
	"Mon, 2 Jan 2006 15:04",
	"Mon,2 Jan 2006",
	"Mon, 2 Jan 2006",
	"Mon, 2 Jan 15:04:05 MST",
	"Mon, 2 Jan 06 15:04:05 MST",
	"Mon, 2 Jan 06 15:04:05 -0700",
	"Mon, 2006-01-02 15:04",
	"Mon,02 January 2006 14:04:05 MST",
	"Mon, 02 January 2006",
	"Mon, 02 Jan 2006 3:04:05 PM MST",
	"Mon, 02 Jan 2006 15 -0700",
	"Mon,02 Jan 2006 15:04 MST",
	"Mon, 02 Jan 2006 15:04 MST",
	"Mon, 02 Jan 2006 15:04 -0700",
	"Mon, 02 Jan 2006 15:04:05 Z",
	"Mon, 02 Jan 2006 15:04:05 UT",
	"Mon, 02 Jan 2006 15:04:05 MST-07:00",
	"Mon, 02 Jan 2006 15:04:05 MST -0700",
	"Mon, 02 Jan 2006, 15:04:05 MST",
	"Mon, 02 Jan 2006 15:04:05MST",
	"Mon, 02 Jan 2006 15:04:05 MST",
	"Mon , 02 Jan 2006 15:04:05 MST",
	"Mon, 02 Jan 2006 15:04:05 GMT-0700",
	"Mon,02 Jan 2006 15:04:05 -0700",
	"Mon, 02 Jan 2006 15:04:05 -0700",
	"Mon, 02 Jan 2006 15:04:05 -07:00",
	"Mon, 02 Jan 2006 15:04:05 --0700",
	"Mon 02 Jan 2006 15:04:05 -0700",
	"Mon, 02 Jan 2006 15:04:05 -07",
	"Mon, 02 Jan 2006 15:04:05 00",
	"Mon, 02 Jan 2006 15:04:05",
	"Mon, 02 Jan 2006",
	"Mon, 02 Jan 06 15:04:05 MST",
	"January 2, 2006 3:04 PM",
	"January 2, 2006, 3:04 p.m.",
	"January 2, 2006 15:04:05 MST",
	"January 2, 2006 15:04:05",
	"January 2, 2006 03:04 PM",
	"January 2, 2006",
	"January 02, 2006 15:04:05 MST",
	"January 02, 2006 15:04",
	"January 02, 2006 03:04 PM",
	"January 02, 2006",
	"Jan 2, 2006 3:04:05 PM MST",
	"Jan 2, 2006 3:04:05 PM",
	"Jan 2, 2006 15:04:05 MST",
	"Jan 2, 2006",
	"Jan 02 2006 03:04:05PM",
	"Jan 02, 2006",
	"6/1/2 15:04",
	"6-1-2 15:04",
	"2 January 2006 15:04:05 MST",
	"2 January 2006 15:04:05 -0700",
	"2 January 2006",
	"2 Jan 2006 15:04:05 Z",
	"2 Jan 2006 15:04:05 MST",
	"2 Jan 2006 15:04:05 -0700",
	"2 Jan 2006",
	"2.1.2006 15:04:05",
	"2/1/2006",
	"2-1-2006",
	"2006 January 02",
	"2006-1-2T15:04:05Z",
	"2006-1-2 15:04:05",
	"2006-1-2",
	"2006-1-02T15:04:05Z",
	"2006-01-02T15:04Z",
	"2006-01-02T15:04-07:00",
	"2006-01-02T15:04:05Z",
	"2006-01-02T15:04:05-07:00:00",
	"2006-01-02T15:04:05:-0700",
	"2006-01-02T15:04:05-0700",
	"2006-01-02T15:04:05-07:00",
	"2006-01-02T15:04:05 -0700",
	"2006-01-02T15:04:05:00",
	"2006-01-02T15:04:05",
	"2006-01-02 at 15:04:05",
	"2006-01-02 15:04:05Z",
	"2006-01-02 15:04:05 MST",
	"2006-01-02 15:04:05-0700",
	"2006-01-02 15:04:05-07:00",
	"2006-01-02 15:04:05 -0700",
	"2006-01-02 15:04",
	"2006-01-02 00:00:00.0 15:04:05.0 -0700",
	"2006/01/02",
	"2006-01-02",
	"15:04 02.01.2006 -0700",
	"1/2/2006 3:04:05 PM MST",
	"1/2/2006 3:04:05 PM",
	"1/2/2006 15:04:05 MST",
	"1/2/2006",
	"06/1/2 15:04",
	"06-1-2 15:04",
	"02 Monday, Jan 2006 15:04",
	"02 Jan 2006 15:04 MST",
	"02 Jan 2006 15:04:05 UT",
	"02 Jan 2006 15:04:05 MST",
	"02 Jan 2006 15:04:05 -0700",
	"02 Jan 2006 15:04:05",
	"02 Jan 2006",
	"02/01/2006 15:04 MST",
	"02-01-2006 15:04:05 MST",
	"02.01.2006 15:04:05",
	"02/01/2006 15:04:05",
	"02.01.2006 15:04",
	"02/01/2006 - 15:04",
	"02.01.2006 -0700",
	"02/01/2006",
	"02-01-2006",
	"01/02/2006 3:04 PM",
	"01/02/2006 15:04:05 MST",
	"01/02/2006 - 15:04",
	"01/02/2006",
	"01-02-2006",
}

// ParseDate parses a given date string using a large
// list of commonly found feed date formats. The formats are tried in
// order and the first successful parse wins. An error is returned when
// the whitespace-trimmed input is empty or matches no known format.
func ParseDate(ds string) (t time.Time, err error) {
	d := strings.TrimSpace(ds)
	if d == "" {
		// Error strings are lowercase per Go convention (was "Date string...").
		return t, fmt.Errorf("date string is empty")
	}
	for _, f := range dateFormats {
		if t, err = time.Parse(f, d); err == nil {
			return
		}
	}
	// %q shows the raw input unambiguously (empty strings, whitespace).
	err = fmt.Errorf("failed to parse date: %q", ds)
	return
}
|
||||
176
vendor/github.com/mmcdole/gofeed/internal/shared/extparser.go
generated
vendored
Normal file
176
vendor/github.com/mmcdole/gofeed/internal/shared/extparser.go
generated
vendored
Normal file
@@ -0,0 +1,176 @@
|
||||
package shared
|
||||
|
||||
import (
|
||||
"strings"
|
||||
|
||||
"github.com/mmcdole/gofeed/extensions"
|
||||
"github.com/mmcdole/goxpp"
|
||||
)
|
||||
|
||||
// IsExtension returns whether or not the current
|
||||
// XML element is an extension element (if it has a
|
||||
// non empty prefix)
|
||||
func IsExtension(p *xpp.XMLPullParser) bool {
|
||||
space := strings.TrimSpace(p.Space)
|
||||
if prefix, ok := p.Spaces[space]; ok {
|
||||
return !(prefix == "" || prefix == "rss" || prefix == "rdf" || prefix == "content")
|
||||
}
|
||||
|
||||
return p.Space != ""
|
||||
}
|
||||
|
||||
// ParseExtension parses the current element of the
|
||||
// XMLPullParser as an extension element and updates
|
||||
// the extension map
|
||||
func ParseExtension(fe ext.Extensions, p *xpp.XMLPullParser) (ext.Extensions, error) {
|
||||
prefix := prefixForNamespace(p.Space, p)
|
||||
|
||||
result, err := parseExtensionElement(p)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// Ensure the extension prefix map exists
|
||||
if _, ok := fe[prefix]; !ok {
|
||||
fe[prefix] = map[string][]ext.Extension{}
|
||||
}
|
||||
// Ensure the extension element slice exists
|
||||
if _, ok := fe[prefix][p.Name]; !ok {
|
||||
fe[prefix][p.Name] = []ext.Extension{}
|
||||
}
|
||||
|
||||
fe[prefix][p.Name] = append(fe[prefix][p.Name], result)
|
||||
return fe, nil
|
||||
}
|
||||
|
||||
// parseExtensionElement parses a single extension element (the current
// start tag of p) into an ext.Extension, recursing into child elements
// and accumulating text content. On return the parser is positioned on
// the element's matching end tag.
func parseExtensionElement(p *xpp.XMLPullParser) (e ext.Extension, err error) {
	if err = p.Expect(xpp.StartTag, "*"); err != nil {
		return e, err
	}

	e.Name = p.Name
	e.Children = map[string][]ext.Extension{}
	e.Attrs = map[string]string{}

	for _, attr := range p.Attrs {
		// TODO: Alright that we are stripping
		// namespace information from attributes ?
		e.Attrs[attr.Name.Local] = attr.Value
	}

	// Consume tokens until this element's end tag.
	for {
		tok, err := p.Next()
		if err != nil {
			return e, err
		}

		if tok == xpp.EndTag {
			break
		}

		if tok == xpp.StartTag {
			// Recurse into the child element and file it under its name.
			child, err := parseExtensionElement(p)
			if err != nil {
				return e, err
			}

			if _, ok := e.Children[child.Name]; !ok {
				e.Children[child.Name] = []ext.Extension{}
			}

			e.Children[child.Name] = append(e.Children[child.Name], child)
		} else if tok == xpp.Text {
			// Concatenate all text nodes directly under this element.
			e.Value += p.Text
		}
	}

	e.Value = strings.TrimSpace(e.Value)

	if err = p.Expect(xpp.EndTag, e.Name); err != nil {
		return e, err
	}

	return e, nil
}
|
||||
|
||||
func prefixForNamespace(space string, p *xpp.XMLPullParser) string {
|
||||
// First we check if the global namespace map
|
||||
// contains an entry for this namespace/prefix.
|
||||
// This way we can use the canonical prefix for this
|
||||
// ns instead of the one defined in the feed.
|
||||
if prefix, ok := canonicalNamespaces[space]; ok {
|
||||
return prefix
|
||||
}
|
||||
|
||||
// Next we check if the feed itself defined this
|
||||
// this namespace and return it if we have a result.
|
||||
if prefix, ok := p.Spaces[space]; ok {
|
||||
return prefix
|
||||
}
|
||||
|
||||
// Lastly, any namespace which is not defined in the
|
||||
// the feed will be the prefix itself when using Go's
|
||||
// xml.Decoder.Token() method.
|
||||
return space
|
||||
}
|
||||
|
||||
// Namespaces taken from github.com/kurtmckee/feedparser
// These are used for determining canonical name space prefixes
// for many of the popular RSS/Atom extensions.
//
// These canonical prefixes override any prefixes used in the feed itself.
var canonicalNamespaces = map[string]string{
	"http://webns.net/mvcb/":                                         "admin",
	"http://purl.org/rss/1.0/modules/aggregation/":                   "ag",
	"http://purl.org/rss/1.0/modules/annotate/":                      "annotate",
	"http://media.tangent.org/rss/1.0/":                              "audio",
	"http://backend.userland.com/blogChannelModule":                  "blogChannel",
	"http://creativecommons.org/ns#license":                          "cc",
	"http://web.resource.org/cc/":                                    "cc",
	"http://cyber.law.harvard.edu/rss/creativeCommonsRssModule.html": "creativeCommons",
	"http://backend.userland.com/creativeCommonsRssModule":           "creativeCommons",
	"http://purl.org/rss/1.0/modules/company":                        "co",
	"http://purl.org/rss/1.0/modules/content/":                       "content",
	"http://my.theinfo.org/changed/1.0/rss/":                         "cp",
	"http://purl.org/dc/elements/1.1/":                               "dc",
	"http://purl.org/dc/terms/":                                      "dcterms",
	"http://purl.org/rss/1.0/modules/email/":                         "email",
	"http://purl.org/rss/1.0/modules/event/":                         "ev",
	"http://rssnamespace.org/feedburner/ext/1.0":                     "feedburner",
	"http://freshmeat.net/rss/fm/":                                   "fm",
	"http://xmlns.com/foaf/0.1/":                                     "foaf",
	"http://www.w3.org/2003/01/geo/wgs84_pos#":                       "geo",
	"http://www.georss.org/georss":                                   "georss",
	"http://www.opengis.net/gml":                                     "gml",
	"http://postneo.com/icbm/":                                       "icbm",
	"http://purl.org/rss/1.0/modules/image/":                         "image",
	"http://www.itunes.com/DTDs/PodCast-1.0.dtd":                     "itunes",
	"http://example.com/DTDs/PodCast-1.0.dtd":                        "itunes",
	"http://purl.org/rss/1.0/modules/link/":                          "l",
	"http://search.yahoo.com/mrss":                                   "media",
	"http://search.yahoo.com/mrss/":                                  "media",
	"http://madskills.com/public/xml/rss/module/pingback/":           "pingback",
	"http://prismstandard.org/namespaces/1.2/basic/":                 "prism",
	"http://www.w3.org/1999/02/22-rdf-syntax-ns#":                    "rdf",
	"http://www.w3.org/2000/01/rdf-schema#":                          "rdfs",
	"http://purl.org/rss/1.0/modules/reference/":                     "ref",
	"http://purl.org/rss/1.0/modules/richequiv/":                     "reqv",
	"http://purl.org/rss/1.0/modules/search/":                        "search",
	"http://purl.org/rss/1.0/modules/slash/":                         "slash",
	"http://schemas.xmlsoap.org/soap/envelope/":                      "soap",
	"http://purl.org/rss/1.0/modules/servicestatus/":                 "ss",
	"http://hacks.benhammersley.com/rss/streaming/":                  "str",
	"http://purl.org/rss/1.0/modules/subscription/":                  "sub",
	"http://purl.org/rss/1.0/modules/syndication/":                   "sy",
	"http://schemas.pocketsoap.com/rss/myDescModule/":                "szf",
	"http://purl.org/rss/1.0/modules/taxonomy/":                      "taxo",
	"http://purl.org/rss/1.0/modules/threading/":                     "thr",
	"http://purl.org/rss/1.0/modules/textinput/":                     "ti",
	"http://madskills.com/public/xml/rss/module/trackback/":          "trackback",
	"http://wellformedweb.org/commentAPI/":                           "wfw",
	"http://purl.org/rss/1.0/modules/wiki/":                          "wiki",
	"http://www.w3.org/1999/xhtml":                                   "xhtml",
	"http://www.w3.org/1999/xlink":                                   "xlink",
	"http://www.w3.org/XML/1998/namespace":                           "xml",
	"http://podlove.org/simple-chapters":                             "psc",
}
|
||||
153
vendor/github.com/mmcdole/gofeed/internal/shared/parseutils.go
generated
vendored
Normal file
153
vendor/github.com/mmcdole/gofeed/internal/shared/parseutils.go
generated
vendored
Normal file
@@ -0,0 +1,153 @@
|
||||
package shared
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"errors"
|
||||
"fmt"
|
||||
"regexp"
|
||||
"strconv"
|
||||
"strings"
|
||||
|
||||
"github.com/mmcdole/goxpp"
|
||||
)
|
||||
|
||||
var (
	// Regexes for the author forms commonly found in RSS feeds:
	// "email (name)", "name (email)", bare name, and bare email.
	emailNameRgx = regexp.MustCompile(`^([^@]+@[^\s]+)\s+\(([^@]+)\)$`)
	nameEmailRgx = regexp.MustCompile(`^([^@]+)\s+\(([^@]+@[^)]+)\)$`)
	nameOnlyRgx  = regexp.MustCompile(`^([^@()]+)$`)
	emailOnlyRgx = regexp.MustCompile(`^([^@()]+@[^@()]+)$`)

	// TruncatedEntity is returned by DecodeEntities when a '&' is not
	// followed by a complete, ';'-terminated entity.
	// InvalidNumericReference is returned when a numeric character
	// reference does not parse as an integer.
	// NOTE(review): Go convention would name these ErrTruncatedEntity /
	// ErrInvalidNumericReference, but renaming would break callers.
	TruncatedEntity         = errors.New("truncated entity")
	InvalidNumericReference = errors.New("invalid numeric reference")
)
|
||||
|
||||
// ParseText is a helper function for parsing the text
// from the current element of the XMLPullParser.
// This function can handle parsing naked XML text from
// an element. CDATA content is returned verbatim (without entity
// decoding); other text has its XML entities decoded.
func ParseText(p *xpp.XMLPullParser) (string, error) {
	var text struct {
		Type     string `xml:"type,attr"`
		InnerXML string `xml:",innerxml"`
	}

	err := p.DecodeElement(&text)
	if err != nil {
		return "", err
	}

	result := text.InnerXML
	result = strings.TrimSpace(result)

	// CDATA sections are already literal text — strip the markers and
	// return as-is.
	if strings.HasPrefix(result, "<![CDATA[") &&
		strings.HasSuffix(result, "]]>") {
		result = strings.TrimPrefix(result, "<![CDATA[")
		result = strings.TrimSuffix(result, "]]>")
		return result, nil
	}

	// Plain inner XML may contain escaped entities; decode them.
	return DecodeEntities(result)
}
|
||||
|
||||
// DecodeEntities decodes escaped XML entities
// in a string and returns the unescaped string.
// It handles numeric character references (&#nn; and &#xhh;) and the
// five predefined XML entities (lt, gt, quot, apos, amp). It returns
// TruncatedEntity when a '&' is not followed by a terminated entity,
// InvalidNumericReference when a numeric reference fails to parse,
// and a generic error for unknown named entities.
func DecodeEntities(str string) (string, error) {
	data := []byte(str)
	buf := bytes.NewBuffer([]byte{})

	for len(data) > 0 {
		// Find the next entity
		idx := bytes.IndexByte(data, '&')
		if idx == -1 {
			buf.Write(data)
			break
		}

		// Write and skip everything before it
		buf.Write(data[:idx])
		data = data[idx+1:]

		if len(data) == 0 {
			return "", TruncatedEntity
		}

		// Find the end of the entity
		end := bytes.IndexByte(data, ';')
		if end == -1 {
			return "", TruncatedEntity
		}

		if data[0] == '#' {
			// Numerical character reference
			var str string
			base := 10

			// "&#x..." is hexadecimal; plain "&#..." is decimal.
			if len(data) > 1 && data[1] == 'x' {
				str = string(data[2:end])
				base = 16
			} else {
				str = string(data[1:end])
			}

			i, err := strconv.ParseUint(str, base, 32)
			if err != nil {
				return "", InvalidNumericReference
			}

			// NOTE(review): values above 0x10FFFF are not rejected here;
			// rune(i) then makes WriteRune emit U+FFFD — confirm intended.
			buf.WriteRune(rune(i))
		} else {
			// Predefined entity
			name := string(data[:end])

			var c byte
			switch name {
			case "lt":
				c = '<'
			case "gt":
				c = '>'
			case "quot":
				c = '"'
			case "apos":
				c = '\''
			case "amp":
				c = '&'
			default:
				return "", fmt.Errorf("unknown predefined "+
					"entity &%s;", name)
			}

			buf.WriteByte(c)
		}

		// Skip the entity
		data = data[end+1:]
	}

	return buf.String(), nil
}
|
||||
|
||||
// ParseNameAddress parses name/email strings commonly
|
||||
// found in RSS feeds of the format "Example Name (example@site.com)"
|
||||
// and other variations of this format.
|
||||
func ParseNameAddress(nameAddressText string) (name string, address string) {
|
||||
if nameAddressText == "" {
|
||||
return
|
||||
}
|
||||
|
||||
if emailNameRgx.MatchString(nameAddressText) {
|
||||
result := emailNameRgx.FindStringSubmatch(nameAddressText)
|
||||
address = result[1]
|
||||
name = result[2]
|
||||
} else if nameEmailRgx.MatchString(nameAddressText) {
|
||||
result := nameEmailRgx.FindStringSubmatch(nameAddressText)
|
||||
name = result[1]
|
||||
address = result[2]
|
||||
} else if nameOnlyRgx.MatchString(nameAddressText) {
|
||||
result := nameOnlyRgx.FindStringSubmatch(nameAddressText)
|
||||
name = result[1]
|
||||
} else if emailOnlyRgx.MatchString(nameAddressText) {
|
||||
result := emailOnlyRgx.FindStringSubmatch(nameAddressText)
|
||||
address = result[1]
|
||||
}
|
||||
return
|
||||
}
|
||||
258
vendor/github.com/mmcdole/gofeed/internal/shared/xmlbase.go
generated
vendored
Normal file
258
vendor/github.com/mmcdole/gofeed/internal/shared/xmlbase.go
generated
vendored
Normal file
@@ -0,0 +1,258 @@
|
||||
package shared
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"fmt"
|
||||
"golang.org/x/net/html"
|
||||
"net/url"
|
||||
"strings"
|
||||
|
||||
"github.com/mmcdole/goxpp"
|
||||
)
|
||||
|
||||
var (
	// HTML attributes which contain URIs
	// https://pythonhosted.org/feedparser/resolving-relative-links.html
	// To catch every possible URI attribute is non-trivial:
	// https://stackoverflow.com/questions/2725156/complete-list-of-html-tag-attributes-which-have-a-url-value
	htmlURIAttrs = map[string]bool{
		"action":     true,
		"background": true,
		"cite":       true,
		"codebase":   true,
		"data":       true,
		"href":       true,
		"poster":     true,
		"profile":    true,
		"scheme":     true,
		"src":        true,
		"uri":        true,
		"usemap":     true,
	}
)
|
||||
|
||||
// urlStack is a LIFO stack of base URLs; the current (innermost) base
// lives at index 0.
type urlStack []*url.URL

// push places u on top of the stack.
func (s *urlStack) push(u *url.URL) {
	old := *s
	next := make(urlStack, 0, len(old)+1)
	next = append(next, u)
	*s = append(next, old...)
}

// pop removes and returns the top of the stack, or nil when the stack
// is nil or empty.
func (s *urlStack) pop() *url.URL {
	if s == nil || len(*s) == 0 {
		return nil
	}
	popped := (*s)[0]
	*s = (*s)[1:]
	return popped
}

// top returns the top of the stack without removing it, or nil when
// the stack is nil or empty.
func (s *urlStack) top() *url.URL {
	if s == nil || len(*s) == 0 {
		return nil
	}
	return (*s)[0]
}
|
||||
|
||||
// XMLBase tracks xml:base attributes while walking an XML document and
// resolves relative URIs against the innermost base in effect.
type XMLBase struct {
	// stack of base URLs; the top is the base for the current element.
	stack urlStack
	// URIAttrs names (lowercased) the XML attributes whose values are
	// URIs and should be resolved against the current base.
	URIAttrs map[string]bool
}
|
||||
|
||||
// FindRoot iterates through the tokens of an xml document until
// it encounters its first StartTag event. It returns an error
// if it reaches EndDocument before finding a tag.
func (b *XMLBase) FindRoot(p *xpp.XMLPullParser) (event xpp.XMLEventType, err error) {
	for {
		// NextTag also maintains the xml:base stack along the way.
		event, err = b.NextTag(p)
		if err != nil {
			return event, err
		}
		if event == xpp.StartTag {
			break
		}

		if event == xpp.EndDocument {
			return event, fmt.Errorf("Failed to find root node before document end.")
		}
	}
	return
}
|
||||
|
||||
// NextTag iterates through the tokens until it reaches a StartTag or
// EndTag. It maintains the urlStack upon encountering StartTag and
// EndTags, so that the top of the stack (accessible through the
// CurrentBase() and CurrentBaseURL() methods) is the absolute base URI
// by which relative URIs should be resolved.
//
// NextTag is similar to goxpp's NextTag method except it wont throw an
// error if the next immediate token isnt a Start/EndTag. Instead, it
// will continue to consume tokens until it hits a Start/EndTag or
// EndDocument.
func (b *XMLBase) NextTag(p *xpp.XMLPullParser) (event xpp.XMLEventType, err error) {
	for {

		// The pop is deferred to the call *after* the end tag was
		// returned — i.e. just before consuming the next token — so
		// callers still see the end tag's own base while handling it.
		if p.Event == xpp.EndTag {
			// Pop xml:base after each end tag
			b.pop()
		}

		event, err = p.Next()
		if err != nil {
			return event, err
		}

		if event == xpp.EndTag {
			break
		}

		if event == xpp.StartTag {
			// Push this element's xml:base (resolved against the
			// enclosing base) and rewrite its URI attributes in place.
			base := parseBase(p)
			err = b.push(base)
			if err != nil {
				return
			}

			err = b.resolveAttrs(p)
			if err != nil {
				return
			}

			break
		}

		if event == xpp.EndDocument {
			return event, fmt.Errorf("Failed to find NextTag before reaching the end of the document.")
		}

	}
	return
}
|
||||
|
||||
func parseBase(p *xpp.XMLPullParser) string {
|
||||
xmlURI := "http://www.w3.org/XML/1998/namespace"
|
||||
for _, attr := range p.Attrs {
|
||||
if attr.Name.Local == "base" && attr.Name.Space == xmlURI {
|
||||
return attr.Value
|
||||
}
|
||||
}
|
||||
return ""
|
||||
}
|
||||
|
||||
// push parses base as a URL, resolves it against the current base (if
// any), and makes the result the new current base. Note that an empty
// base string still pushes an entry, keeping pushes balanced with the
// pops performed on end tags.
func (b *XMLBase) push(base string) error {
	newURL, err := url.Parse(base)
	if err != nil {
		return err
	}

	topURL := b.CurrentBaseURL()
	if topURL != nil {
		// A relative xml:base is interpreted against the enclosing base.
		newURL = topURL.ResolveReference(newURL)
	}
	b.stack.push(newURL)
	return nil
}
|
||||
|
||||
// returns the popped base URL
|
||||
func (b *XMLBase) pop() string {
|
||||
url := b.stack.pop()
|
||||
if url != nil {
|
||||
return url.String()
|
||||
}
|
||||
return ""
|
||||
}
|
||||
|
||||
// CurrentBaseURL returns the base URL currently in effect, or nil when
// no base has been pushed.
func (b *XMLBase) CurrentBaseURL() *url.URL {
	return b.stack.top()
}
|
||||
|
||||
func (b *XMLBase) CurrentBase() string {
|
||||
if url := b.CurrentBaseURL(); url != nil {
|
||||
return url.String()
|
||||
}
|
||||
return ""
|
||||
}
|
||||
|
||||
// ResolveURL resolves the given string as a URL relative to the
// current base. When no base is in effect, u is returned unchanged.
func (b *XMLBase) ResolveURL(u string) (string, error) {
	if b.CurrentBase() == "" {
		return u, nil
	}

	relURL, err := url.Parse(u)
	if err != nil {
		return u, err
	}
	curr := b.CurrentBaseURL()
	if curr.Path != "" && u != "" && curr.Path[len(curr.Path)-1] != '/' {
		// There's no reason someone would use a path in xml:base if they
		// didn't mean for it to be a directory
		// NOTE(review): this mutates the URL object stored on the stack,
		// so the trailing slash persists for later resolutions against
		// the same base — confirm that is intended.
		curr.Path = curr.Path + "/"
	}
	absURL := b.CurrentBaseURL().ResolveReference(relURL)
	return absURL.String(), nil
}
|
||||
|
||||
// resolve relative URI attributes according to xml:base
|
||||
func (b *XMLBase) resolveAttrs(p *xpp.XMLPullParser) error {
|
||||
for i, attr := range p.Attrs {
|
||||
lowerName := strings.ToLower(attr.Name.Local)
|
||||
if b.URIAttrs[lowerName] {
|
||||
absURL, err := b.ResolveURL(attr.Value)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
p.Attrs[i].Value = absURL
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// ResolveHTML transforms html by resolving any relative URIs in
// attributes. If an error occurs during parsing or serialization, then
// the original string is returned along with the error.
func (b *XMLBase) ResolveHTML(relHTML string) (string, error) {
	if b.CurrentBase() == "" {
		return relHTML, nil
	}

	htmlReader := strings.NewReader(relHTML)

	doc, err := html.Parse(htmlReader)
	if err != nil {
		return relHTML, err
	}

	var visit func(*html.Node)

	// recursively traverse HTML resolving any relative URIs in attributes
	visit = func(n *html.Node) {
		if n.Type == html.ElementNode {
			for i, a := range n.Attr {
				if htmlURIAttrs[a.Key] {
					absVal, err := b.ResolveURL(a.Val)
					if err == nil {
						n.Attr[i].Val = absVal
					}
					// NOTE(review): this break stops after the first URI
					// attribute found on an element, so any additional URI
					// attributes on the same element are left unresolved —
					// confirm that is intended.
					break
				}
			}
		}
		for c := n.FirstChild; c != nil; c = c.NextSibling {
			visit(c)
		}
	}

	visit(doc)
	var w bytes.Buffer
	err = html.Render(&w, doc)
	if err != nil {
		return relHTML, err
	}

	// html.Render() always writes a complete html5 document, so strip the html
	// and body tags
	absHTML := w.String()
	absHTML = strings.TrimPrefix(absHTML, "<html><head></head><body>")
	absHTML = strings.TrimSuffix(absHTML, "</body></html>")

	return absHTML, err
}
|
||||
23
vendor/github.com/mmcdole/gofeed/internal/shared/xmlsanitizer.go
generated
vendored
Normal file
23
vendor/github.com/mmcdole/gofeed/internal/shared/xmlsanitizer.go
generated
vendored
Normal file
@@ -0,0 +1,23 @@
|
||||
package shared
|
||||
|
||||
import (
|
||||
"io"
|
||||
|
||||
"golang.org/x/text/transform"
|
||||
)
|
||||
|
||||
// NewXMLSanitizerReader creates an io.Reader that
|
||||
// wraps another io.Reader and removes illegal xml
|
||||
// characters from the io stream.
|
||||
func NewXMLSanitizerReader(xml io.Reader) io.Reader {
|
||||
isIllegal := func(r rune) bool {
|
||||
return !(r == 0x09 ||
|
||||
r == 0x0A ||
|
||||
r == 0x0D ||
|
||||
r >= 0x20 && r <= 0xDF77 ||
|
||||
r >= 0xE000 && r <= 0xFFFD ||
|
||||
r >= 0x10000 && r <= 0x10FFFF)
|
||||
}
|
||||
t := transform.Chain(transform.RemoveFunc(isIllegal))
|
||||
return transform.NewReader(xml, t)
|
||||
}
|
||||
Reference in New Issue
Block a user