diff --git a/rss/feed.go b/rss/feed.go
new file mode 100644
index 0000000..f6f24db
--- /dev/null
+++ b/rss/feed.go
@@ -0,0 +1,162 @@
+// Package rss fetches feeds with gofeed, filters their items and keeps
+// track of what has already been seen.
+package rss
+
+import (
+	"bytes"
+	"encoding/gob"
+	"errors"
+	"io/ioutil"
+	"net/http"
+	"regexp"
+	"time"
+
+	"github.com/mmcdole/gofeed"
+)
+
+// Feed is the state kept for one feed. It is what Serialize and Deserialize
+// round-trip through gob.
+type Feed struct {
+	// Updated is the feed timestamp recorded by the last successful Update.
+	// Items older than it are skipped by the next Update.
+	Updated time.Time
+
+	// Items holds the titles of all items accepted so far.
+	Items []string
+
+	// ItemFilter is a regular expression matched against item titles.
+	ItemFilter string
+
+	// ContentFilter is a regular expression applied to item content. When it
+	// is empty the content is kept verbatim.
+	ContentFilter string
+
+	// Source is the URL the feed is fetched from.
+	Source string
+}
+
+// NewFeed checks that both filters compile, creates a Feed for source and
+// runs an initial Update. It returns an error if a filter is not a valid
+// regular expression or if that first Update fails.
+func NewFeed(source, itemFilter, contentFilter string) (*Feed, error) {
+	if _, err := regexp.Compile(itemFilter); err != nil {
+		return nil, err
+	}
+	if _, err := regexp.Compile(contentFilter); err != nil {
+		return nil, err
+	}
+	f := &Feed{
+		Items:         []string{},
+		ItemFilter:    itemFilter,
+		ContentFilter: contentFilter,
+		Source:        source,
+	}
+	if _, err := f.Update(); err != nil {
+		return nil, err
+	}
+	// Success must be reported with a nil error: callers that check err
+	// would otherwise discard a fully initialised feed.
+	return f, nil
+}
+
+// Deserialize decodes a Feed from the gob bytes produced by Serialize. It
+// returns a nil Feed together with the error when decoding fails.
+func Deserialize(src []byte) (*Feed, error) {
+	var dst Feed
+	if err := gob.NewDecoder(bytes.NewReader(src)).Decode(&dst); err != nil {
+		return nil, err
+	}
+	return &dst, nil
+}
+
+// Serialize encodes f with gob. It returns nil bytes together with the error
+// when encoding fails.
+func (f *Feed) Serialize() ([]byte, error) {
+	var buffer bytes.Buffer
+	if err := gob.NewEncoder(&buffer).Encode(f); err != nil {
+		return nil, err
+	}
+	return buffer.Bytes(), nil
+}
+
+// Update downloads f.Source, parses it as a feed and merges it into f. It
+// returns the items accepted by this call.
+func (f *Feed) Update() ([]*Item, error) {
+	resp, err := http.Get(f.Source)
+	if err != nil {
+		return nil, err
+	}
+	// Close the body on every path so the connection is not leaked.
+	defer resp.Body.Close()
+	if resp.StatusCode < 200 || resp.StatusCode > 299 {
+		return nil, errors.New("fetching " + f.Source + ": unexpected status " + resp.Status)
+	}
+	body, err := ioutil.ReadAll(resp.Body)
+	if err != nil {
+		return nil, err
+	}
+	feed, err := gofeed.NewParser().Parse(bytes.NewReader(body))
+	if err != nil {
+		return nil, err
+	}
+	return f.fromGofeed(feed)
+}
+
+// fromGofeed merges a parsed feed into f and returns the accepted items.
+//
+// The new feed timestamp is, in order of preference, the feed's published
+// time, its updated time, the timestamp of its first item, or the current
+// time. f.Updated is only advanced after the items have been processed, so
+// they are compared against the timestamp of the previous Update.
+func (f *Feed) fromGofeed(feed *gofeed.Feed) ([]*Item, error) {
+	updated := feed.PublishedParsed
+	if updated == nil {
+		updated = feed.UpdatedParsed
+	}
+	if updated == nil && len(feed.Items) > 0 {
+		updated = gofeedItemTS(feed.Items[0])
+	}
+	if updated == nil {
+		now := time.Now()
+		updated = &now
+	}
+	newitems, err := f.appendNewItems(feed.Items)
+	if err != nil {
+		return nil, err
+	}
+	f.Updated = *updated
+	return newitems, nil
+}
+
+// appendNewItems converts every item that is not older than f.Updated and
+// whose title matches f.ItemFilter, appends its title to f.Items and returns
+// the converted items.
+//
+// Both filters are compiled once, before any item is looked at, so an
+// invalid pattern is returned as an error rather than silently rejecting
+// every title or panicking while the content is filtered.
+func (f *Feed) appendNewItems(items []*gofeed.Item) ([]*Item, error) {
+	titleRe, err := regexp.Compile(f.ItemFilter)
+	if err != nil {
+		return nil, err
+	}
+	var contentRe *regexp.Regexp
+	if f.ContentFilter != "" {
+		if contentRe, err = regexp.Compile(f.ContentFilter); err != nil {
+			return nil, err
+		}
+	}
+	newitems := []*Item{}
+	for _, gfitem := range items {
+		if gofeedItemTS(gfitem).Before(f.Updated) {
+			continue
+		}
+		if !titleRe.MatchString(gfitem.Title) {
+			continue
+		}
+		item := newItem(gfitem, contentRe)
+		newitems = append(newitems, item)
+		f.Items = append(f.Items, item.Name)
+	}
+	return newitems, nil
+}
diff --git a/rss/item.go b/rss/item.go
new file mode 100644
index 0000000..eefc4dd
--- /dev/null
+++ b/rss/item.go
@@ -0,0 +1,107 @@
+package rss
+
+import (
+	"html"
+	"net/url"
+	"regexp"
+	"strings"
+	"time"
+
+	"github.com/mmcdole/gofeed"
+)
+
+// Patterns used by cleanImgTags, compiled once instead of on every call.
+//
+// NOTE(review): the patch this file came from had its HTML tag literals
+// stripped; imgTagRe, the rebuilt "<img ...>" tag and the "<br>" separators
+// below were restored from context. Confirm them against the upstream commit.
+var (
+	imgTagRe = regexp.MustCompile("<img[^>]+>")
+	imgSrcRe = regexp.MustCompile("src=\"[^\"]+\"")
+
+	// legacyDomainRe is the fallback for links that net/url cannot split into
+	// a scheme and a host, such as "www.example.com/post".
+	legacyDomainRe = regexp.MustCompile("(https?://)?(www\\.)?[a-zA-Z0-9]+\\.+[a-z]{2}[a-z]?")
+)
+
+// Item is one feed entry after conversion from gofeed.
+type Item struct {
+	Name    string
+	Link    string
+	Content string
+	TS      time.Time
+}
+
+// fromGofeedItem converts gfitem into an Item. With an empty filter the
+// content is copied verbatim; otherwise see newItem. filter must be a valid
+// regular expression, since it is compiled with regexp.MustCompile.
+func fromGofeedItem(gfitem *gofeed.Item, filter string) *Item {
+	if filter == "" {
+		return newItem(gfitem, nil)
+	}
+	return newItem(gfitem, regexp.MustCompile(filter))
+}
+
+// newItem converts gfitem into an Item. A nil contentRe keeps the content
+// verbatim. Otherwise the content becomes every match of contentRe joined
+// with "\n<br>\n" and passed through cleanImgTags.
+func newItem(gfitem *gofeed.Item, contentRe *regexp.Regexp) *Item {
+	item := &Item{
+		Name: gfitem.Title,
+		Link: gfitem.Link,
+		TS:   *gofeedItemTS(gfitem),
+	}
+	if contentRe == nil {
+		item.Content = gfitem.Content
+		return item
+	}
+	matches := contentRe.FindAllString(gfitem.Content, -1)
+	item.Content = cleanImgTags(strings.Join(matches, "\n<br>\n"), item.Link)
+	return item
+}
+
+// cleanImgTags unescapes HTML entities in s and replaces every <img> tag
+// with a minimal one that only carries its src attribute. Sources that start
+// with a single "/" are made absolute using the origin of link. A tag without
+// a src attribute is dropped.
+func cleanImgTags(s, link string) string {
+	origin := linkOrigin(link)
+	s = html.UnescapeString(s)
+	return imgTagRe.ReplaceAllStringFunc(s, func(tag string) string {
+		srcs := imgSrcRe.FindAllString(tag, -1)
+		for i, src := range srcs {
+			// "src=\"" is five bytes long, so src[5:] is the attribute value
+			// plus its closing quote. Protocol-relative values ("//host/x")
+			// already name a host and are left alone.
+			if strings.HasPrefix(src, "src=\"/") && !strings.HasPrefix(src, "src=\"//") {
+				src = "src=\"" + origin + src[5:]
+			}
+			srcs[i] = "<img " + src + ">"
+		}
+		return strings.Join(srcs, "<br>")
+	})
+}
+
+// linkOrigin returns the scheme and host of link ("https://blog.example.org")
+// so that host-relative image paths can be resolved against it. Links without
+// a scheme or host fall back to the legacy pattern match.
+func linkOrigin(link string) string {
+	if u, err := url.Parse(link); err == nil && u.Scheme != "" && u.Host != "" {
+		return u.Scheme + "://" + u.Host
+	}
+	return legacyDomainRe.FindString(link)
+}
+
+// gofeedItemTS returns the item's updated time, or its published time when
+// no updated time is set, or the zero time when neither is set. The result is
+// never nil and points at a copy of the timestamp.
+func gofeedItemTS(gfitem *gofeed.Item) *time.Time {
+	var t time.Time
+	switch {
+	case gfitem.UpdatedParsed != nil:
+		t = *gfitem.UpdatedParsed
+	case gfitem.PublishedParsed != nil:
+		t = *gfitem.PublishedParsed
+	}
+	return &t
+}
diff --git a/rss/item_test.go b/rss/item_test.go
new file mode 100644
index 0000000..ce134fc
--- /dev/null
+++ b/rss/item_test.go
@@ -0,0 +1,66 @@
+package rss
+
+import (
+	"testing"
+
+	"github.com/mmcdole/gofeed"
+)
+
+// Test_RSSItem checks fromGofeedItem with and without a content filter.
+func Test_RSSItem(t *testing.T) {
+	cases := []struct {
+		input  gofeed.Item
+		filter string
+		output Item
+	}{
+		{
+			input: gofeed.Item{
+				Title:   "a",
+				Link:    "b",
+				Content: "",
+			},
+			filter: "",
+			output: Item{
+				Name:    "a",
+				Link:    "b",
+				Content: "",
+			},
+		},
+		{
+			// NOTE(review): this raw string may have lost whitespace or
+			// markup in the patch this test was recovered from; confirm
+			// the literal against the upstream commit.
+			input: gofeed.Item{
+				Title:   "a",
+				Link:    "b",
+				Content: `x y `,
+			},
+			filter: "[a-z]*",
+			output: Item{
+				Name:    "a",
+				Link:    "b",
+				Content: "x\n<br>\ny",
+			},
+		},
+		{
+			input: gofeed.Item{
+				Title:   "a",
+				Link:    "b",
+				Content: "x y",
+			},
+			filter: "[a-z]*",
+			output: Item{
+				Name:    "a",
+				Link:    "b",
+				Content: "x\n<br>\ny",
+			},
+		},
+	}
+	for i := range cases {
+		c := &cases[i]
+		output := fromGofeedItem(&c.input, c.filter)
+		if *output != c.output {
+			t.Errorf("failed to convert gofeed: wanted %v, got %v", c.output, *output)
+		}
+	}
+}