Initial rss with partially tested items

master
Bel LaPointe 2018-10-07 23:06:21 -06:00
parent cfb31cf6a5
commit eeb577dda8
3 changed files with 238 additions and 0 deletions

109
rss/feed.go Normal file
View File

@ -0,0 +1,109 @@
package rss
import (
"bytes"
"encoding/gob"
"errors"
"io/ioutil"
"net/http"
"regexp"
"time"
"github.com/mmcdole/gofeed"
)
// Feed describes a single remote feed together with the filters applied to
// its items. It is the value encoded by Serialize and decoded by Deserialize.
type Feed struct {
// Updated is the timestamp recorded by the last successful update; items
// older than it are skipped when new items are collected.
Updated time.Time
// Items holds the names (titles) of every item accepted so far.
Items []string
// ItemFilter is a regular expression matched against each item title;
// items whose title does not match are ignored.
ItemFilter string
// ContentFilter is a regular expression used to extract the parts of an
// item's content that are kept. An empty string keeps the content as is.
ContentFilter string
// Source is the URL the feed is fetched from.
Source string
}
// NewFeed builds a Feed for source and performs an initial Update so the
// returned value already reflects the remote feed.
//
// itemFilter and contentFilter must be valid regular expressions and source
// must be non-empty; otherwise an error is returned before any network
// access. Any error from the initial Update is returned as well. On error
// the returned *Feed is nil.
func NewFeed(source, itemFilter, contentFilter string) (*Feed, error) {
	if _, err := regexp.Compile(itemFilter); err != nil {
		return nil, err
	}
	if _, err := regexp.Compile(contentFilter); err != nil {
		return nil, err
	}
	// An empty source can never be fetched; fail early with a clear message
	// instead of surfacing an opaque transport error.
	if source == "" {
		return nil, errors.New("rss: empty feed source")
	}
	f := &Feed{
		Items:         []string{},
		ItemFilter:    itemFilter,
		ContentFilter: contentFilter,
		Source:        source,
	}
	if _, err := f.Update(); err != nil {
		return nil, err
	}
	// Success: the feed is fully initialised, so report no error.
	return f, nil
}
// Deserialize decodes a gob-encoded Feed from src.
//
// The returned pointer is always non-nil; when decoding fails it refers to
// whatever the decoder managed to fill in and the error is returned with it.
func Deserialize(src []byte) (*Feed, error) {
	feed := new(Feed)
	err := gob.NewDecoder(bytes.NewBuffer(src)).Decode(feed)
	return feed, err
}
// Serialize gob-encodes the feed and returns the resulting bytes together
// with any encoding error. The bytes written before a failure are still
// returned alongside the error.
func (f *Feed) Serialize() ([]byte, error) {
	out := new(bytes.Buffer)
	err := gob.NewEncoder(out).Encode(f)
	return out.Bytes(), err
}
// Update fetches f.Source over HTTP, parses the response as a feed and merges
// it into f. It returns the items that were newly accepted.
//
// An error is returned when the request fails, the server answers with a
// non-2xx status, the body cannot be read, or the body is not a parseable
// feed.
func (f *Feed) Update() ([]*Item, error) {
	resp, err := http.Get(f.Source)
	if err != nil {
		return nil, err
	}
	// Always release the connection, whichever path returns below.
	defer resp.Body.Close()
	// An error page is not a feed; report the status instead of a confusing
	// parser failure.
	if resp.StatusCode < http.StatusOK || resp.StatusCode >= http.StatusMultipleChoices {
		return nil, errors.New("rss: fetching " + f.Source + ": unexpected status " + resp.Status)
	}
	body, err := ioutil.ReadAll(resp.Body)
	if err != nil {
		return nil, err
	}
	parser := gofeed.NewParser()
	feed, err := parser.Parse(bytes.NewBuffer(body))
	if err != nil {
		return nil, err
	}
	return f.fromGofeed(feed)
}
// fromGofeed merges a parsed gofeed.Feed into f and returns the newly
// accepted items.
//
// The feed timestamp is taken from, in order of preference: the feed's
// published time, the feed's updated time, the timestamp of its first item,
// and finally the current time. f.Updated is only advanced once the items
// have been appended without error.
func (f *Feed) fromGofeed(feed *gofeed.Feed) ([]*Item, error) {
	var stamp *time.Time
	switch {
	case feed.PublishedParsed != nil:
		stamp = feed.PublishedParsed
	case feed.UpdatedParsed != nil:
		stamp = feed.UpdatedParsed
	case len(feed.Items) > 0:
		stamp = gofeedItemTS(feed.Items[0])
	}
	if stamp == nil {
		now := time.Now()
		stamp = &now
	}

	// Items are filtered against the previous f.Updated, so it must not be
	// overwritten until after this call.
	fresh, err := f.appendNewItems(feed.Items)
	if err != nil {
		return nil, err
	}

	f.Updated = *stamp
	return fresh, nil
}
// appendNewItems converts every item that is not older than f.Updated and
// whose title matches f.ItemFilter, records its name in f.Items and returns
// the converted items.
//
// It returns an error when f.ItemFilter is not a valid regular expression
// (possible for a Feed that did not come from NewFeed, e.g. a deserialized
// one); in that case f is left untouched.
func (f *Feed) appendNewItems(items []*gofeed.Item) ([]*Item, error) {
	// Compile once instead of once per item, and surface a bad pattern
	// rather than silently rejecting every item.
	titleFilter, err := regexp.Compile(f.ItemFilter)
	if err != nil {
		return nil, err
	}
	newitems := []*Item{}
	for _, gfitem := range items {
		// NOTE(review): items stamped exactly at f.Updated are accepted
		// again on every update — confirm whether that is intended.
		if gofeedItemTS(gfitem).Before(f.Updated) {
			continue
		}
		if !titleFilter.MatchString(gfitem.Title) {
			continue
		}
		item := fromGofeedItem(gfitem, f.ContentFilter)
		newitems = append(newitems, item)
		f.Items = append(f.Items, item.Name)
	}
	return newitems, nil
}

68
rss/item.go Normal file
View File

@ -0,0 +1,68 @@
package rss
import (
"html"
"regexp"
"strings"
"time"
"github.com/mmcdole/gofeed"
)
// Item is the package's reduced view of a single feed entry.
type Item struct {
// Name is the entry's title.
Name string
// Link is the entry's link as given by the feed.
Link string
// Content is the entry's content, either verbatim or reduced to the parts
// selected by a content filter.
Content string
// TS is the entry's updated time, falling back to its published time; it
// is the zero time when the feed provides neither.
TS time.Time
}
// fromGofeedItem converts a gofeed.Item into an Item.
//
// With an empty filter the content is copied verbatim. Otherwise filter is
// compiled as a regular expression, all of its matches in the content are
// joined with "\n<br>\n", and the result is passed through cleanImgTags with
// the item's link. An invalid filter makes this function panic.
func fromGofeedItem(gfitem *gofeed.Item, filter string) *Item {
	converted := &Item{
		Name: gfitem.Title,
		Link: gfitem.Link,
		TS:   *gofeedItemTS(gfitem),
	}
	if filter != "" {
		pattern := regexp.MustCompile(filter)
		selected := pattern.FindAllString(gfitem.Content, -1)
		converted.Content = cleanImgTags(strings.Join(selected, "\n<br>\n"), converted.Link)
		return converted
	}
	converted.Content = gfitem.Content
	return converted
}
// Patterns used by cleanImgTags, compiled once at package initialisation.
var (
	// imgLinkDomainRe extracts an optional scheme, the full host name
	// (including sub-domains and multi-part TLDs) and an optional port.
	imgLinkDomainRe = regexp.MustCompile(`(https?://)?[a-zA-Z0-9.-]+\.[a-z]{2,}(:[0-9]+)?`)
	// imgTagRe matches an <img> element closed with "/>" or "</img>".
	imgTagRe = regexp.MustCompile(`<img.+?/(img)?>`)
	// imgSrcAttrRe matches a double-quoted, non-empty src attribute.
	imgSrcAttrRe = regexp.MustCompile(`src="[^"]+"`)
)

// cleanImgTags HTML-unescapes s and rewrites every <img> element in it to the
// minimal form `<img src="..." />`, dropping all other attributes.
//
// A root-relative src (starting with a single "/") is made absolute by
// prefixing the scheme and host found in url; protocol-relative ("//host/...")
// and absolute sources are left as they are. An element with several src
// attributes yields one <img> per attribute joined with "<br>"; an element
// without any src attribute is removed.
func cleanImgTags(s, url string) string {
	domain := imgLinkDomainRe.FindString(url)
	s = html.UnescapeString(s)
	return imgTagRe.ReplaceAllStringFunc(s, func(tag string) string {
		const srcOpen = `src="`
		sources := imgSrcAttrRe.FindAllString(tag, -1)
		for k, src := range sources {
			rootRelative := strings.HasPrefix(src, srcOpen+"/") &&
				!strings.HasPrefix(src, srcOpen+"//")
			if rootRelative {
				src = srcOpen + domain + src[len(srcOpen):]
			}
			sources[k] = "<img " + src + " />"
		}
		return strings.Join(sources, "<br>")
	})
}
// gofeedItemTS returns a pointer to a copy of the item's timestamp: its
// updated time when present, otherwise its published time, otherwise the zero
// time. The result is never nil.
func gofeedItemTS(gfitem *gofeed.Item) *time.Time {
	stamp := new(time.Time)
	switch {
	case gfitem.UpdatedParsed != nil:
		*stamp = *gfitem.UpdatedParsed
	case gfitem.PublishedParsed != nil:
		*stamp = *gfitem.PublishedParsed
	}
	return stamp
}

61
rss/item_test.go Normal file
View File

@ -0,0 +1,61 @@
package rss
import (
"testing"
"github.com/mmcdole/gofeed"
)
// Test_RSSItem runs fromGofeedItem over a table of gofeed items and content
// filters and compares each converted Item with the expected value.
func Test_RSSItem(t *testing.T) {
	type conversion struct {
		source  gofeed.Item
		pattern string
		want    Item
	}
	table := []conversion{
		// Empty filter: content is copied through unchanged.
		{
			source:  gofeed.Item{Title: "a", Link: "b", Content: ""},
			pattern: "",
			want:    Item{Name: "a", Link: "b", Content: ""},
		},
		// Word filter applied to content that also carries an image tag.
		{
			source:  gofeed.Item{Title: "a", Link: "b", Content: `x y <img src="asdf"></img>`},
			pattern: "[a-z]*",
			want:    Item{Name: "a", Link: "b", Content: "x\n<br>\ny"},
		},
		// Word filter applied to plain text.
		{
			source:  gofeed.Item{Title: "a", Link: "b", Content: "x y"},
			pattern: "[a-z]*",
			want:    Item{Name: "a", Link: "b", Content: "x\n<br>\ny"},
		},
	}
	for i := range table {
		tc := table[i]
		got := fromGofeedItem(&tc.source, tc.pattern)
		if *got == tc.want {
			continue
		}
		t.Errorf("failed to convert gofeed: wanted %v, got %v", tc.want, *got)
	}
}