Initial rss with partially tested items
parent
cfb31cf6a5
commit
eeb577dda8
|
|
@ -0,0 +1,109 @@
|
|||
package rss
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"encoding/gob"
|
||||
"errors"
|
||||
"io/ioutil"
|
||||
"net/http"
|
||||
"regexp"
|
||||
"time"
|
||||
|
||||
"github.com/mmcdole/gofeed"
|
||||
)
|
||||
|
||||
// Feed is the persisted state of one subscribed feed. It is the value that
// Serialize and Deserialize round-trip through encoding/gob.
type Feed struct {
	// Updated is the feed-level timestamp recorded by the last successful
	// refresh; items stamped before it are skipped on the next refresh.
	Updated time.Time
	// Items accumulates the names (titles) of every item accepted so far.
	Items []string
	// ItemFilter is a regular expression matched against item titles.
	// ContentFilter is a regular expression used to extract item content.
	// Source is the URL the feed is fetched from.
	ItemFilter, ContentFilter, Source string
}
|
||||
|
||||
func NewFeed(source, itemFilter, contentFilter string) (*Feed, error) {
|
||||
if _, err := regexp.Compile(itemFilter); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if _, err := regexp.Compile(contentFilter); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
f := &Feed{
|
||||
Items: []string{},
|
||||
ItemFilter: itemFilter,
|
||||
ContentFilter: contentFilter,
|
||||
Source: source,
|
||||
}
|
||||
if _, err := f.Update(); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return f, errors.New("not implemented")
|
||||
}
|
||||
|
||||
func Deserialize(src []byte) (*Feed, error) {
|
||||
buffer := bytes.NewBuffer(src)
|
||||
dec := gob.NewDecoder(buffer)
|
||||
var dst Feed
|
||||
err := dec.Decode(&dst)
|
||||
return &dst, err
|
||||
}
|
||||
|
||||
func (f *Feed) Serialize() ([]byte, error) {
|
||||
var buffer bytes.Buffer
|
||||
enc := gob.NewEncoder(&buffer)
|
||||
err := enc.Encode(f)
|
||||
return buffer.Bytes(), err
|
||||
}
|
||||
|
||||
func (f *Feed) Update() ([]*Item, error) {
|
||||
resp, err := http.Get(f.Source)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
body, err := ioutil.ReadAll(resp.Body)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
parser := gofeed.NewParser()
|
||||
feed, err := parser.Parse(bytes.NewBuffer(body))
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return f.fromGofeed(feed)
|
||||
}
|
||||
|
||||
func (f *Feed) fromGofeed(feed *gofeed.Feed) ([]*Item, error) {
|
||||
updated := feed.PublishedParsed
|
||||
if updated == nil {
|
||||
updated = feed.UpdatedParsed
|
||||
}
|
||||
if updated == nil && len(feed.Items) > 0 {
|
||||
updated = gofeedItemTS(feed.Items[0])
|
||||
}
|
||||
if updated == nil {
|
||||
t := time.Now()
|
||||
updated = &t
|
||||
}
|
||||
newitems, err := f.appendNewItems(feed.Items)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
f.Updated = *updated
|
||||
return newitems, nil
|
||||
}
|
||||
|
||||
func (f *Feed) appendNewItems(items []*gofeed.Item) ([]*Item, error) {
|
||||
newitems := []*Item{}
|
||||
for i := range items {
|
||||
t := gofeedItemTS(items[i])
|
||||
if t.Before(f.Updated) {
|
||||
continue
|
||||
}
|
||||
if ok, _ := regexp.MatchString(f.ItemFilter, items[i].Title); !ok {
|
||||
continue
|
||||
}
|
||||
item := fromGofeedItem(items[i], f.ContentFilter)
|
||||
newitems = append(newitems, item)
|
||||
f.Items = append(f.Items, item.Name)
|
||||
}
|
||||
return newitems, nil
|
||||
}
|
||||
|
|
@ -0,0 +1,68 @@
|
|||
package rss
|
||||
|
||||
import (
|
||||
"html"
|
||||
"regexp"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/mmcdole/gofeed"
|
||||
)
|
||||
|
||||
// Item is a single feed entry after conversion from its gofeed form.
type Item struct {
	// Name is the entry title, Link is the entry link, and Content is the
	// entry content, optionally reduced by a content filter.
	Name, Link, Content string
	// TS is the entry timestamp; it is the zero time when the source entry
	// carries neither an updated nor a published time.
	TS time.Time
}
|
||||
|
||||
func fromGofeedItem(gfitem *gofeed.Item, filter string) *Item {
|
||||
item := &Item{
|
||||
Name: gfitem.Title,
|
||||
Link: gfitem.Link,
|
||||
Content: "",
|
||||
TS: *gofeedItemTS(gfitem),
|
||||
}
|
||||
if filter == "" {
|
||||
item.Content = gfitem.Content
|
||||
return item
|
||||
}
|
||||
r := regexp.MustCompile(filter)
|
||||
matches := r.FindAllString(gfitem.Content, -1)
|
||||
content := strings.Join(matches, "\n<br>\n")
|
||||
content = cleanImgTags(content, item.Link)
|
||||
item.Content = content
|
||||
return item
|
||||
}
|
||||
|
||||
// Patterns used by cleanImgTags, compiled once at package initialisation
// instead of on every call.
var (
	// imgTagRe matches an <img ...> element up to its closing "/>" or "/img>".
	imgTagRe = regexp.MustCompile("<img.+?/(img)?>")
	// imgSrcRe matches a double-quoted src attribute.
	imgSrcRe = regexp.MustCompile("src=\"[^\"]+\"")
	// siteRootRe matches an optional http(s) scheme followed by a dotted
	// host name of any number of labels and an optional port.
	siteRootRe = regexp.MustCompile("(https?://)?[a-zA-Z0-9-]+(\\.[a-zA-Z0-9-]+)+(:[0-9]+)?")
)

// cleanImgTags HTML-unescapes s and rewrites every <img> element in it to
// the minimal form `<img src="..." />`, dropping all other attributes.
//
// url is the address of the page the content came from. Root-relative
// sources (src="/path") are made absolute by prefixing the scheme and host
// extracted from url. Protocol-relative sources (src="//host/path") and
// every other source are left untouched. An <img> element without a src
// attribute is removed, and one with several src matches yields one tag per
// match, joined by "<br>".
func cleanImgTags(s, url string) string {
	root := siteRootRe.FindString(url)
	s = html.UnescapeString(s)

	return imgTagRe.ReplaceAllStringFunc(s, func(tag string) string {
		srcs := imgSrcRe.FindAllString(tag, -1)
		for i, src := range srcs {
			rootRelative := strings.HasPrefix(src, "src=\"/") &&
				!strings.HasPrefix(src, "src=\"//")
			if rootRelative {
				src = "src=\"" + root + src[len("src=\""):]
			}
			srcs[i] = "<img " + src + " />"
		}
		return strings.Join(srcs, "<br>")
	})
}
|
||||
|
||||
func gofeedItemTS(gfitem *gofeed.Item) *time.Time {
|
||||
var t time.Time
|
||||
if gfitem.UpdatedParsed != nil {
|
||||
t = *gfitem.UpdatedParsed
|
||||
} else if gfitem.PublishedParsed != nil {
|
||||
t = *gfitem.PublishedParsed
|
||||
}
|
||||
return &t
|
||||
}
|
||||
|
|
@ -0,0 +1,61 @@
|
|||
package rss
|
||||
|
||||
import (
|
||||
"testing"
|
||||
|
||||
"github.com/mmcdole/gofeed"
|
||||
)
|
||||
|
||||
func Test_RSSItem(t *testing.T) {
|
||||
cases := []struct {
|
||||
input gofeed.Item
|
||||
filter string
|
||||
output Item
|
||||
}{
|
||||
{
|
||||
input: gofeed.Item{
|
||||
Title: "a",
|
||||
Link: "b",
|
||||
Content: "",
|
||||
},
|
||||
filter: "",
|
||||
output: Item{
|
||||
Name: "a",
|
||||
Link: "b",
|
||||
Content: "",
|
||||
},
|
||||
},
|
||||
{
|
||||
input: gofeed.Item{
|
||||
Title: "a",
|
||||
Link: "b",
|
||||
Content: `x y <img src="asdf"></img>`,
|
||||
},
|
||||
filter: "[a-z]*",
|
||||
output: Item{
|
||||
Name: "a",
|
||||
Link: "b",
|
||||
Content: "x\n<br>\ny",
|
||||
},
|
||||
},
|
||||
{
|
||||
input: gofeed.Item{
|
||||
Title: "a",
|
||||
Link: "b",
|
||||
Content: "x y",
|
||||
},
|
||||
filter: "[a-z]*",
|
||||
output: Item{
|
||||
Name: "a",
|
||||
Link: "b",
|
||||
Content: "x\n<br>\ny",
|
||||
},
|
||||
},
|
||||
}
|
||||
for _, c := range cases {
|
||||
output := fromGofeedItem(&c.input, c.filter)
|
||||
if *output != c.output {
|
||||
t.Errorf("failed to convert gofeed: wanted %v, got %v", c.output, *output)
|
||||
}
|
||||
}
|
||||
}
|
||||
Loading…
Reference in New Issue