// Listing metadata carried over from the file viewer: 115 lines, 2.3 KiB, Go.

package rss
|
|
|
|
import (
|
|
"bytes"
|
|
"encoding/gob"
|
|
"fmt"
|
|
"html"
|
|
"io/ioutil"
|
|
"net/http"
|
|
"regexp"
|
|
"strings"
|
|
"time"
|
|
|
|
"github.com/mmcdole/gofeed"
|
|
)
|
|
|
|
// Item is one feed entry as handled by this package: it can be printed
// (String), given an identifier (ID) and gob-encoded (Serialize).
type Item struct {
	// Name, Link and Content are filled from the feed entry's title, link
	// and (post-processed) content respectively.
	Name, Link, Content string
	// TS is the entry's timestamp.
	TS time.Time
}
|
|
|
|
func (item *Item) String() string {
|
|
return fmt.Sprintf("Name %v, Link %v, Content %q, TS %v",
|
|
item.Name,
|
|
item.Link,
|
|
item.Content,
|
|
item.TS.Local(),
|
|
)
|
|
}
|
|
|
|
func (item *Item) ID() string {
|
|
return item.TS.UTC().Format("20060102_") + strings.Join(regexp.MustCompile("[a-zA-Z0-9]*").FindAllString(item.Link, -1), "_")
|
|
}
|
|
|
|
func DeserializeItem(src []byte) (*Item, error) {
|
|
buffer := bytes.NewBuffer(src)
|
|
dec := gob.NewDecoder(buffer)
|
|
var dst Item
|
|
err := dec.Decode(&dst)
|
|
return &dst, err
|
|
}
|
|
|
|
func (item *Item) Serialize() ([]byte, error) {
|
|
var buffer bytes.Buffer
|
|
enc := gob.NewEncoder(&buffer)
|
|
err := enc.Encode(item)
|
|
return buffer.Bytes(), err
|
|
}
|
|
|
|
func fromGofeedItem(gfitem *gofeed.Item, filter string) *Item {
|
|
item := &Item{
|
|
Name: gfitem.Title,
|
|
Link: gfitem.Link,
|
|
Content: "",
|
|
TS: *gofeedItemTS(gfitem),
|
|
}
|
|
content := gfitem.Content
|
|
if content == "" {
|
|
content = contentFromLink(item.Link)
|
|
}
|
|
content = strings.Replace(content, "\n", "", -1)
|
|
if filter != "" {
|
|
r := regexp.MustCompile(filter)
|
|
matches := r.FindAllString(content, -1)
|
|
content = strings.Join(matches, "<br>")
|
|
}
|
|
content = cleanImgTags(content)
|
|
item.Content = content
|
|
return item
|
|
}
|
|
|
|
// linkFetchClient is the HTTP client used by contentFromLink. Unlike the
// default client behind http.Get it has a timeout, so a server that accepts
// the connection and then stalls cannot block a fetch indefinitely.
var linkFetchClient = &http.Client{Timeout: 30 * time.Second}

// contentFromLink downloads the document at link and returns its body as a
// string.
//
// This is a best-effort fallback: it returns "" when the request fails, when
// the server answers with a non-2xx status, or when the body cannot be read
// completely.
func contentFromLink(link string) string {
	resp, err := linkFetchClient.Get(link)
	if err != nil {
		return ""
	}
	defer resp.Body.Close()

	// An error page (404, 500, ...) is not the entry's content.
	if resp.StatusCode < 200 || resp.StatusCode > 299 {
		return ""
	}

	body, err := ioutil.ReadAll(resp.Body)
	if err != nil {
		return ""
	}
	return string(body)
}
|
|
|
|
// Patterns used by cleanImgTags, compiled once instead of on every call.
var (
	// imgTagRe matches an <img ...> tag closed by "/>" or "/img>".
	imgTagRe = regexp.MustCompile("<img.+?/(img)?>")
	// imgSrcAttrRe matches the first src="..." attribute inside a tag.
	imgSrcAttrRe = regexp.MustCompile("src=\".*?\"")
)

// cleanImgTags HTML-unescapes s and then reduces every <img> tag to just its
// src attribute, i.e. `<img class="x" src="a.png" alt="y"/>` becomes
// `<img src="a.png"/>`. Tags without a src="..." attribute are left as they
// are. The unescaped string is returned even when it contains no <img> tag.
//
// Each tag is rewritten in place in a single pass over the string, so
// repeated identical tags are handled independently.
func cleanImgTags(s string) string {
	s = html.UnescapeString(s)
	return imgTagRe.ReplaceAllStringFunc(s, func(tag string) string {
		src := imgSrcAttrRe.FindString(tag)
		if src == "" {
			return tag
		}
		return "<img " + src + "/>"
	})
}
|
|
|
|
func gofeedItemTS(gfitem *gofeed.Item) *time.Time {
|
|
var t time.Time
|
|
if gfitem.UpdatedParsed != nil {
|
|
t = *gfitem.UpdatedParsed
|
|
} else if gfitem.PublishedParsed != nil {
|
|
t = *gfitem.PublishedParsed
|
|
}
|
|
return &t
|
|
}
|