package rss
import (
"bytes"
"encoding/gob"
"fmt"
"html"
"io/ioutil"
"net/http"
"regexp"
"strings"
"time"
"github.com/mmcdole/gofeed"
)
// Item is a single feed entry in the form this package stores and passes
// around.
type Item struct {
	// Name is the entry title.
	Name string
	// Link is the URL of the entry.
	Link string
	// Content is the entry body.
	Content string
	// TS is the timestamp of the entry.
	TS time.Time
}

// idTokenRe picks the ASCII alphanumeric runs out of a link when building an
// ID. The pattern uses "*" rather than "+", so it also yields an empty token
// for a separator character that does not directly follow an alphanumeric
// run; those empty tokens show up as repeated underscores in the ID
// ("http://a.b/c" becomes "http___a_b_c"). The pattern is deliberately kept
// unchanged so that IDs stay identical to the ones generated before.
var idTokenRe = regexp.MustCompile("[a-zA-Z0-9]*")

// String returns a one-line, human-readable description of the item. The
// content is quoted and the timestamp is shown in the local time zone.
func (it *Item) String() string {
	return fmt.Sprintf("Name %v, Link %v, Content %q, TS %v",
		it.Name, it.Link, it.Content, it.TS.Local())
}

// ID returns an identifier built from the UTC date of the item (formatted as
// YYYYMMDD followed by an underscore) and the alphanumeric parts of its link
// joined with underscores.
func (it *Item) ID() string {
	day := it.TS.UTC().Format("20060102_")
	return day + strings.Join(idTokenRe.FindAllString(it.Link, -1), "_")
}

// DeserializeItem decodes an Item from the gob encoding produced by
// Serialize. On failure it returns a nil Item together with the decoder's
// error, so a partially decoded value can never be mistaken for a valid one.
func DeserializeItem(src []byte) (*Item, error) {
	var dst Item
	if err := gob.NewDecoder(bytes.NewReader(src)).Decode(&dst); err != nil {
		return nil, err
	}
	return &dst, nil
}

// Serialize returns the gob encoding of the item. On failure it returns nil
// bytes together with the encoder's error.
func (it *Item) Serialize() ([]byte, error) {
	var buf bytes.Buffer
	if err := gob.NewEncoder(&buf).Encode(it); err != nil {
		return nil, err
	}
	return buf.Bytes(), nil
}
func fromGofeedItem(gfitem *gofeed.Item, filter string) *Item {
item := &Item{
Name: gfitem.Title,
Link: gfitem.Link,
Content: "",
TS: *gofeedItemTS(gfitem),
}
content := gfitem.Content
if content == "" {
content = contentFromLink(item.Link)
}
if filter != "" {
r := regexp.MustCompile(filter)
matches := r.FindAllString(content, -1)
content = strings.Join(matches, "
")
}
content = cleanImgTags(content)
item.Content = content
return item
}
// linkClient is the HTTP client used to download linked pages. Unlike
// http.DefaultClient it has a timeout, so an unresponsive server cannot block
// the caller indefinitely.
var linkClient = &http.Client{Timeout: 30 * time.Second}

// contentFromLink downloads the page at link and returns its body with all
// newlines removed and with protocol-relative and root-relative URLs made
// absolute (see absolutizeLinks).
//
// It returns "" when the request fails, when the server answers with a
// non-2xx status, or when the body cannot be read.
func contentFromLink(link string) string {
	resp, err := linkClient.Get(link)
	if err != nil {
		return ""
	}
	defer resp.Body.Close()

	// An error page is not item content.
	if resp.StatusCode < 200 || resp.StatusCode > 299 {
		return ""
	}

	body, err := ioutil.ReadAll(resp.Body)
	if err != nil {
		return ""
	}

	// resp.Request is the request that produced this response, i.e. the last
	// one after any redirects, so its URL is the right base for the page's
	// relative links.
	base := resp.Request.URL
	content := strings.Replace(string(body), "\n", "", -1)
	return absolutizeLinks(content, base.Scheme, base.Host)
}

// absolutizeLinks rewrites quoted protocol-relative URLs ("//host/path) and
// root-relative href/src attributes (href="/path, src="/path) in content so
// that they point at the given scheme and host.
func absolutizeLinks(content, scheme, host string) string {
	origin := scheme + "://" + host + "/"

	// The protocol-relative form must be handled first: afterwards no
	// attribute value starts with "//", so the root-relative rules below
	// cannot hit it by mistake.
	content = strings.Replace(content, `"//`, `"`+scheme+`://`, -1)
	content = strings.Replace(content, `href="/`, `href="`+origin, -1)
	content = strings.Replace(content, `src="/`, `src="`+origin, -1)
	return content
}
// NOTE(review): in the file the tag pattern was an empty string and the
// replacement literal was broken across two lines, so the function did not
// compile; both look like HTML text that was lost when the file was
// converted. The tag pattern and the "<img src=.../><br>" replacement below
// are reconstructions based on the function name and on the surviving
// src="..." pattern - confirm against the upstream version.
var (
	// imgTagRe matches one complete <img ...> tag.
	imgTagRe = regexp.MustCompile("<img[^>]*>")
	// imgSrcRe matches the src="..." attribute inside a tag.
	imgSrcRe = regexp.MustCompile("src=\".*?\"")
)

// cleanImgTags unescapes the HTML entities in s and then reduces every img
// tag that has a src attribute to a bare <img src="..."/> followed by a line
// break, dropping all other attributes. Img tags without a src attribute are
// left as they are.
func cleanImgTags(s string) string {
	s = html.UnescapeString(s)

	// A single pass over the tags: each tag is rewritten exactly where it
	// was found, so repeated identical tags cannot interfere with each other.
	return imgTagRe.ReplaceAllStringFunc(s, func(tag string) string {
		src := imgSrcRe.FindString(tag)
		if src == "" {
			return tag
		}
		return "<img " + src + "/><br>"
	})
}
// gofeedItemTS returns a pointer to a copy of the entry's timestamp: the
// parsed "updated" time when the entry has one, otherwise the parsed
// "published" time. When the entry has neither, the result points at the
// zero time.Time.
func gofeedItemTS(gfitem *gofeed.Item) *time.Time {
	ts := new(time.Time)
	switch {
	case gfitem.UpdatedParsed != nil:
		*ts = *gfitem.UpdatedParsed
	case gfitem.PublishedParsed != nil:
		*ts = *gfitem.PublishedParsed
	}
	return ts
}