// Package rss converts parsed gofeed items into a simplified Item
// representation.
package rss

import (
	"fmt"
	"html"
	"io/ioutil"
	"net/http"
	"regexp"
	"strings"
	"time"

	"github.com/mmcdole/gofeed"
)

// linkFetchTimeout bounds how long contentFromLink waits for a linked page,
// so that a stalled server cannot block item conversion indefinitely.
const linkFetchTimeout = 30 * time.Second

// Regular expressions and the HTTP client are created once at package scope
// instead of on every call.
var (
	// idTokenRe is the pattern Item.ID uses to tokenize a link. It is kept
	// exactly as "[a-zA-Z0-9]*" (not "+"): the empty matches it yields
	// between consecutive separator characters are part of the IDs that
	// have always been generated, and changing it would change those IDs.
	idTokenRe = regexp.MustCompile("[a-zA-Z0-9]*")

	// imgTagRe matches a complete <img ...> tag.
	//
	// NOTE(review): the tag pattern in the previous revision was an empty
	// string literal, which only ever matched empty strings; this pattern is
	// reconstructed from the function name and the src extraction below.
	// Confirm it against the intended markup.
	imgTagRe = regexp.MustCompile(`<img\b[^>]*>`)

	// imgSrcAttrRe extracts the first double-quoted src="..." attribute.
	imgSrcAttrRe = regexp.MustCompile("src=\".*?\"")

	// linkFetchClient is the shared client used by contentFromLink.
	linkFetchClient = &http.Client{Timeout: linkFetchTimeout}
)

// Item is a simplified feed entry.
type Item struct {
	Name    string    // title of the feed entry
	Link    string    // link of the feed entry
	Content string    // processed entry content (see fromGofeedItem)
	TS      time.Time // updated time if set, else published time, else zero
}

// String returns a single-line, human-readable description of the item with
// the timestamp converted to the local time zone.
func (item *Item) String() string {
	return fmt.Sprintf("Name %v, Link %v, Content %q, TS %v",
		item.Name,
		item.Link,
		item.Content,
		item.TS.Local(),
	)
}

// ID derives an identifier from the item's link by joining its runs of ASCII
// letters and digits with underscores. Consecutive separator characters in
// the link may produce consecutive underscores (see idTokenRe).
func (item *Item) ID() string {
	return strings.Join(idTokenRe.FindAllString(item.Link, -1), "_")
}

// fromGofeedItem builds an Item from a parsed gofeed item.
//
// The content is taken from gfitem.Content; when that is empty, the page
// behind the item's link is downloaded instead. All newlines are removed.
// If filter is a non-empty regular expression, only the parts of the content
// matching it are kept. Finally, <img> tags are normalized by cleanImgTags.
//
// A filter that does not compile is ignored and the content is left
// unfiltered, rather than panicking on caller-supplied input.
func fromGofeedItem(gfitem *gofeed.Item, filter string) *Item {
	item := &Item{
		Name: gfitem.Title,
		Link: gfitem.Link,
		TS:   *gofeedItemTS(gfitem),
	}

	content := gfitem.Content
	if content == "" {
		content = contentFromLink(item.Link)
	}
	content = strings.Replace(content, "\n", "", -1)

	if filter != "" {
		if r, err := regexp.Compile(filter); err == nil {
			// NOTE(review): the separator appeared as a raw line break
			// inside the string literal (which does not compile); it is
			// written here as "\n". Confirm that a newline, and not some
			// line-break markup, is the intended separator.
			content = strings.Join(r.FindAllString(content, -1), "\n")
		}
	}

	item.Content = cleanImgTags(content)
	return item
}

// contentFromLink downloads link and returns the response body as a string.
// It is best-effort: any request error, non-2xx status, or read error yields
// the empty string.
func contentFromLink(link string) string {
	resp, err := linkFetchClient.Get(link)
	if err != nil {
		return ""
	}
	defer resp.Body.Close()

	// Do not treat an error page as item content.
	if resp.StatusCode < http.StatusOK || resp.StatusCode >= http.StatusMultipleChoices {
		return ""
	}

	b, err := ioutil.ReadAll(resp.Body)
	if err != nil {
		return ""
	}
	return string(b)
}

// cleanImgTags HTML-unescapes s and rewrites every <img> tag that carries a
// double-quoted src attribute so that only that attribute remains. Tags
// without such an attribute are left unchanged.
//
// NOTE(review): the rebuilt tag form (<img src="...">) is a reconstruction;
// the previous revision replaced with an empty string literal. Confirm the
// exact markup expected by consumers of Item.Content.
func cleanImgTags(s string) string {
	s = html.UnescapeString(s)
	return imgTagRe.ReplaceAllStringFunc(s, func(tag string) string {
		src := imgSrcAttrRe.FindString(tag)
		if src == "" {
			// No quoted src attribute: keep the tag as it is.
			return tag
		}
		return "<img " + src + ">"
	})
}

// gofeedItemTS returns a pointer to the item's timestamp: the parsed updated
// time when present, otherwise the parsed published time, otherwise the zero
// time.Time. The returned pointer is never nil and refers to a copy.
func gofeedItemTS(gfitem *gofeed.Item) *time.Time {
	var t time.Time
	switch {
	case gfitem.UpdatedParsed != nil:
		t = *gfitem.UpdatedParsed
	case gfitem.PublishedParsed != nil:
		t = *gfitem.PublishedParsed
	}
	return &t
}