// Rssmon2/rss/item.go

package rss

import (
	"html"
	"net/url"
	"regexp"
	"strings"
	"time"

	"github.com/mmcdole/gofeed"
)

// Item is this package's representation of a single feed entry, built from
// a gofeed item by fromGofeedItem.
type Item struct {
	// Name is the entry title.
	Name    string
	// Link is the entry URL as given by the feed.
	Link    string
	// Content is the entry body: the feed's content verbatim when no filter
	// is configured, otherwise the filter's matches joined together.
	Content string
	// TS is the entry's updated time, or its published time when no updated
	// time is available; it is the zero time when the feed supplied neither.
	TS      time.Time
}

// fromGofeedItem converts a parsed gofeed item into an Item.
//
// Title, link and timestamp are copied over unconditionally. The content
// depends on filter:
//
//   - If filter is empty, the feed's content is used verbatim.
//   - Otherwise filter is compiled as a regular expression, every match in
//     the feed's content is collected, the matches are joined with
//     "\n<br>\n", and the result is passed through cleanImgTags using the
//     item's link as the base URL.
//   - If filter is not a valid regular expression, the item falls back to
//     the unfiltered content instead of panicking, so one bad filter cannot
//     take down the caller.
func fromGofeedItem(gfitem *gofeed.Item, filter string) *Item {
	item := &Item{
		Name: gfitem.Title,
		Link: gfitem.Link,
		TS:   *gofeedItemTS(gfitem),
	}
	if filter == "" {
		item.Content = gfitem.Content
		return item
	}

	re, err := regexp.Compile(filter)
	if err != nil {
		// The filter comes from runtime input; an unusable pattern is
		// treated like "no filter" rather than crashing the process.
		item.Content = gfitem.Content
		return item
	}

	matches := re.FindAllString(gfitem.Content, -1)
	item.Content = cleanImgTags(strings.Join(matches, "\n<br>\n"), item.Link)
	return item
}

var (
	// imgTagRe matches one <img ...> element, self-closing or not. The match
	// stops at the tag's own '>' so markup following the tag is never
	// swallowed.
	imgTagRe = regexp.MustCompile(`<img\b[^>]*>`)
	// imgSrcRe matches a non-empty, double-quoted src attribute.
	imgSrcRe = regexp.MustCompile(`src="[^"]+"`)
)

// linkOrigin returns the scheme and the "scheme://host[:port]" origin of
// link. Both results are empty when link is not an absolute URL.
func linkOrigin(link string) (scheme, origin string) {
	u, err := url.Parse(link)
	if err != nil || u.Scheme == "" || u.Host == "" {
		return "", ""
	}
	return u.Scheme, u.Scheme + "://" + u.Host
}

// cleanImgTags HTML-unescapes s and rewrites every <img> tag in it to a
// minimal `<img src="..." />` form, dropping all other attributes.
//
// url is the link of the item the content belongs to and is used as the
// base for resolving image sources:
//
//   - a root-relative source ("/a.png") is prefixed with the link's origin;
//   - a protocol-relative source ("//cdn/a.png") is given the link's scheme;
//   - any other source is kept as is.
//
// When url is not an absolute URL, sources are left untouched. A tag that
// yields several src matches is replaced by one <img> per match joined with
// "<br>"; a tag with no double-quoted src is removed.
func cleanImgTags(s, url string) string {
	const srcPrefix = `src="`

	scheme, origin := linkOrigin(url)
	s = html.UnescapeString(s)

	return imgTagRe.ReplaceAllStringFunc(s, func(tag string) string {
		srcs := imgSrcRe.FindAllString(tag, -1)
		for i, src := range srcs {
			// ref is the attribute value followed by its closing quote.
			ref := src[len(srcPrefix):]
			switch {
			case origin == "":
				// No usable base URL: keep the source unchanged.
			case strings.HasPrefix(ref, "//"):
				src = srcPrefix + scheme + ":" + ref
			case strings.HasPrefix(ref, "/"):
				src = srcPrefix + origin + ref
			}
			srcs[i] = "<img " + src + " />"
		}
		return strings.Join(srcs, "<br>")
	})
}

// gofeedItemTS picks the timestamp to use for a feed item: the parsed
// updated time when present, otherwise the parsed published time. The
// returned pointer is never nil; it refers to a copy, holding the zero
// time.Time when the item carries neither value.
func gofeedItemTS(gfitem *gofeed.Item) *time.Time {
	ts := new(time.Time)
	switch {
	case gfitem.UpdatedParsed != nil:
		*ts = *gfitem.UpdatedParsed
	case gfitem.PublishedParsed != nil:
		*ts = *gfitem.PublishedParsed
	}
	return ts
}