RSS implemented and tested

This commit is contained in:
Bel LaPointe
2018-10-08 10:02:34 -06:00
parent 24e30a7eee
commit 49d95c150e
5 changed files with 212 additions and 48 deletions

View File

@@ -1,7 +1,10 @@
package rss
import (
"fmt"
"html"
"io/ioutil"
"net/http"
"regexp"
"strings"
"time"
@@ -16,6 +19,19 @@ type Item struct {
TS time.Time
}
// String returns a one-line, human-readable summary of the item: its name,
// link, quoted content and timestamp. The timestamp is converted with
// Local() before it is formatted.
func (item *Item) String() string {
	const layout = "Name %v, Link %v, Content %q, TS %v"
	localTS := item.TS.Local()
	return fmt.Sprintf(layout, item.Name, item.Link, item.Content, localTS)
}
// itemIDTokenRe matches runs of ASCII letters and digits. It is compiled once
// at package scope instead of on every ID call. The `*` quantifier is kept
// as-is: it also produces empty matches between consecutive separator
// characters (so "://" contributes extra underscores), and changing it would
// alter the IDs this package generates.
var itemIDTokenRe = regexp.MustCompile("[a-zA-Z0-9]*")

// ID derives an identifier from the item's Link by joining the matches of
// itemIDTokenRe with underscores, e.g. "a-b" becomes "a_b". An empty Link
// yields an empty ID.
func (item *Item) ID() string {
	return strings.Join(itemIDTokenRe.FindAllString(item.Link, -1), "_")
}
func fromGofeedItem(gfitem *gofeed.Item, filter string) *Item {
item := &Item{
Name: gfitem.Title,
@@ -23,35 +39,48 @@ func fromGofeedItem(gfitem *gofeed.Item, filter string) *Item {
Content: "",
TS: *gofeedItemTS(gfitem),
}
if filter == "" {
item.Content = gfitem.Content
return item
content := gfitem.Content
if content == "" {
content = contentFromLink(item.Link)
}
r := regexp.MustCompile(filter)
matches := r.FindAllString(gfitem.Content, -1)
content := strings.Join(matches, "\n<br>\n")
content = cleanImgTags(content, item.Link)
content = strings.Replace(content, "\n", "", -1)
if filter != "" {
r := regexp.MustCompile(filter)
matches := r.FindAllString(content, -1)
content = strings.Join(matches, "<br>")
}
content = cleanImgTags(content)
item.Content = content
return item
}
func cleanImgTags(s, url string) string {
domain := regexp.MustCompile("(https?://)?(www\\.)?[a-zA-Z0-9]+\\.+[a-z]{2}[a-z]?").FindString(url)
// contentFetchTimeout bounds the whole request (connect, redirects and body
// read) made by contentFromLink so a stalled server cannot block the caller
// indefinitely.
const contentFetchTimeout = 30 * time.Second

// contentHTTPClient is the shared client used by contentFromLink. The default
// client used by http.Get has no timeout.
var contentHTTPClient = &http.Client{Timeout: contentFetchTimeout}

// contentFromLink downloads link and returns the response body as a string.
//
// It is best-effort: any failure (invalid URL, transport error, timeout,
// non-2xx status, or an error while reading the body) yields the empty
// string rather than an error.
func contentFromLink(link string) string {
	resp, err := contentHTTPClient.Get(link)
	if err != nil {
		return ""
	}
	defer resp.Body.Close()

	// An error page is not article content; treat it like a failed fetch.
	if resp.StatusCode < http.StatusOK || resp.StatusCode >= http.StatusMultipleChoices {
		return ""
	}

	b, err := ioutil.ReadAll(resp.Body)
	if err != nil {
		// Discard partial bodies instead of returning truncated content.
		return ""
	}
	return string(b)
}
func cleanImgTags(s string) string {
reg := regexp.MustCompile("<img.+?/(img)?>")
s = html.UnescapeString(s)
matches := reg.FindAllString(s, -1)
if len(matches) > 0 {
// get img src="..." and build
regImgSrc := regexp.MustCompile("src=\"[^\"]+\"")
regImgSrc := regexp.MustCompile("src=\".*?\"")
for j := range matches {
theseMatches := regImgSrc.FindAllString(matches[j], -1)
for k := range theseMatches {
if strings.HasPrefix(theseMatches[k], "src=\"/") {
theseMatches[k] = "src=\"" + domain + theseMatches[k][5:]
}
theseMatches[k] = "<img " + theseMatches[k] + " />"
imgSrc := regImgSrc.FindString(matches[j])
replacement := matches[j]
if imgSrc != "" {
replacement = "<img " + imgSrc + "/>"
}
s = strings.Replace(s, matches[j], strings.Join(theseMatches, "<br>"), 1)
s = strings.Replace(s, matches[j], replacement, 1)
}
}
return s