feed.Fetch parses and matches many fields

This commit is contained in:
Bel LaPointe
2025-04-27 12:36:01 -06:00
parent 1fed2d648f
commit 8b67437fd1
6 changed files with 628 additions and 8 deletions

View File

@@ -3,9 +3,14 @@ package feeds
import (
"context"
"fmt"
"io"
"regexp"
"slices"
"sort"
"strings"
"time"
"github.com/jaytaylor/html2text"
"github.com/mmcdole/gofeed"
"github.com/robfig/cron/v3"
)
@@ -31,5 +36,51 @@ func (feed Feed) ShouldExecute() (bool, error) {
}
// Fetch downloads and parses the feed at feed.Version.URL and returns one Item
// for every entry that has at least one field matching feed.Version.Pattern.
//
// Entries are ordered by ascending publish date before filtering; entries
// without a parsed publish date come first. The fields tested against the
// pattern are the title, description, content, primary link, author name and
// email (when an author is present) and any additional links.
//
// An error is returned when the pattern is not a valid regular expression or
// when the feed cannot be fetched or parsed.
func (feed Feed) Fetch(ctx context.Context) (Items, error) {
	// Compile before touching the network: the pattern comes from
	// configuration, so an invalid one must surface as an error, not a panic.
	matcher, err := regexp.Compile(feed.Version.Pattern)
	if err != nil {
		return nil, fmt.Errorf("compiling pattern %q: %w", feed.Version.Pattern, err)
	}

	gfeed, err := gofeed.NewParser().ParseURLWithContext(feed.Version.URL, ctx)
	if err != nil {
		return nil, fmt.Errorf("fetching feed %q: %w", feed.Version.URL, err)
	}

	// Order by publish date through nil-checked pointers so an entry with no
	// parsed date cannot cause a nil dereference while sorting.
	sort.SliceStable(gfeed.Items, func(i, j int) bool {
		a, b := gfeed.Items[i].PublishedParsed, gfeed.Items[j].PublishedParsed
		if a == nil || b == nil {
			return a == nil && b != nil
		}
		return a.Before(*b)
	})

	result := make(Items, 0, len(gfeed.Items))
	for _, gitem := range gfeed.Items {
		if !slices.ContainsFunc(feedItemFields(gitem), matcher.MatchString) {
			continue
		}

		// HTML-to-text conversion is best-effort: a conversion error is
		// deliberately ignored and whatever text came back is used.
		preview, _ := html2text.FromString(gitem.Description)
		body, _ := html2text.FromString(gitem.Content)
		if body == "" {
			// No dedicated content: the description becomes the body and
			// only a shortened form of it is kept as the preview.
			body = preview
			preview = truncatePreview(preview)
		}

		// Collect the primary and additional links, dropping blanks and
		// duplicates; sorting first lets Compact remove every duplicate.
		links := slices.DeleteFunc(append([]string{gitem.Link}, gitem.Links...), func(s string) bool {
			return strings.TrimSpace(s) == ""
		})
		slices.Sort(links)
		links = slices.Compact(links)

		result = append(result, Item{
			Title:   gitem.Title,
			Links:   links,
			Preview: preview,
			Body:    body,
		})
	}
	return result, nil
}

// feedItemFields returns the strings of a feed entry that are tested against
// the feed pattern. Author fields are included only when the entry has an
// author, since the author pointer may be nil.
func feedItemFields(gitem *gofeed.Item) []string {
	fields := []string{
		gitem.Title,
		gitem.Description,
		gitem.Content,
		gitem.Link,
	}
	if gitem.Author != nil {
		fields = append(fields, gitem.Author.Name, gitem.Author.Email)
	}
	return append(fields, gitem.Links...)
}

// truncatePreview shortens s to at most 50 bytes followed by "..." when s is
// longer than 53 bytes, cutting on a rune boundary so a multi-byte UTF-8
// character is never split. Shorter strings are returned unchanged.
func truncatePreview(s string) string {
	const (
		maxLen = 53 // longest preview returned untouched
		keep   = 50 // bytes kept in front of the ellipsis
	)
	if len(s) <= maxLen {
		return s
	}
	// Ranging over a string yields the byte offset of each rune start; keep
	// the largest offset that does not exceed the byte budget.
	cut := 0
	for i := range s {
		if i > keep {
			break
		}
		cut = i
	}
	return s[:cut] + "..."
}