Files
show-rss/src/feeds/http.go
2025-05-05 22:41:37 -06:00

179 lines
3.5 KiB
Go

package feeds
import (
"context"
"fmt"
"io"
"net/http"
"net/url"
"regexp"
"slices"
"sort"
"strings"
"time"
"github.com/jaytaylor/html2text"
"github.com/mmcdole/gofeed"
"github.com/robfig/cron/v3"
)
var (
	// ProxyU is the SOCKS5 proxy applied to every outbound feed fetch.
	// It is parsed once at package init; a malformed URL is a programmer
	// error, so init panics immediately rather than failing later.
	ProxyU = func() *url.URL {
		const raw = "socks5://wghttp.inhome.blapointe.com:63114"
		parsed, parseErr := url.Parse(raw)
		if parseErr != nil {
			panic(parseErr)
		}
		return parsed
	}()
)
// ShouldExecute reports whether the feed's cron schedule has elapsed since
// its last recorded execution time. Soft-deleted feeds never execute.
// It returns an error when the stored cron expression cannot be parsed.
func (feed Feed) ShouldExecute() (bool, error) {
	// A non-zero Deleted timestamp marks the feed as soft-deleted.
	if !feed.Entry.Deleted.IsZero() {
		return false, nil
	}
	schedule, err := cron.NewParser(
		cron.SecondOptional |
			cron.Minute |
			cron.Hour |
			cron.Dom |
			cron.Month |
			cron.Dow |
			cron.Descriptor,
	).Parse(feed.Version.Cron)
	if err != nil {
		// Wrap the parser error so callers can inspect why the
		// expression was rejected (errors.Is/As still work).
		return false, fmt.Errorf("illegal cron %q: %w", feed.Version.Cron, err)
	}
	next := schedule.Next(feed.Execution.Executed)
	return time.Now().After(next), nil
}
// Fetch downloads and parses the feed, returning only the items whose text
// fields match the version's filter pattern.
//
// An item is kept when the pattern matches at least one of: title,
// description, content, link(s), or author name/email. Kept items get a
// plain-text preview/body (converted from HTML) and a deduplicated,
// sorted link list whose first entry becomes the primary link.
func (feed Feed) Fetch(ctx context.Context) (Items, error) {
	u, err := feed.FetchURL()
	if err != nil {
		return nil, err
	}
	resp, err := proxyFetch(ctx, u.String())
	if err != nil {
		return nil, err
	}
	gfeed, err := gofeed.NewParser().ParseString(resp)
	if err != nil {
		return nil, err
	}
	sort.Sort(gfeed)
	// The pattern comes from stored feed configuration, not source code:
	// compile it with Compile so a bad pattern surfaces as an error to the
	// caller instead of panicking the process (MustCompile would panic).
	matcher, err := regexp.Compile(feed.Version.Pattern)
	if err != nil {
		return nil, fmt.Errorf("illegal pattern %q: %w", feed.Version.Pattern, err)
	}
	result := make(Items, 0, len(gfeed.Items))
	for _, gitem := range gfeed.Items {
		if gitem.Author == nil {
			gitem.Author = &gofeed.Person{}
		}
		// Drop the item unless the pattern matches at least one field.
		if matches := slices.DeleteFunc(append([]string{
			gitem.Title,
			gitem.Description,
			gitem.Content,
			gitem.Link,
			gitem.Author.Name,
			gitem.Author.Email,
		}, gitem.Links...), func(s string) bool {
			return !matcher.MatchString(s)
		}); len(matches) == 0 {
			continue
		}
		// Best-effort HTML→text conversion; errors leave empty strings.
		preview, _ := html2text.FromString(gitem.Description)
		body, _ := html2text.FromString(gitem.Content)
		if body == "" {
			// No separate content: promote the description to the body
			// and keep only a shortened preview. Truncate on runes, not
			// bytes, so a multi-byte UTF-8 character is never split.
			body = preview
			if r := []rune(preview); len(r) > 53 {
				preview = string(r[:50]) + "..."
			}
		}
		// Collect all non-blank links, deduplicated and in stable order.
		links := slices.DeleteFunc(append([]string{gitem.Link}, gitem.Links...), func(s string) bool {
			return strings.TrimSpace(s) == ""
		})
		slices.Sort(links)
		links = slices.Compact(links)
		var link string
		if len(links) > 0 {
			link = links[0]
		}
		result = append(result, Item{
			Title:   gitem.Title,
			Link:    link,
			Links:   links,
			Preview: preview,
			Body:    body,
		})
	}
	return result, nil
}
// proxyFetch GETs u (through ProxyU when it is set) and returns the
// response body as a string. Non-200 responses become errors that include
// the method, URL, status code, and body.
func proxyFetch(ctx context.Context, u string) (string, error) {
	// Bind the request to ctx so callers can cancel or time out the fetch;
	// the original http.NewRequest silently ignored ctx.
	req, err := http.NewRequestWithContext(ctx, http.MethodGet, u, nil)
	if err != nil {
		return "", err
	}
	// Hard one-minute cap even when ctx carries no deadline.
	c := http.Client{Timeout: time.Minute}
	if ProxyU != nil {
		c.Transport = &http.Transport{Proxy: http.ProxyURL(ProxyU)}
	}
	resp, err := c.Do(req)
	if err != nil {
		return "", err
	}
	// Deferred in LIFO order: drain any remaining body, then close, so the
	// transport can reuse the underlying connection.
	defer resp.Body.Close()
	defer io.Copy(io.Discard, resp.Body)
	b, err := io.ReadAll(resp.Body)
	if err != nil {
		return "", fmt.Errorf("reading body of %s: %w", req.URL.String(), err)
	}
	if resp.StatusCode != http.StatusOK {
		return "", fmt.Errorf("failed fetch %s %s: (%d) %s", req.Method, req.URL.String(), resp.StatusCode, b)
	}
	return string(b), nil
}
// FetchURL returns the concrete URL to fetch for this feed, delegating to
// the feed version's FetchURL (which expands custom schemes such as nyaa://).
func (feed Feed) FetchURL() (*url.URL, error) {
	return feed.Version.FetchURL()
}
// FetchURL translates the version's configured URL into a fetchable one.
// Plain http/https URLs pass through untouched; the custom nyaa:// scheme
// is expanded into the nyaa.si RSS search endpoint. Any other scheme is
// rejected with an error.
func (version Version) FetchURL() (*url.URL, error) {
	parsed, err := url.Parse(version.URL)
	if err != nil {
		return nil, err
	}
	switch parsed.Scheme {
	case "http", "https":
		// Already concrete; nothing to rewrite.
	case "nyaa":
		// `nyaa://?q=A B` becomes `https://nyaa.si/?page=rss&q=A%20B&c=0_0&f=0`.
		params := parsed.Query()
		if params.Get("q") == "" {
			return nil, fmt.Errorf("invalid nyaa:// (%s): no ?q", version.URL)
		}
		params.Set("page", "rss")
		params.Set("c", "0_0")
		params.Set("f", "0")
		parsed.RawQuery = params.Encode()
		parsed.Scheme = "https"
		parsed.Host = "nyaa.si"
		parsed.Path = "/"
	default:
		return nil, fmt.Errorf("not impl mapping %s:// to url", parsed.Scheme)
	}
	return parsed, nil
}