179 lines
3.5 KiB
Go
179 lines
3.5 KiB
Go
package feeds
|
|
|
|
import (
|
|
"context"
|
|
"fmt"
|
|
"io"
|
|
"net/http"
|
|
"net/url"
|
|
"regexp"
|
|
"slices"
|
|
"sort"
|
|
"strings"
|
|
"time"
|
|
|
|
"github.com/jaytaylor/html2text"
|
|
"github.com/mmcdole/gofeed"
|
|
"github.com/robfig/cron/v3"
|
|
)
|
|
|
|
// ProxyU is the proxy URL that proxyFetch sends its requests through when it
// is non-nil. It is parsed once during package initialization; a parse
// failure panics.
var ProxyU = func() *url.URL {
	const rawProxy = "socks5://wghttp.inhome.blapointe.com:63114"

	parsed, parseErr := url.Parse(rawProxy)
	if parseErr != nil {
		panic(parseErr)
	}
	return parsed
}()
|
|
|
|
func (feed Feed) ShouldExecute() (bool, error) {
|
|
if !feed.Entry.Deleted.IsZero() {
|
|
return false, nil
|
|
}
|
|
|
|
schedule, err := cron.NewParser(
|
|
cron.SecondOptional |
|
|
cron.Minute |
|
|
cron.Hour |
|
|
cron.Dom |
|
|
cron.Month |
|
|
cron.Dow |
|
|
cron.Descriptor,
|
|
).Parse(feed.Version.Cron)
|
|
if err != nil {
|
|
return false, fmt.Errorf("illegal cron %q", feed.Version.Cron)
|
|
}
|
|
next := schedule.Next(feed.Execution.Executed)
|
|
return time.Now().After(next), nil
|
|
}
|
|
|
|
func (feed Feed) Fetch(ctx context.Context) (Items, error) {
|
|
u, err := feed.FetchURL()
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
resp, err := proxyFetch(ctx, u.String())
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
gfeed, err := gofeed.NewParser().ParseString(resp)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
sort.Sort(gfeed)
|
|
|
|
matcher := regexp.MustCompile(feed.Version.Pattern)
|
|
|
|
result := make(Items, 0, len(gfeed.Items))
|
|
for _, gitem := range gfeed.Items {
|
|
if gitem.Author == nil {
|
|
gitem.Author = &gofeed.Person{}
|
|
}
|
|
if matches := slices.DeleteFunc(append([]string{
|
|
gitem.Title,
|
|
gitem.Description,
|
|
gitem.Content,
|
|
gitem.Link,
|
|
gitem.Author.Name,
|
|
gitem.Author.Email,
|
|
}, gitem.Links...), func(s string) bool {
|
|
return !matcher.MatchString(s)
|
|
}); len(matches) == 0 {
|
|
continue
|
|
}
|
|
|
|
preview, _ := html2text.FromString(gitem.Description)
|
|
body, _ := html2text.FromString(gitem.Content)
|
|
if body == "" {
|
|
body = preview
|
|
if len(preview) > 53 {
|
|
preview = preview[:50] + "..."
|
|
}
|
|
}
|
|
|
|
links := slices.DeleteFunc(append([]string{gitem.Link}, gitem.Links...), func(s string) bool {
|
|
return strings.TrimSpace(s) == ""
|
|
})
|
|
slices.Sort(links)
|
|
links = slices.Compact(links)
|
|
|
|
var link string
|
|
if len(links) > 0 {
|
|
link = links[0]
|
|
}
|
|
|
|
result = append(result, Item{
|
|
Title: gitem.Title,
|
|
Link: link,
|
|
Links: links,
|
|
Preview: preview,
|
|
Body: body,
|
|
})
|
|
}
|
|
|
|
return result, nil
|
|
}
|
|
|
|
func proxyFetch(ctx context.Context, u string) (string, error) {
|
|
req, err := http.NewRequest(http.MethodGet, u, nil)
|
|
if err != nil {
|
|
return "", err
|
|
}
|
|
|
|
c := http.Client{Timeout: time.Minute}
|
|
if ProxyU != nil {
|
|
c.Transport = &http.Transport{Proxy: http.ProxyURL(ProxyU)}
|
|
}
|
|
|
|
resp, err := c.Do(req)
|
|
if err != nil {
|
|
return "", err
|
|
}
|
|
defer resp.Body.Close()
|
|
defer io.Copy(io.Discard, resp.Body)
|
|
|
|
b, _ := io.ReadAll(resp.Body)
|
|
|
|
if resp.StatusCode != http.StatusOK {
|
|
return "", fmt.Errorf("failed fetch %s %s: (%d) %s", req.Method, req.URL.String(), resp.StatusCode, b)
|
|
}
|
|
|
|
return string(b), nil
|
|
}
|
|
|
|
func (feed Feed) FetchURL() (*url.URL, error) {
|
|
return feed.Version.FetchURL()
|
|
}
|
|
|
|
func (version Version) FetchURL() (*url.URL, error) {
|
|
u, err := url.Parse(version.URL)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
switch u.Scheme {
|
|
case "nyaa": // `nyaa://?q=A B` to `https://nyaa.si/?page=rss&q=A%20B&c=0_0&f=0`
|
|
q := u.Query()
|
|
if q.Get("q") == "" {
|
|
return nil, fmt.Errorf("invalid nyaa:// (%s): no ?q", version.URL)
|
|
}
|
|
|
|
q.Set("page", "rss")
|
|
q.Set("c", "0_0")
|
|
q.Set("f", "0")
|
|
|
|
u.RawQuery = q.Encode()
|
|
u.Scheme = "https"
|
|
u.Host = "nyaa.si"
|
|
u.Path = "/"
|
|
case "http", "https":
|
|
default:
|
|
return nil, fmt.Errorf("not impl mapping %s:// to url", u.Scheme)
|
|
}
|
|
|
|
return u, nil
|
|
}
|