package feeds

import (
	"context"
	"fmt"
	"io"
	"net/http"
	"net/url"
	"regexp"
	"slices"
	"sort"
	"strings"
	"time"
	"unicode/utf8"

	"github.com/jaytaylor/html2text"
	"github.com/mmcdole/gofeed"
	"github.com/robfig/cron/v3"
)

var (
	// ProxyU is the proxy every feed fetch is routed through. When it is nil,
	// proxyFetch falls back to the default HTTP transport.
	//
	// The literal is a constant, so a parse failure is a programmer error and
	// panics at package initialisation.
	ProxyU = func() *url.URL {
		u, err := url.Parse("socks5://wghttp.inhome.blapointe.com:63114")
		if err != nil {
			panic(err)
		}
		return u
	}()
)

// ShouldExecute reports whether the feed is due to run: the first cron
// activation after the last recorded execution must already be in the past.
//
// A feed whose entry has a non-zero Deleted time is never due. An error is
// returned when feed.Version.Cron cannot be parsed; the expression may carry
// an optional leading seconds field or be a descriptor such as "@hourly".
func (feed Feed) ShouldExecute() (bool, error) {
	if !feed.Entry.Deleted.IsZero() {
		return false, nil
	}

	schedule, err := cron.NewParser(
		cron.SecondOptional | cron.Minute | cron.Hour | cron.Dom | cron.Month | cron.Dow | cron.Descriptor,
	).Parse(feed.Version.Cron)
	if err != nil {
		return false, fmt.Errorf("illegal cron %q: %w", feed.Version.Cron, err)
	}

	next := schedule.Next(feed.Execution.Executed)
	return time.Now().After(next), nil
}

// Fetch downloads the feed, parses it and returns the items for which at
// least one of title, description, content, link(s), author name or author
// email matches feed.Version.Pattern.
//
// Errors come from resolving the fetch URL, the HTTP request, feed parsing,
// or an invalid pattern.
func (feed Feed) Fetch(ctx context.Context) (Items, error) {
	u, err := feed.FetchURL()
	if err != nil {
		return nil, err
	}

	resp, err := proxyFetch(ctx, u.String())
	if err != nil {
		return nil, err
	}

	gfeed, err := gofeed.NewParser().ParseString(resp)
	if err != nil {
		return nil, err
	}
	sort.Sort(gfeed)

	// The pattern is feed configuration, not a compile-time constant, so a bad
	// expression must surface as an error rather than a panic.
	matcher, err := regexp.Compile(feed.Version.Pattern)
	if err != nil {
		return nil, fmt.Errorf("illegal pattern %q: %w", feed.Version.Pattern, err)
	}

	result := make(Items, 0, len(gfeed.Items))
	for _, gitem := range gfeed.Items {
		author := gitem.Author
		if author == nil {
			author = &gofeed.Person{}
		}

		candidates := append([]string{
			gitem.Title,
			gitem.Description,
			gitem.Content,
			gitem.Link,
			author.Name,
			author.Email,
		}, gitem.Links...)
		if !slices.ContainsFunc(candidates, matcher.MatchString) {
			continue
		}

		// Text conversion is best-effort: on failure the field is left as
		// whatever html2text returned alongside the error.
		preview, _ := html2text.FromString(gitem.Description)
		body, _ := html2text.FromString(gitem.Content)
		if body == "" {
			// No content: the full description becomes the body and the
			// preview is shortened.
			body = preview
			preview = truncatePreview(preview)
		}

		links := slices.DeleteFunc(
			append([]string{gitem.Link}, gitem.Links...),
			func(s string) bool { return strings.TrimSpace(s) == "" },
		)
		slices.Sort(links)
		links = slices.Compact(links)

		// After sorting, the primary link is the lexicographically smallest one.
		var link string
		if len(links) > 0 {
			link = links[0]
		}

		result = append(result, Item{
			Title:   gitem.Title,
			Link:    link,
			Links:   links,
			Preview: preview,
			Body:    body,
		})
	}

	return result, nil
}

// truncatePreview shortens s to at most 50 bytes plus "..." when s is longer
// than 53 bytes. The cut is moved back to the nearest rune boundary so a
// multi-byte UTF-8 character is never split.
func truncatePreview(s string) string {
	const (
		maxBytes = 53
		cutBytes = 50
	)
	if len(s) <= maxBytes {
		return s
	}
	cut := cutBytes
	for cut > 0 && !utf8.RuneStart(s[cut]) {
		cut--
	}
	return s[:cut] + "..."
}

// proxyFetch performs a GET on u, through ProxyU when it is set, and returns
// the response body as a string.
//
// The request is bound to ctx and also capped at one minute. Any status other
// than 200 is returned as an error that includes whatever part of the body
// could be read.
func proxyFetch(ctx context.Context, u string) (string, error) {
	if ctx == nil {
		// Tolerate callers that pass a nil context; NewRequestWithContext rejects it.
		ctx = context.Background()
	}

	req, err := http.NewRequestWithContext(ctx, http.MethodGet, u, nil)
	if err != nil {
		return "", err
	}

	c := http.Client{Timeout: time.Minute}
	if ProxyU != nil {
		// The transport is single-use, so release its pooled connection on
		// return; otherwise every call leaves an idle connection behind.
		transport := &http.Transport{Proxy: http.ProxyURL(ProxyU)}
		defer transport.CloseIdleConnections()
		c.Transport = transport
	}

	resp, err := c.Do(req)
	if err != nil {
		return "", err
	}
	defer resp.Body.Close()
	defer io.Copy(io.Discard, resp.Body)

	b, readErr := io.ReadAll(resp.Body)
	if resp.StatusCode != http.StatusOK {
		// The body is only diagnostic context here, so a partial read is fine.
		return "", fmt.Errorf("failed fetch %s %s: (%d) %s", req.Method, req.URL.String(), resp.StatusCode, b)
	}
	if readErr != nil {
		return "", fmt.Errorf("reading body of %s %s: %w", req.Method, req.URL.String(), readErr)
	}

	return string(b), nil
}

// FetchURL returns the concrete URL to download for this feed's version.
func (feed Feed) FetchURL() (*url.URL, error) {
	return feed.Version.FetchURL()
}

// FetchURL resolves version.URL into a fetchable URL.
//
// http and https URLs are returned as parsed. The nyaa scheme is rewritten to
// an RSS search on nyaa.si and requires a non-empty q parameter. Any other
// scheme is an error.
func (version Version) FetchURL() (*url.URL, error) {
	u, err := url.Parse(version.URL)
	if err != nil {
		return nil, err
	}

	switch u.Scheme {
	case "nyaa":
		// `nyaa://?q=A B` becomes `https://nyaa.si/?c=0_0&f=0&page=rss&q=A+B`
		// (Encode sorts the keys and writes a space as `+`).
		q := u.Query()
		if q.Get("q") == "" {
			return nil, fmt.Errorf("invalid nyaa:// (%s): no ?q", version.URL)
		}
		q.Set("page", "rss")
		q.Set("c", "0_0")
		q.Set("f", "0")
		u.RawQuery = q.Encode()
		u.Scheme = "https"
		u.Host = "nyaa.si"
		u.Path = "/"
	case "http", "https":
	default:
		return nil, fmt.Errorf("not impl mapping %s:// to url", u.Scheme)
	}

	return u, nil
}