asses: add modtime column to skip checksumming for duplicate work

main
bel 2025-05-17 20:03:53 -06:00
parent 582e35b237
commit 90887d3f11
3 changed files with 42 additions and 22 deletions

View File

@ -34,8 +34,9 @@ func Record(ctx context.Context) error {
} }
type last struct { type last struct {
T time.Time `json:"checked_at"` T time.Time `json:"checked_at"`
Cksum string `json:"cksum"` Cksum string `json:"cksum"`
Modified time.Time `json:"modified"`
} }
func checkLast(ctx context.Context, p string) (last, error) { func checkLast(ctx context.Context, p string) (last, error) {
@ -44,26 +45,26 @@ func checkLast(ctx context.Context, p string) (last, error) {
} }
return db.QueryOne[last](ctx, ` return db.QueryOne[last](ctx, `
SELECT checked_at, cksum SELECT checked_at, cksum, modified
FROM "asses.checks" FROM "asses.checks"
WHERE p=$1 WHERE p=$1
`, p) `, p)
} }
func checked(ctx context.Context, p, cksum string) error { func checked(ctx context.Context, p, cksum string, modified time.Time) error {
if err := initDB(ctx); err != nil { if err := initDB(ctx); err != nil {
return err return err
} }
return db.Exec(ctx, ` return db.Exec(ctx, `
INSERT INTO "asses.checks" INSERT INTO "asses.checks"
(p, checked_at, cksum) (p, checked_at, cksum, modified)
VALUES ($1, $2, $3) VALUES ($1, $2, $3, $4)
ON CONFLICT DO UPDATE ON CONFLICT DO UPDATE
SET checked_at=$2, cksum=$3 SET checked_at=$2, cksum=$3, modified=$4
WHERE p=$1 WHERE p=$1
`, p, time.Now(), cksum) `, p, time.Now(), cksum, modified)
} }
func initDB(ctx context.Context) error { func initDB(ctx context.Context) error {
@ -77,5 +78,6 @@ func initDB(ctx context.Context) error {
checked_at TIMESTAMP NOT NULL, checked_at TIMESTAMP NOT NULL,
cksum TEXT NOT NULL cksum TEXT NOT NULL
)`, )`,
`ALTER TABLE "asses.checks" ADD COLUMN "modified" TIMESTAMP`,
}) })
} }

View File

@ -2,8 +2,10 @@ package asses
import ( import (
"context" "context"
"math"
"show-rss/src/db" "show-rss/src/db"
"testing" "testing"
"time"
) )
func TestLast(t *testing.T) { func TestLast(t *testing.T) {
@ -15,9 +17,10 @@ func TestLast(t *testing.T) {
t.Fatal(last) t.Fatal(last)
} }
if err := checked(ctx, "p", "cksum"); err != nil { modtime := time.Now().Add(-5 * time.Minute)
if err := checked(ctx, "p", "cksum", modtime); err != nil {
t.Fatal(err) t.Fatal(err)
} else if err := checked(ctx, "p", "cksum"); err != nil { } else if err := checked(ctx, "p", "cksum", modtime); err != nil {
t.Fatal(err) t.Fatal(err)
} }
@ -25,5 +28,7 @@ func TestLast(t *testing.T) {
t.Fatal(err) t.Fatal(err)
} else if last.T.IsZero() || last.Cksum != "cksum" { } else if last.T.IsZero() || last.Cksum != "cksum" {
t.Fatal(last) t.Fatal(last)
} else if math.Abs(float64(last.Modified.Sub(modtime))) > float64(time.Second) {
t.Fatalf("modified not uploaded: %v vs. %v (diff of %v)", last.Modified, modtime, last.Modified.Sub(modtime))
} }
} }

View File

@ -4,6 +4,7 @@ import (
"context" "context"
"crypto/md5" "crypto/md5"
"encoding/base64" "encoding/base64"
"fmt"
"io" "io"
"log" "log"
"math/rand" "math/rand"
@ -13,26 +14,34 @@ import (
) )
func One(ctx context.Context, p string) error { func One(ctx context.Context, p string) error {
shortp := path.Join("...", path.Dir(p), path.Base(p)) shortp := path.Join("...", path.Base(path.Dir(p)), path.Base(p))
last, err := checkLast(ctx, p) last, err := checkLast(ctx, p)
if err != nil { if err != nil {
return err return err
} }
if last.T.IsZero() { if threshold := 20 + time.Duration(rand.Int()%10)*24*time.Hour; time.Since(last.T) > threshold {
log.Printf("asses.One(%s) // never seen before", shortp) } else if stat, err := os.Stat(p); err != nil {
} else if cksum, err := Cksum(p); err != nil { return fmt.Errorf("cannot stat %s: %w", p, err)
return err } else if stat.ModTime() == last.Modified {
} else if cksum != last.Cksum { log.Printf("asses.One(%s) // unmodified since %v", shortp, last.T)
log.Printf("asses.One(%s) // cksum changed", shortp)
} else if threshold := 20 + time.Duration(rand.Int()%10)*24*time.Hour; time.Since(last.T) < threshold {
log.Printf("asses.One(%s) // lt %v since last review", shortp, threshold.Hours())
return nil return nil
} }
log.Printf("asses.deport(%s)...", shortp) if err := func() error {
if err := deport(ctx, p); err != nil { if cksum, err := Cksum(p); err != nil {
return err
} else if cksum == last.Cksum {
log.Printf("asses.One(%s) // cksum unchanged since %v", shortp, last.T)
} else {
log.Printf("asses.deport(%s)...", shortp)
if err := deport(ctx, p); err != nil {
return err
}
}
return nil
}(); err != nil {
return err return err
} }
@ -40,7 +49,11 @@ func One(ctx context.Context, p string) error {
if err != nil { if err != nil {
return err return err
} }
return checked(ctx, p, cksum) stat, err := os.Stat(p)
if err != nil {
return err
}
return checked(ctx, p, cksum, stat.ModTime())
} }
func Cksum(p string) (string, error) { func Cksum(p string) (string, error) {