asses add modtime column to skip cksumming for dupe work

main
bel 2025-05-17 20:03:53 -06:00
parent 582e35b237
commit 90887d3f11
3 changed files with 42 additions and 22 deletions

View File

@ -34,8 +34,9 @@ func Record(ctx context.Context) error {
}
type last struct {
T time.Time `json:"checked_at"`
Cksum string `json:"cksum"`
T time.Time `json:"checked_at"`
Cksum string `json:"cksum"`
Modified time.Time `json:"modified"`
}
func checkLast(ctx context.Context, p string) (last, error) {
@ -44,26 +45,26 @@ func checkLast(ctx context.Context, p string) (last, error) {
}
return db.QueryOne[last](ctx, `
SELECT checked_at, cksum
SELECT checked_at, cksum, modified
FROM "asses.checks"
WHERE p=$1
`, p)
}
func checked(ctx context.Context, p, cksum string) error {
func checked(ctx context.Context, p, cksum string, modified time.Time) error {
if err := initDB(ctx); err != nil {
return err
}
return db.Exec(ctx, `
INSERT INTO "asses.checks"
(p, checked_at, cksum)
VALUES ($1, $2, $3)
(p, checked_at, cksum, modified)
VALUES ($1, $2, $3, $4)
ON CONFLICT DO UPDATE
SET checked_at=$2, cksum=$3
SET checked_at=$2, cksum=$3, modified=$4
WHERE p=$1
`, p, time.Now(), cksum)
`, p, time.Now(), cksum, modified)
}
func initDB(ctx context.Context) error {
@ -77,5 +78,6 @@ func initDB(ctx context.Context) error {
checked_at TIMESTAMP NOT NULL,
cksum TEXT NOT NULL
)`,
`ALTER TABLE "asses.checks" ADD COLUMN "modified" TIMESTAMP`,
})
}

View File

@ -2,8 +2,10 @@ package asses
import (
"context"
"math"
"show-rss/src/db"
"testing"
"time"
)
func TestLast(t *testing.T) {
@ -15,9 +17,10 @@ func TestLast(t *testing.T) {
t.Fatal(last)
}
if err := checked(ctx, "p", "cksum"); err != nil {
modtime := time.Now().Add(-5 * time.Minute)
if err := checked(ctx, "p", "cksum", modtime); err != nil {
t.Fatal(err)
} else if err := checked(ctx, "p", "cksum"); err != nil {
} else if err := checked(ctx, "p", "cksum", modtime); err != nil {
t.Fatal(err)
}
@ -25,5 +28,7 @@ func TestLast(t *testing.T) {
t.Fatal(err)
} else if last.T.IsZero() || last.Cksum != "cksum" {
t.Fatal(last)
} else if math.Abs(float64(last.Modified.Sub(modtime))) > float64(time.Second) {
t.Fatalf("modified not uploaded: %v vs. %v (diff of %v)", last.Modified, modtime, last.Modified.Sub(modtime))
}
}

View File

@ -4,6 +4,7 @@ import (
"context"
"crypto/md5"
"encoding/base64"
"fmt"
"io"
"log"
"math/rand"
@ -13,26 +14,34 @@ import (
)
func One(ctx context.Context, p string) error {
shortp := path.Join("...", path.Dir(p), path.Base(p))
shortp := path.Join("...", path.Base(path.Dir(p)), path.Base(p))
last, err := checkLast(ctx, p)
if err != nil {
return err
}
if last.T.IsZero() {
log.Printf("asses.One(%s) // never seen before", shortp)
} else if cksum, err := Cksum(p); err != nil {
return err
} else if cksum != last.Cksum {
log.Printf("asses.One(%s) // cksum changed", shortp)
} else if threshold := 20 + time.Duration(rand.Int()%10)*24*time.Hour; time.Since(last.T) < threshold {
log.Printf("asses.One(%s) // lt %v since last review", shortp, threshold.Hours())
if threshold := 20 + time.Duration(rand.Int()%10)*24*time.Hour; time.Since(last.T) > threshold {
} else if stat, err := os.Stat(p); err != nil {
return fmt.Errorf("cannot stat %s: %w", p, err)
} else if stat.ModTime() == last.Modified {
log.Printf("asses.One(%s) // unmodified since %v", shortp, last.T)
return nil
}
log.Printf("asses.deport(%s)...", shortp)
if err := deport(ctx, p); err != nil {
if err := func() error {
if cksum, err := Cksum(p); err != nil {
return err
} else if cksum == last.Cksum {
log.Printf("asses.One(%s) // cksum unchanged since %v", shortp, last.T)
} else {
log.Printf("asses.deport(%s)...", shortp)
if err := deport(ctx, p); err != nil {
return err
}
}
return nil
}(); err != nil {
return err
}
@ -40,7 +49,11 @@ func One(ctx context.Context, p string) error {
if err != nil {
return err
}
return checked(ctx, p, cksum)
stat, err := os.Stat(p)
if err != nil {
return err
}
return checked(ctx, p, cksum, stat.ModTime())
}
func Cksum(p string) (string, error) {