// Source: email-xactions-to-todo/scrape.go
// Repository viewer metadata: 474 lines, 14 KiB, Go, executable file.

package main
import (
"bytes"
"errors"
"fmt"
"io/ioutil"
"net/mail"
"regexp"
"slices"
"strconv"
"strings"
)
// scraper turns a single email message into the transactions it describes.
// Each supported bank has its own implementation in this file.
type scraper interface {
// scrape parses the message and returns the transactions found in it, or an
// error when the message cannot be interpreted.
scrape(*mail.Message) ([]*Transaction, error)
}
// Per-bank scraper implementations. They carry no state; all behaviour lives
// in their scrape methods.
type (
	fidelityScraper      struct{}
	bankOfAmericaScraper struct{}
	chaseScraper         struct{}
	citiScraper          struct{}
	uccuScraper          struct{}
	amexScraper          struct{}
)
// Scrape builds the scraper matching m's subject and sender, restricted to the
// banks enabled in banks, and returns whatever that scraper extracts from m.
// The error from building the scraper or from scraping is returned unchanged.
func Scrape(m *mail.Message, banks map[Bank]bool) ([]*Transaction, error) {
	s, err := buildScraper(m, banks)
	if err != nil {
		return nil, err
	}
	return s.scrape(m)
}
// buildScraper picks the scraper for m.
//
// The subject must contain one of a fixed set of (case-sensitive) keywords,
// otherwise an error is returned. The sender is then matched against each
// known bank in a fixed order; a bank is only considered when it is enabled in
// banks. An error is returned when no enabled bank matches the From header.
func buildScraper(m *mail.Message, banks map[Bank]bool) (scraper, error) {
	subject := fmt.Sprint(m.Header["Subject"])
	relevant := containsAny(subject, "transaction", "report", "Transaction", "payment", "Payment", "Deposit", "Withdrawal", "balance is", "Balance", "Large Purchase")
	if !relevant {
		return nil, errors.New("cannot build scraper for subject " + subject)
	}

	from := fmt.Sprint(m.Header["From"])
	switch {
	case strings.Contains(from, "Chase") && banks[Chase]:
		return newChaseScraper(), nil
	case strings.Contains(from, "Fidelity") && banks[Fidelity]:
		return newFidelityScraper(), nil
	case strings.Contains(from, "Bank of America") && banks[BankOfAmerica]:
		return newBankOfAmericaScraper(), nil
	case strings.Contains(from, "Citi") && banks[Citi]:
		return newCitiScraper(), nil
	case strings.Contains(from, "Notifications@uccu.com") && banks[UCCU]:
		return newUCCUScraper(), nil
	case strings.Contains(strings.ToLower(from), strings.ToLower("AmericanExpress")) && banks[Amex]:
		// Only the Amex sender is matched case-insensitively.
		return newAmexScraper(), nil
	}
	return nil, fmt.Errorf("unknown sender: %q", from)
}
// newFidelityScraper returns a scraper for Fidelity emails.
func newFidelityScraper() scraper {
	return new(fidelityScraper)
}
// newBankOfAmericaScraper returns a scraper for Bank of America emails.
func newBankOfAmericaScraper() scraper {
	return new(bankOfAmericaScraper)
}
// newChaseScraper returns a scraper for Chase emails.
func newChaseScraper() scraper {
	return new(chaseScraper)
}
// newUCCUScraper returns a scraper for UCCU emails.
func newUCCUScraper() scraper {
	return new(uccuScraper)
}
// newCitiScraper returns a scraper for Citi emails.
func newCitiScraper() scraper {
	return new(citiScraper)
}
// newAmexScraper returns a scraper for American Express emails.
func newAmexScraper() scraper {
	return new(amexScraper)
}
// containsAny reports whether a contains at least one of the substrings in b.
// The comparison is case-sensitive; with no substrings it reports false.
func containsAny(a string, b ...string) bool {
	for _, needle := range b {
		if !strings.Contains(a, needle) {
			continue
		}
		return true
	}
	return false
}
// scrape tries the Chase email formats in order: the 2025 balance alert, then
// the 2021 formats, and finally the 2020 format. The first attempt that
// succeeds with at least one transaction wins; the 2020 result (or error) is
// returned as the last resort.
func (c *chaseScraper) scrape(m *mail.Message) ([]*Transaction, error) {
	attempts := []func(*mail.Message) ([]*Transaction, error){
		c.scrape2025Balance,
		c.scrape2021,
	}
	for _, attempt := range attempts {
		if found, err := attempt(m); err == nil && len(found) > 0 {
			return found, nil
		}
	}
	return c.scrape2020(m)
}
// scrape2025Balance handles Chase balance alerts whose subject looks like
// "Your ... balance is $1,234.56".
//
// The balance is taken from the last word of the subject (dollar sign and
// thousands separators removed) and emitted with a leading "=". The account is
// the four digits found in the body inside "(...NNNN)". The vendor is "*".
//
// An error is returned when the subject does not match, the body cannot be
// read, the subject carries no digits after the dollar sign, or the body holds
// no account marker. The last two cases previously produced a bare "=" amount
// or an index-out-of-range panic.
func (c *chaseScraper) scrape2025Balance(m *mail.Message) ([]*Transaction, error) {
	// Header.Get returns "" for a missing header instead of panicking on [0].
	subject := m.Header.Get("Subject")
	if !regexp.MustCompile(`^Your.*balance is \$[0-9,\.]*$`).MatchString(subject) {
		return nil, errors.New("no match subject search")
	}

	// The subject matched, so it has at least one field and the last one is
	// the "$..." amount.
	fields := strings.Fields(subject)
	amount := fields[len(fields)-1]
	amount = strings.TrimLeft(amount, "$")
	amount = strings.ReplaceAll(amount, ",", "")
	if amount == "" {
		return nil, errors.New("no balance amount found in subject")
	}
	amount = "=" + amount

	body, err := ioutil.ReadAll(m.Body)
	if err != nil {
		return nil, err
	}

	// FindSubmatch returns nil when there is no match, so guard before indexing.
	accountMatch := regexp.MustCompile(`\(\.\.\.([0-9]{4})\)`).FindSubmatch(body)
	if len(accountMatch) < 2 {
		return nil, errors.New("no account number found in balance email")
	}
	account := string(accountMatch[1])

	return []*Transaction{NewTransaction(account, amount, "*", fmt.Sprint(m.Header["Date"]), Chase)}, nil
}
// scrape2021 first treats m as a payment confirmation; if that fails for any
// reason it falls back to treating m as a charge notification.
func (c *chaseScraper) scrape2021(m *mail.Message) ([]*Transaction, error) {
	payment, err := c.scrape2021Payment(m)
	if err != nil {
		return c.scrape2021Charge(m)
	}
	return payment, nil
}
// scrape2021Payment handles Chase emails whose subject is
// "We've received your ... payment".
//
// The amount is the first "$N.NN" value in the body with the dollar sign and
// thousands separators removed; the vendor is always "Payment"; the account is
// the four digits inside the first "(...NNNN)" in the body (empty when absent).
//
// An error is returned when the subject does not match, the body cannot be
// read, or the body contains no dollar amount.
func (c *chaseScraper) scrape2021Payment(m *mail.Message) ([]*Transaction, error) {
	// Header.Get returns "" for a missing header instead of panicking on [0].
	if !regexp.MustCompile(`^We've received your .* payment$`).MatchString(m.Header.Get("Subject")) {
		return nil, errors.New("no match subject search")
	}

	body, err := ioutil.ReadAll(m.Body)
	if err != nil {
		return nil, err
	}

	rawAmount := regexp.MustCompile(`\$[0-9,]+\.[0-9]{2}`).Find(body)
	if len(rawAmount) == 0 {
		return nil, errors.New("no payment amount found")
	}
	// NOTE(review): an earlier assignment built "-"+amount but was immediately
	// overwritten, so payments have always been emitted as positive amounts.
	// That dead store is removed and the emitted value is kept as-is; confirm
	// whether payments should be negative like the Bank of America payments.
	amount := strings.TrimLeft(string(rawAmount), "$")
	amount = strings.ReplaceAll(amount, ",", "")

	accountMarker := regexp.MustCompile(`\(\.\.\.[0-9]{4}\)`).Find(body)
	account := string(regexp.MustCompile(`[0-9]{4}`).Find(accountMarker))

	return []*Transaction{NewTransaction(account, amount, "Payment", fmt.Sprint(m.Header["Date"]), Chase)}, nil
}
// scrape2021Charge handles Chase emails whose subject is
// "Your $<amount> transaction with <vendor>".
//
// Amount (thousands separators removed) and vendor come from the subject; the
// account is the four digits inside the first "(...NNNN)" in the body (empty
// when absent).
//
// An error is returned when the subject does not match or the body cannot be
// read (the read error used to be discarded).
func (c *chaseScraper) scrape2021Charge(m *mail.Message) ([]*Transaction, error) {
	subjectRe := regexp.MustCompile(`^Your \$(?P<amount>[,0-9\.]*) transaction with (?P<vendor>.*)$`)
	// Header.Get returns "" for a missing header instead of panicking on [0].
	matches := subjectRe.FindStringSubmatch(m.Header.Get("Subject"))
	// A successful match has the whole match plus the two capture groups.
	if len(matches) < 3 {
		return nil, errors.New("no match subject search")
	}
	amount := strings.ReplaceAll(matches[1], ",", "")
	vendor := matches[2]

	body, err := ioutil.ReadAll(m.Body)
	if err != nil {
		return nil, err
	}
	accountMarker := regexp.MustCompile(`\(\.\.\.[0-9]{4}\)`).Find(body)
	account := string(regexp.MustCompile(`[0-9]{4}`).Find(accountMarker))

	return []*Transaction{NewTransaction(account, amount, vendor, fmt.Sprint(m.Header["Date"]), Chase)}, nil
}
// scrape2020 handles Chase emails whose body contains
// "A charge of (...) <amount> at <vendor> has been authorized".
//
// Only the first such sentence in the body is used. The account is the four
// digits following "account ending in" (empty when absent). An error is
// returned when the body cannot be read or the charge sentence is missing.
func (c *chaseScraper) scrape2020(m *mail.Message) ([]*Transaction, error) {
	body, err := ioutil.ReadAll(m.Body)
	if err != nil {
		return nil, err
	}

	chargeRe := regexp.MustCompile(`A charge of \([^)]*\) (?P<amount>[\d\.]+) at (?P<vendor>.*) has been authorized`)
	groups := chargeRe.FindSubmatch(body)
	if len(groups) < 2 {
		return nil, fmt.Errorf("no full matches found")
	}

	// Collect the captured values keyed by their group name.
	byName := make(map[string][]string)
	for idx, name := range chargeRe.SubexpNames() {
		if idx == 0 || name == "" {
			continue
		}
		byName[name] = append(byName[name], string(groups[idx]))
	}
	amounts, vendors := byName["amount"], byName["vendor"]
	if len(byName) != 2 || len(amounts) != len(vendors) {
		return nil, fmt.Errorf("unexpected matches found looking for transactions: %+v", byName)
	}

	accountSentence := regexp.MustCompile(`account ending in (?P<account>[0-9]{4})\.`).Find(body)
	account := string(regexp.MustCompile(`[0-9]{4}`).Find(accountSentence))

	date := fmt.Sprint(m.Header["Date"])
	transactions := make([]*Transaction, 0, len(amounts))
	for i, amount := range amounts {
		transactions = append(transactions, NewTransaction(account, amount, vendors[i], date, Chase))
	}
	return transactions, nil
}
// scrape handles Citi alerts whose body contains
// "Citi Alert: A $<price> transaction was made at <vendor> on card ending in".
//
// When the body has no such sentence it returns (nil, nil): no transactions
// and no error. The account name is Citi.String().
func (c *citiScraper) scrape(m *mail.Message) ([]*Transaction, error) {
	body, err := ioutil.ReadAll(m.Body)
	if err != nil {
		return nil, err
	}

	alert := regexp.MustCompile(`Citi Alert: A \$[,0-9][,0-9]*\.[0-9][0-9] transaction was made at .* on card ending in`).Find(body)
	if len(alert) == 0 {
		return nil, nil
	}

	rawPrice := regexp.MustCompile(`[0-9][0-9,]*\.[0-9][0-9]`).Find(alert)
	price := strings.ReplaceAll(string(rawPrice), ",", "")

	// The vendor sits between the two fixed phrases; the alert regexp
	// guarantees both phrases are present in this order.
	beforeCard := bytes.Split(alert, []byte(" on card ending in"))[0]
	vendor := bytes.Split(beforeCard, []byte("transaction was made at "))[1]

	date := fmt.Sprint(m.Header["Date"])
	return []*Transaction{NewTransaction(Citi.String(), price, string(vendor), date, Citi)}, nil
}
// scrape handles UCCU notifications.
//
// The amount is the first "$N.NN" value in the body. It is emitted as a
// positive number when the body contains the word "credit" and as a negative
// number otherwise. The vendor is always "?" and the account name is
// UCCU.String().
//
// An error is returned when the body cannot be read, holds no dollar amount,
// or the amount cannot be parsed.
func (c *uccuScraper) scrape(m *mail.Message) ([]*Transaction, error) {
	body, err := ioutil.ReadAll(m.Body)
	if err != nil {
		return nil, err
	}

	// Named amountRe rather than regexp so the package is not shadowed.
	amountRe := regexp.MustCompile(`\$([0-9]+,?)+\.[0-9][0-9]`)
	match := amountRe.Find(body)
	if len(match) == 0 {
		return nil, fmt.Errorf("no matches found")
	}

	// Drop the leading "$" and any thousands separators.
	digits := bytes.ReplaceAll(match[1:], []byte(","), nil)
	// bitSize must be 32 or 64; the previous value 10 only worked because
	// anything other than 32 is treated as 64.
	value, err := strconv.ParseFloat(string(digits), 64)
	if err != nil {
		return nil, err
	}
	if !bytes.Contains(body, []byte("credit")) {
		value = -value
	}

	transaction := NewTransaction(UCCU.String(), fmt.Sprintf("%.2f", value), "?", fmt.Sprint(m.Header["Date"]), UCCU)
	return []*Transaction{transaction}, nil
}
// scrape handles American Express notifications.
//
// The amount is the first "$N.NN" value in the body that is not exactly
// "$1.00", emitted as a negative number. The vendor is the first all-caps text
// found between ">" and "<" (skipping one hard-coded name), or "*" when none
// is found. The account name is "<Amex>-<last four digits>" taken from
// "Account Ending: ...", or "<Amex>-?" when that is missing.
//
// An error is returned when the body cannot be read, holds no usable dollar
// amount, or the amount cannot be parsed.
func (c *amexScraper) scrape(m *mail.Message) ([]*Transaction, error) {
	body, err := ioutil.ReadAll(m.Body)
	if err != nil {
		return nil, err
	}
	// Re-join lines broken with a trailing "=" (looks like quoted-printable
	// soft line breaks) so patterns are not split across lines.
	body = bytes.ReplaceAll(body, []byte("=\n"), []byte(""))

	amounts := regexp.MustCompile(`\$([0-9]+,?)+\.[0-9][0-9]`).FindAll(body, -1)
	// NOTE(review): "$1.00" is skipped on purpose by the original code; the
	// reason is not visible here.
	amounts = slices.DeleteFunc(amounts, func(candidate []byte) bool {
		return string(candidate) == "$1.00"
	})
	if len(amounts) == 0 {
		return nil, fmt.Errorf("no matches found")
	}

	// Drop the leading "$" and any thousands separators.
	digits := bytes.ReplaceAll(amounts[0][1:], []byte(","), nil)
	// bitSize must be 32 or 64; the previous value 10 only worked because
	// anything other than 32 is treated as 64.
	value, err := strconv.ParseFloat(string(digits), 64)
	if err != nil {
		return nil, err
	}
	value = -value

	vendors := regexp.MustCompile(`>[A-Z][A-Z ]*<`).FindAll(body, -1)
	// Presumably the cardholder's name, which would otherwise be picked up as
	// a vendor — TODO confirm.
	vendors = slices.DeleteFunc(vendors, func(candidate []byte) bool {
		return string(candidate) == ">BREE A LAPOINTE<"
	})
	vendor := "*"
	if len(vendors) > 0 {
		vendor = string(vendors[0])
	}
	vendor = strings.TrimSpace(strings.Trim(strings.Trim(vendor, ">"), "<"))

	acc := "?"
	if accs := regexp.MustCompile(`Account Ending: [0-9]*([0-9]{4})[^0-9]`).FindSubmatch(body); len(accs) > 1 {
		acc = string(accs[1])
	}

	transaction := NewTransaction(
		fmt.Sprintf("%s-%s", Amex.String(), acc),
		fmt.Sprintf("%.2f", value),
		vendor,
		fmt.Sprint(m.Header["Date"]),
		Amex,
	)
	return []*Transaction{transaction}, nil
}
// scrape dispatches a Fidelity email by subject: "Daily Balance" goes to
// scrapeBalance, "Debit Withdrawal" to scrapeWithdrawal and "Deposit Received"
// to scrapeDeposit.
//
// Any other subject yields an error. This used to be panic(nil), which would
// take the whole process down on an unexpected email.
func (c *fidelityScraper) scrape(m *mail.Message) ([]*Transaction, error) {
	subject := fmt.Sprint(m.Header["Subject"])
	switch {
	case strings.Contains(subject, "Daily Balance"):
		return c.scrapeBalance(m)
	case strings.Contains(subject, "Debit Withdrawal"):
		return c.scrapeWithdrawal(m)
	case strings.Contains(subject, "Deposit Received"):
		return c.scrapeDeposit(m)
	}
	return nil, fmt.Errorf("unsupported fidelity subject: %q", subject)
}
// scrapeBalance handles Fidelity "Daily Balance" emails.
//
// The balance is the text between "Your Daily Balance is $" and " for ",
// emitted with thousands separators removed and a leading "=". The account
// name is "<Fidelity>-<suffix>", where suffix is the rest of the line after
// "Account: XXXXX". The vendor is "*".
//
// An error is returned when the body cannot be read or contains no balance;
// previously a missing balance produced a transaction whose amount was "=".
func (c *fidelityScraper) scrapeBalance(m *mail.Message) ([]*Transaction, error) {
	body, err := ioutil.ReadAll(m.Body)
	if err != nil {
		return nil, err
	}

	// A missing account suffix is tolerated, as in scrapeWithdrawal: the
	// account name then ends in "-".
	fidelAcc, _ := findSubstringBetween(body, "Account: XXXXX", "\n")
	fidelAmount, ok := findSubstringBetween(body, "Your Daily Balance is $", " for ")
	if !ok || fidelAmount == "" {
		return nil, errors.New("no daily balance found")
	}

	transaction := NewTransaction(
		fmt.Sprintf("%s-%s", Fidelity, fidelAcc),
		"="+strings.ReplaceAll(fidelAmount, ",", ""),
		"*",
		fmt.Sprint(m.Header["Date"]),
		Fidelity,
	)
	return []*Transaction{transaction}, nil
}
// scrapeDeposit handles Fidelity "Deposit Received" emails.
//
// No amount is read from the email: the transaction always carries the
// placeholder amount "?.??" and the vendor "misc". The account name is
// "<Fidelity>-<suffix>", where suffix is the rest of the line after
// "Account: XXXXX" (empty when that marker is absent).
func (c *fidelityScraper) scrapeDeposit(m *mail.Message) ([]*Transaction, error) {
	body, err := ioutil.ReadAll(m.Body)
	if err != nil {
		return nil, err
	}

	// Best effort: a missing suffix simply yields an account ending in "-".
	suffix, _ := findSubstringBetween(body, "Account: XXXXX", "\n")
	account := fmt.Sprintf("%s-%s", Fidelity, suffix)
	date := fmt.Sprint(m.Header["Date"])

	return []*Transaction{NewTransaction(account, "?.??", "misc", date, Fidelity)}, nil
}
// scrapeWithdrawal handles Fidelity "Debit Withdrawal" emails.
//
// The amount is the word following "in the amount of $"; the payee is the text
// between "in the amount of $<amount> by " and the next "."; the account name
// is "<Fidelity>-<suffix>", where suffix is the text between
// "For account ending in " and ":".
//
// An error is returned when the body cannot be read or when either the amount
// or the payee is empty. A missing account suffix is not an error.
func (c *fidelityScraper) scrapeWithdrawal(m *mail.Message) ([]*Transaction, error) {
	body, err := ioutil.ReadAll(m.Body)
	if err != nil {
		return nil, err
	}

	const amountMarker = "in the amount of $"
	amount, amountOk := findSubstringBetween(body, amountMarker, " ")
	fidelAcc, fidelAccOk := findSubstringBetween(body, "For account ending in ", ":")
	payee, payeeOk := findSubstringBetween(body, amountMarker+amount+" by ", ".")

	if missing := amount == "" || payee == ""; missing {
		return nil, fmt.Errorf("no amount/account found: fidelAcc=%v,fidelAccOk=%v, acc=%v,accOk=%v, amount=%v,amountOk=%v", fidelAcc, fidelAccOk, payee, payeeOk, amount, amountOk)
	}

	account := fmt.Sprintf("%s-%s", Fidelity, fidelAcc)
	date := fmt.Sprint(m.Header["Date"])
	return []*Transaction{NewTransaction(account, amount, payee, date, Fidelity)}, nil
}
// scrape dispatches a Bank of America email by subject: "Credit card
// transaction" goes to scrapeCharge and "Credit Card Payment" to
// scrapePayment. Any other subject yields a "not impl" error.
func (c *bankOfAmericaScraper) scrape(m *mail.Message) ([]*Transaction, error) {
	subject := fmt.Sprint(m.Header["Subject"])
	switch {
	case strings.Contains(subject, "Credit card transaction"):
		return c.scrapeCharge(m)
	case strings.Contains(subject, "Credit Card Payment"):
		return c.scrapePayment(m)
	default:
		return nil, errors.New("not impl")
	}
}
// scrapeCharge reads the body once and tries the 2023 charge layout first,
// falling back to the 2024 layout when the 2023 parse fails.
func (c *bankOfAmericaScraper) scrapeCharge(m *mail.Message) ([]*Transaction, error) {
	body, err := ioutil.ReadAll(m.Body)
	if err != nil {
		return nil, err
	}

	transactions, err := c.scrapeCharge2023(m, body)
	if err != nil {
		return c.scrapeCharge2024(m, body)
	}
	return transactions, nil
}
// scrapeCharge2023 parses the line-oriented charge layout: the amount comes
// from the line starting with "Amount: $" and the vendor from the line
// starting with "Where: ". An error is returned when either is empty.
func (c *bankOfAmericaScraper) scrapeCharge2023(m *mail.Message, b []byte) ([]*Transaction, error) {
	amount := findFloatAfter(b, "Amount: $")
	where := string(findLineAfter(b, "Where: "))
	if len(amount) == 0 || len(where) == 0 {
		return nil, errors.New("no amount/account found")
	}

	date := fmt.Sprint(m.Header["Date"])
	return []*Transaction{NewTransaction(BankOfAmerica.String(), amount, where, date, BankOfAmerica)}, nil
}
// scrapeCharge2024 parses the HTML charge layout.
//
// The amount is the first "$N.NN" value after "Amount:" (newlines are treated
// as spaces for this search). The vendor is the bold text in the table row
// that follows "Where:".
//
// An error is returned when the amount, the "Where:" section or the bold
// vendor text cannot be found. A body without "Where:" used to cause an
// index-out-of-range panic.
func (c *bankOfAmericaScraper) scrapeCharge2024(m *mail.Message, b []byte) ([]*Transaction, error) {
	amountPattern := `Amount:[^\$]*\$([0-9]*\.[0-9]*)`
	amountMatches := regexp.MustCompile(amountPattern).FindSubmatch(bytes.ReplaceAll(b, []byte("\n"), []byte(" ")))
	if len(amountMatches) < 2 {
		return nil, fmt.Errorf("email does not match amount %q: %+v", amountPattern, amountMatches)
	}
	amount := string(amountMatches[1])

	// Split yields a single element when the separator is absent, so check
	// before taking the part that follows "Where:".
	sections := bytes.Split(b, []byte("Where:"))
	if len(sections) < 2 {
		return nil, errors.New(`email does not contain "Where:"`)
	}
	// Keep only the rest of the table row and flatten it onto one line so the
	// vendor pattern can span what were separate lines.
	row := bytes.Split(sections[1], []byte("</tr>"))[0]
	row = bytes.ReplaceAll(row, []byte("\n"), []byte(" "))

	accPattern := `<b>(.*)</b>`
	accMatches := regexp.MustCompile(accPattern).FindSubmatch(row)
	if len(accMatches) < 2 {
		return nil, fmt.Errorf("email does not match acc %q", accPattern)
	}
	acc := string(accMatches[1])

	if amount == "" || acc == "" {
		return nil, errors.New("no amount/account found")
	}
	transaction := NewTransaction(BankOfAmerica.String(), amount, acc, fmt.Sprint(m.Header["Date"]), BankOfAmerica)
	return []*Transaction{transaction}, nil
}
// scrapePayment handles Bank of America "Credit Card Payment" emails.
//
// The amount comes from the line starting with "Payment: $" and is emitted
// with a leading "-"; the vendor is always "Payment".
//
// An error is returned when the body cannot be read or no payment amount is
// found. The emptiness check used to run after the "-" was prepended, so it
// could never trigger and a missing amount was emitted as "-".
func (c *bankOfAmericaScraper) scrapePayment(m *mail.Message) ([]*Transaction, error) {
	body, err := ioutil.ReadAll(m.Body)
	if err != nil {
		return nil, err
	}

	payment := findFloatAfter(body, "Payment: $")
	if payment == "" {
		return nil, errors.New("no amount/account found")
	}

	transaction := NewTransaction(BankOfAmerica.String(), "-"+payment, "Payment", fmt.Sprint(m.Header["Date"]), BankOfAmerica)
	return []*Transaction{transaction}, nil
}
// findSubstringBetween returns the text that follows the first occurrence of
// prefix in b, up to the next occurrence of suffix, and true.
//
// Only the span between the first and second occurrence of prefix is searched
// for suffix. It returns "" and false when prefix is absent or suffix does not
// occur in that span.
func findSubstringBetween(b []byte, prefix, suffix string) (string, bool) {
	afterPrefix := bytes.Split(b, []byte(prefix))
	if len(afterPrefix) >= 2 {
		if pieces := bytes.Split(afterPrefix[1], []byte(suffix)); len(pieces) >= 2 {
			return string(pieces[0]), true
		}
	}
	return "", false
}
// findFloatAfter returns the last space-separated word of the first line in b
// that starts with prefix, with a leading "$" removed and every "=2E" replaced
// by ".". It returns "" when no line starts with prefix.
func findFloatAfter(b []byte, prefix string) string {
	line := string(findLineAfter(b, prefix))
	words := strings.Split(line, " ")
	last := strings.TrimPrefix(words[len(words)-1], "$")
	// "=2E" looks like a quoted-printable encoded "." in the amount.
	return strings.ReplaceAll(last, "=2E", ".")
}
// findLineAfter returns the remainder of the first "\n"-separated line of b
// that starts with prefix, with the prefix removed and surrounding whitespace
// trimmed. It returns nil when no line starts with prefix.
func findLineAfter(b []byte, prefix string) []byte {
	needle := []byte(prefix)
	lines := bytes.Split(b, []byte("\n"))
	for i := 0; i < len(lines); i++ {
		if !bytes.HasPrefix(lines[i], needle) {
			continue
		}
		return bytes.TrimSpace(lines[i][len(needle):])
	}
	return nil
}