email-xactions-to-todo/scrape.go

179 lines
5.0 KiB
Go
Executable File

package main
import (
"bytes"
"errors"
"fmt"
"io/ioutil"
"net/mail"
"regexp"
"strconv"
"strings"
)
// scraper extracts transactions from a single email message. Each supported
// sender has its own implementation; buildScraper chooses between them.
type scraper interface {
// scrape reads the message and returns the transactions found in it, or an
// error describing why none could be extracted.
scrape(*mail.Message) ([]*Transaction, error)
}
// chaseScraper is the scraper used for messages whose From header contains
// "Chase". It carries no state.
type chaseScraper struct{}

// citiScraper is the scraper used for messages whose From header contains
// "Citi". It carries no state.
type citiScraper struct{}

// uccuScraper is the scraper used for messages whose From header contains
// "Notifications@uccu.com". It carries no state.
type uccuScraper struct{}
// Scrape extracts the transactions described by the email message m.
//
// It selects a sender-specific scraper from the message headers and delegates
// to it. If no scraper can be selected, the selection error is returned and
// the message body is not read.
func Scrape(m *mail.Message) ([]*Transaction, error) {
	impl, err := buildScraper(m)
	if err != nil {
		return nil, err
	}
	return impl.scrape(m)
}
// buildScraper picks the scraper implementation for m based on its Subject
// and From headers.
//
// The subject must contain "transaction", "report" or "Transaction"
// (case-sensitive); otherwise an error is returned. The sender is then
// matched, in order, against "Chase", "Citi" and "Notifications@uccu.com".
// A sender matching none of these is reported as an error.
func buildScraper(m *mail.Message) (scraper, error) {
	// fmt.Sprint renders the raw []string header value, so the text is wrapped
	// in brackets (e.g. "[value]"); the error messages below include that form.
	subj := fmt.Sprint(m.Header["Subject"])
	if !containsAny(subj, "transaction", "report", "Transaction") {
		return nil, errors.New("cannot build scraper for subject " + subj)
	}

	sender := fmt.Sprint(m.Header["From"])
	switch {
	case strings.Contains(sender, "Chase"):
		return newChaseScraper(), nil
	case strings.Contains(sender, "Citi"):
		return newCitiScraper(), nil
	case strings.Contains(sender, "Notifications@uccu.com"):
		return newUCCUScraper(), nil
	}
	return nil, errors.New("unknown sender: " + sender)
}
// newChaseScraper returns a scraper backed by a fresh chaseScraper.
func newChaseScraper() scraper {
	return new(chaseScraper)
}
// newUCCUScraper returns a scraper backed by a fresh uccuScraper.
func newUCCUScraper() scraper {
	return new(uccuScraper)
}
// newCitiScraper returns a scraper backed by a fresh citiScraper.
func newCitiScraper() scraper {
	return new(citiScraper)
}
// containsAny reports whether a contains at least one of the substrings in b.
// The comparison is case-sensitive. With no substrings it returns false.
func containsAny(a string, b ...string) bool {
	for _, needle := range b {
		if strings.Contains(a, needle) {
			return true
		}
	}
	return false
}
// scrape extracts a charge from a Chase alert email.
//
// The whole body is read and searched for the first sentence of the form
//
//	A charge of (<token>) <amount> at <account> has been authorized
//
// where <amount> consists of digits and dots. Only the first occurrence is
// used, so at most one transaction is returned. The transaction is built from
// the captured amount and account, the message's Date header, and the Chase
// source.
//
// An error is returned if the body cannot be read or the sentence is absent.
func (c *chaseScraper) scrape(m *mail.Message) ([]*Transaction, error) {
	body, err := ioutil.ReadAll(m.Body)
	if err != nil {
		return nil, err
	}

	chargeRe := regexp.MustCompile(`A charge of \([^)]*\) (?P<amount>[\d\.]+) at (?P<account>.*) has been authorized`)
	groups := chargeRe.FindSubmatch(body)
	if len(groups) < 2 {
		return nil, fmt.Errorf("no full matches found")
	}

	// Bucket captured text by group name. Index 0 is the whole match and
	// unnamed groups have an empty name; both are skipped.
	named := make(map[string][]string)
	for idx, name := range chargeRe.SubexpNames() {
		if idx == 0 || name == "" {
			continue
		}
		named[name] = append(named[name], string(groups[idx]))
	}

	amounts, accounts := named["amount"], named["account"]
	if len(named) != 2 || len(amounts) != len(accounts) {
		return nil, fmt.Errorf("unexpected matches found looking for transactions: %+v", named)
	}

	// fmt.Sprint renders the raw []string header value (bracketed form).
	date := fmt.Sprint(m.Header["Date"])
	transactions := make([]*Transaction, 0, len(amounts))
	for i, amount := range amounts {
		transactions = append(transactions, NewTransaction(amount, accounts[i], date, Chase))
	}
	return transactions, nil
}
// scrape extracts a purchase from a Citi alert email.
//
// The whole body is read and searched for the first sentence such as
//
//	Citi Alert: A $598.14 transaction was made at REMIX MUSIC SPRINGDA on card ending in 3837
//
// The amount (without the dollar sign) and the vendor text between
// "transaction was made at " and " on card ending in" become a single
// transaction, together with the message's Date header and the Citi source.
//
// If the body contains no such sentence, scrape returns (nil, nil): no
// transactions and no error. A read failure on the body is returned as-is.
//
// NOTE(review): the amount pattern accepts digits only before the decimal
// point, so an amount written with thousands separators (e.g. "$1,598.14")
// would not match and the message would yield no transaction — confirm
// whether Citi ever formats amounts that way.
//
// An older alert layout, e.g.
// "Account #: XXXX3837 $137.87 at AMZN Mktp US Amzn.com/bill WA on 04/03/2020, 09:05 PM ET",
// is not recognised by this implementation.
func (c *citiScraper) scrape(m *mail.Message) ([]*Transaction, error) {
	body, err := ioutil.ReadAll(m.Body)
	if err != nil {
		return nil, err
	}

	alertRe := regexp.MustCompile(`Citi Alert: A \$[0-9][0-9]*\.[0-9][0-9] transaction was made at .* on card ending in`)
	alert := alertRe.Find(body)
	if len(alert) == 0 {
		return nil, nil
	}

	// The first decimal number inside the matched sentence is the amount.
	amountRe := regexp.MustCompile(`[0-9][0-9]*\.[0-9][0-9]`)
	amount := amountRe.Find(alert)

	// Both separators are guaranteed to be present because alertRe matched,
	// so indexing the split results cannot go out of range.
	beforeCard := bytes.SplitN(alert, []byte(" on card ending in"), 2)[0]
	vendor := bytes.SplitN(beforeCard, []byte("transaction was made at "), 3)[1]

	// fmt.Sprint renders the raw []string header value (bracketed form).
	date := fmt.Sprint(m.Header["Date"])
	return []*Transaction{NewTransaction(string(amount), string(vendor), date, Citi)}, nil
}
// uccuAmountPattern matches a dollar amount with optional thousands
// separators and exactly two decimal places, e.g. "$1,234.56". It is compiled
// once at package initialisation instead of on every scrape call.
var uccuAmountPattern = regexp.MustCompile(`\$([0-9]+,?)+\.[0-9][0-9]`)

// scrape extracts a single transaction from a UCCU notification email.
//
// The whole body is read and the first dollar amount in it is taken as the
// transaction amount. The amount is kept positive when the body contains the
// word "credit" anywhere and is negated otherwise. The notification's
// counterparty is not parsed, so the transaction is created with the
// placeholder "?" in its place, along with the message's Date header and the
// UCCU source.
//
// An error is returned if the body cannot be read, if it contains no dollar
// amount, or if the amount cannot be parsed as a number.
func (c *uccuScraper) scrape(m *mail.Message) ([]*Transaction, error) {
	body, err := ioutil.ReadAll(m.Body)
	if err != nil {
		return nil, err
	}

	match := uccuAmountPattern.Find(body)
	if len(match) == 0 {
		return nil, fmt.Errorf("no matches found")
	}

	// Drop the leading "$" and any thousands separators before parsing.
	digits := bytes.ReplaceAll(match[1:], []byte(","), nil)

	// The second argument of ParseFloat is a bit size (32 or 64), not a
	// numeric base.
	amount, err := strconv.ParseFloat(string(digits), 64)
	if err != nil {
		return nil, err
	}

	// Anything not explicitly described as a credit is treated as money out.
	if !bytes.Contains(body, []byte("credit")) {
		amount = -amount
	}

	transaction := NewTransaction(fmt.Sprintf("%.2f", amount), "?", fmt.Sprint(m.Header["Date"]), UCCU)
	return []*Transaction{transaction}, nil
}