Skip to content
This repository has been archived by the owner on Feb 8, 2018. It is now read-only.

backfill remaining exchanges #3912

Closed
wants to merge 63 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
63 commits
Select commit Hold shift + click to select a range
6d0546a
Here's a script for backfilling routes & exchanges
chadwhitacre Feb 6, 2016
5e1d16c
Scope ref uniqueness to network
chadwhitacre Feb 6, 2016
9358207
Drop unique constraint for now
chadwhitacre Feb 11, 2016
6ad00b3
Tweak CSV format
chadwhitacre Feb 11, 2016
d5605b5
Add a matchup script
chadwhitacre Feb 11, 2016
ed4dd57
Log network with backfill
chadwhitacre Feb 11, 2016
a5b8254
Update Stripe match script for payments.csv
chadwhitacre Feb 12, 2016
cd835e5
Clean up a couple things in match-stripe.py
chadwhitacre Feb 16, 2016
5e965f9
Emit rows in original order
chadwhitacre Feb 18, 2016
ab37c09
Ignore the test transaction
chadwhitacre Feb 18, 2016
b122860
Add some debugging aids
chadwhitacre Feb 18, 2016
b1fd83b
Improve match script so we don't exception
chadwhitacre Mar 3, 2016
e9c5581
Stripe's timestamp is only minute granularity
chadwhitacre Mar 3, 2016
dd3ab2f
Smarten up fuzz guessing
chadwhitacre Mar 3, 2016
e2ccc3a
Unbuffer output for easier tailing
chadwhitacre Mar 3, 2016
468a8f5
Fix bug in skipping and use immutable ID as able
chadwhitacre Mar 4, 2016
675df14
Tweak match criteria based on experience
chadwhitacre Mar 4, 2016
30c02be
Tighten up linking based on Customer ID
chadwhitacre Mar 4, 2016
94f1ec6
Blip logging change
chadwhitacre Mar 4, 2016
213f65c
Broaden time horizon more
chadwhitacre Mar 9, 2016
1aed5e9
Filter out Balanced exchanges
chadwhitacre Mar 9, 2016
383a3bd
Map Stripe status to our status
chadwhitacre Mar 9, 2016
27be8cf
Remember matches across months
chadwhitacre Mar 9, 2016
020580f
Output failed exchanges as well as successful ones
chadwhitacre Mar 9, 2016
d468b54
Enable inserting new exchanges (not just updating)
chadwhitacre Mar 9, 2016
64495f4
Start a match-balanced.py script
chadwhitacre Mar 18, 2016
4adfe91
Resolve ambiguity w/ Balanced w/ closest timestamp
chadwhitacre Mar 21, 2016
2fafc3d
Broaden time range to pick up anomalies
chadwhitacre Mar 24, 2016
94b322c
Simplify time range check
chadwhitacre Mar 25, 2016
98e661b
Keep going even if we can't find a match
chadwhitacre Mar 25, 2016
95162c2
Satisfy pyflakes (fix Travis)
chadwhitacre Mar 25, 2016
72ce8e6
Better handle mismatch cases
chadwhitacre Mar 28, 2016
76f1149
Bring over the tally script from #3807
chadwhitacre Mar 30, 2016
3d7d7b1
Wrap db access into a class
chadwhitacre Mar 31, 2016
8922391
Refactor hail mary for a single db call
chadwhitacre Mar 31, 2016
ae3e61f
Start optimizing find; there's a bug!
chadwhitacre Apr 1, 2016
0aeda86
Squish a bug
chadwhitacre Apr 4, 2016
5c5853d
Deprecate hail_mary
chadwhitacre Apr 4, 2016
65f58e2
Start implementing fuzz using in-mem struct
chadwhitacre Apr 4, 2016
92920dc
Fix a bug and try to fix another
chadwhitacre Apr 5, 2016
4100f4e
Add an arg to tally-backfill.py: filename
chadwhitacre Apr 11, 2016
efaf0d3
Enable running *through* a month
chadwhitacre Apr 15, 2016
e6d11f9
Fix a bug ... uncover another?
chadwhitacre Apr 15, 2016
2e98494
Because matches are exchanges, not participants
chadwhitacre Apr 15, 2016
b208646
Prune dead code
chadwhitacre Apr 15, 2016
906bfb8
Fix bug in CLI constraint arg
chadwhitacre Apr 15, 2016
0f319b6
Start moving structs onto Matcher
chadwhitacre Apr 15, 2016
ca680ee
Factor out a little function
chadwhitacre Apr 18, 2016
303daa4
Prune dead code
chadwhitacre Apr 18, 2016
c235c3e
Start simplifying algorithm, to build back up
chadwhitacre Apr 19, 2016
0b2c0b9
Prune more dead code
chadwhitacre Apr 20, 2016
c2cf716
Start inverting loops on Balanced script
chadwhitacre Apr 20, 2016
7094568
Flesh out looping infrastructure
chadwhitacre Apr 20, 2016
49cea72
We have output!
chadwhitacre Apr 20, 2016
2886da2
Seems like our new script is kind of working?
chadwhitacre Apr 21, 2016
958e53c
Show more accurate time remaining
chadwhitacre Apr 21, 2016
4972527
Fix off-by-one bug at the end of the script
chadwhitacre Apr 21, 2016
871a296
Kill the rest of the old code
chadwhitacre Apr 21, 2016
87cbfbb
Rename uncategorized to unmatchable
chadwhitacre Apr 21, 2016
e18f900
Differentiate error cases
chadwhitacre Apr 25, 2016
00bb7e3
Add an option to ctrl-c without dumping
chadwhitacre Apr 25, 2016
be36a26
Match a bunch of failures
chadwhitacre Apr 25, 2016
063a8cf
Start looking at the exchange side
chadwhitacre Apr 28, 2016
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
121 changes: 121 additions & 0 deletions backfill.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,121 @@
#!/usr/bin/env python2 -u
"""Script for backfilling exchange status, route, and ref.

Symlink a directory of data at `./backfill` and then call like so:

[gratipay] $ run_dammit defaults.env local.env -c env/bin/python backfill.py

Data files should be one per network (named `samurai`, `stripe`, etc), as CSVs
with these columns:

    username        ignored
    user_id         required    Gratipay participant.id
    address         optional    defaults to 'fake-deadbeef'
    exchange_id     optional    Gratipay exchanges.id; required for status == 'succeeded'
    timestamp       optional    exchange timestamp; stored when a new exchange is inserted
    amount          optional    transaction amount; required if exchange_id is empty
    ref             optional    defaults to 'fake-beeffeed'
    status          required    Gratipay exchanges.status: succeeded, failed, pending
                                (only succeeded and failed rows are processed; any
                                other status fails an assertion)

For successfully backfilled exchanges (and routes), the script outputs the same
CSV as was input, with optional fields filled in. The script is idempotent (the
faked address and ref are hashed from other input values).

"""
from __future__ import absolute_import, division, print_function, unicode_literals

import csv
import hashlib
import os
import sha
import sys
from os import path

from gratipay import wireup
from gratipay.models.exchange_route import ExchangeRoute
from gratipay.models.participant import Participant


BASE = path.dirname(__file__)


def fake(*a):
    """Return a deterministic placeholder of the form ``'fake-<sha1 hex>'``.

    The positional arguments are stringified with ``str`` and concatenated
    with no separator; the SHA-1 hex digest of that text is appended to the
    ``'fake-'`` prefix. Identical arguments always yield the identical
    placeholder.

    ``hashlib.sha1`` is used in place of the ``sha`` module, which has been
    deprecated since Python 2.5; the digest is the same.
    """
    text = ''.join(map(str, a))
    # hashlib needs bytes, and the joined text is a unicode string whenever
    # the '' literal is unicode (this file enables unicode_literals).
    if not isinstance(text, bytes):
        text = text.encode('utf-8')
    return 'fake-' + hashlib.sha1(text).hexdigest()


def _load_stuff(db, user_id, network, address):
    """Return ``(participant, route)`` for ``user_id`` on ``network``.

    The participant is looked up with ``Participant.from_id``. Its route on
    ``network`` is fetched with ``ExchangeRoute.from_network``; when that
    returns ``None`` a route is created with ``ExchangeRoute.insert`` using
    ``address``. ``db`` is accepted but not used here.
    """
    owner = Participant.from_id(user_id)
    existing = ExchangeRoute.from_network(owner, network)
    if existing is not None:
        return owner, existing
    return owner, ExchangeRoute.insert(owner, network, address)


def link(db, log, network, user_id, address, exchange_id, _, __, ref, status):
    """Update an existing exchange's status, route and ref, then log the row.

    The participant and route come from ``_load_stuff``. The exchange with id
    ``exchange_id`` is updated through ``db.run``. ``_`` and ``__`` are not
    used for the update; they are passed through to ``log`` unchanged.
    """
    owner, via = _load_stuff(db, user_id, network, address)
    db.run(
        "UPDATE exchanges SET status=%s, route=%s, ref=%s WHERE id=%s",
        (status, via.id, ref, exchange_id),
    )
    log(network, owner.username, owner.id, address, exchange_id, _, __, ref, status)


def make(db, log, network, user_id, address, _, timestamp, amount, ref, status):
    """Insert a new exchange row and log it with the id the database returns.

    The participant and route come from ``_load_stuff``. The inserted row
    always has a fee of 0, 'Gratipay' as recorder and the pull-request URL as
    its note. ``_`` (the exchange_id slot of the input row) is ignored; the
    logged row carries the id returned by the INSERT instead.
    """
    owner, via = _load_stuff(db, user_id, network, address)

    # The statement text is kept exactly as it was, flush-left inside the
    # literal.
    INSERT = """\

INSERT INTO exchanges
("timestamp", amount, fee, participant, recorder, note, status, route, ref)
VALUES (%(timestamp)s, %(amount)s, %(fee)s, %(username)s, %(recorder)s, %(note)s,
%(status)s, %(route)s, %(ref)s)
RETURNING id

"""

    values = dict(
        timestamp=timestamp,
        amount=amount,
        fee=0,
        username=owner.username,
        recorder='Gratipay',
        note='https://github.com/gratipay/gratipay.com/pull/3912',
        status=status,
        route=via.id,
        ref=ref,
    )

    new_id = db.one(INSERT, values)
    log(network, owner.username, owner.id, address, new_id, timestamp, amount, ref, status)


def process_row(network, _, user_id, address, exchange_id, timestamp, amount, ref, status):
    """Validate one CSV row and hand it to ``link`` or ``make``.

    ``network`` is supplied by the caller; the remaining parameters are the
    row's columns in order, the first of which (``_``) is ignored. An empty
    ``address`` is replaced with ``fake(network, user_id)``.

    * ``'succeeded'`` rows go to ``link``. For the ``cash`` and ``samurai``
      networks ``ref`` must be the empty string and is passed on as ``None``;
      for every other network ``ref`` must be non-empty.
    * ``'failed'`` rows go to ``make`` and must have a non-empty ``ref``.
    * Any other status fails an assertion whose message is ``locals()``.

    Raises ``AssertionError`` when ``user_id`` or ``status`` is empty, or when
    one of the checks above fails.

    NOTE: ``db`` and ``log`` are not parameters; they are looked up as module
    globals, which this file only binds under ``if __name__ == '__main__'``.
    """
    assert user_id
    if not address:
        address = fake(network, user_id)
    assert status

    # Reject unsupported statuses before any further local is bound, so the
    # locals() in the assertion message holds just the arguments.
    if status not in ('succeeded', 'failed'):
        assert 0, locals()

    if status == 'failed':
        assert ref
        func = make
    else:
        if network not in ('cash', 'samurai'):
            assert ref
        else:
            assert ref == ''
            ref = None
        func = link

    func(db, log, network, user_id, address, exchange_id, timestamp, amount, ref, status)


def main(db, log):
    """Feed every row of every data file under ``backfill`` to ``process_row``.

    Each entry of the ``backfill`` directory (resolved against the current
    working directory) is read as a CSV file whose name is the network.
    Entries whose name starts with ``_`` are skipped. Every row is passed to
    ``process_row`` as ``process_row(network, *row)``.

    ``db`` and ``log`` are accepted but not forwarded to ``process_row``.
    """
    for network in os.listdir('backfill'):
        if network.startswith('_'):
            continue
        # Close each data file as soon as its rows are consumed, including
        # when a row raises, rather than leaving the handle open.
        with open(path.join('backfill', network)) as fp:
            for row in csv.reader(fp):
                process_row(network, *row)


if __name__ == '__main__':
    # Script entry point: connect to the database from the environment and
    # echo every processed row to stdout as CSV. `db` and `log` are bound at
    # module level here.
    db = wireup.db(wireup.env())
    writer = csv.writer(sys.stdout)

    def log(*a):
        """Write the given values to stdout as one CSV row."""
        return writer.writerow(a)

    main(db, log)
Loading