Skip to content

Commit

Permalink
refactor: move TSV stuff to its own file
Browse files Browse the repository at this point in the history
  • Loading branch information
andylolz committed May 14, 2024
1 parent 42ff830 commit 2f6f825
Show file tree
Hide file tree
Showing 4 changed files with 40 additions and 36 deletions.
36 changes: 2 additions & 34 deletions x_notes/helpers.py
Original file line number Diff line number Diff line change
@@ -1,45 +1,13 @@
import csv
import json
from collections import defaultdict
from datetime import date, datetime, timedelta, timezone
from io import StringIO
from typing import Any, Generator

import requests
from datetime import datetime, timezone
from typing import Any


def to_isoformat(ms_since_epoch: str) -> str:
    """Convert a millisecond Unix timestamp string to an ISO 8601 UTC string.

    Args:
        ms_since_epoch: Base-10 integer number of milliseconds since the Unix
            epoch, given as a string (for example ``"1700000000000"``).

    Returns:
        The instant, reduced to whole seconds, as produced by
        ``datetime.isoformat()`` for a UTC-aware datetime (``+00:00`` offset).

    Raises:
        ValueError: If ``ms_since_epoch`` cannot be parsed as an integer.
    """
    # Divide numerically instead of slicing off the last three characters:
    # slicing raises ValueError for inputs shorter than four digits ("0" ->
    # int("")) and silently yields a wrong value when the string carries
    # trailing whitespace. Negative values floor towards the earlier second.
    seconds = int(ms_since_epoch) // 1000
    return datetime.fromtimestamp(seconds, timezone.utc).isoformat()


def get_data(date: date, fname: str) -> Generator:
    """Request one day's TSV export and return a generator of row dicts.

    The HTTP request is made (and its status checked) immediately; the body is
    only downloaded and parsed as the returned generator is consumed.

    Args:
        date: Day whose export is requested; formatted as ``YYYY/MM/DD`` in
            the URL path.
        fname: Export name, used both as a path segment and as the file-name
            prefix (``{fname}/{fname}-00000.tsv``).

    Returns:
        A generator yielding one ``dict`` per data row, keyed by the values of
        the first non-blank line (the header row). ``zip`` pairs headers with
        values, so a row with a different column count is truncated to the
        shorter of the two.

    Raises:
        requests.RequestException: If the request fails or the server answers
            with an error status.
    """
    url = (
        "https://ton.twimg.com/birdwatch-public-data/"
        f"{date:%Y/%m/%d}/{fname}/{fname}-00000.tsv"
    )
    r = requests.get(url, stream=True)
    try:
        r.raise_for_status()
    except requests.HTTPError:
        # Release the streamed connection before reporting the failure.
        r.close()
        raise

    def _data_generator() -> Generator:
        # Close the streamed response once the generator is exhausted or
        # closed. A generator that is never started does not enter this block.
        with r:
            headers = None
            for raw in r.iter_lines():
                if not raw:
                    # A blank line has no record; calling next() on an empty
                    # csv.reader would raise StopIteration, which surfaces as
                    # RuntimeError inside a generator (PEP 479).
                    continue
                # Each line is parsed on its own, so a stray quote character
                # cannot swallow the lines that follow it.
                cols = next(csv.reader([raw.decode()], delimiter="\t"))
                if headers is None:
                    headers = cols
                    continue
                yield dict(zip(headers, cols))

    return _data_generator()


def get_generator(fname: str) -> Generator:
    """Return a row generator for the most recent available export of ``fname``.

    Today's export (per ``date.today()``) is tried first; if that request
    fails, yesterday's export is requested instead.

    Args:
        fname: Export name passed through to ``get_data``.

    Returns:
        The generator returned by ``get_data`` for the first day whose request
        succeeded.

    Raises:
        requests.RequestException: If the request for yesterday's export also
            fails. Errors raised later, while the returned generator is being
            consumed, are not handled here.
    """
    today = date.today()
    try:
        return get_data(today, fname)
    except requests.RequestException:
        # Only request/HTTP failures trigger the fallback; any other exception
        # is a bug and is allowed to propagate instead of being swallowed.
        pass
    yesterday = today - timedelta(days=1)
    return get_data(yesterday, fname)


def load_notes() -> dict[str, dict[str, Any]]:
try:
with open("output/data/notes.json") as fh:
Expand Down
3 changes: 2 additions & 1 deletion x_notes/notes.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,8 @@
from datetime import datetime, timedelta, timezone
from typing import Any

from .helpers import get_generator, to_isoformat
from .helpers import to_isoformat
from .tsv import get_generator

# Matches "http://" or "https://" followed by one or more non-whitespace
# characters; the whole URL is captured as group 1.
url_re = re.compile(r"(https?://[^\s]+)")
# POSIX timestamp (float seconds) of the instant seven days before this
# statement is executed, computed from the current UTC time. It is evaluated
# once and not refreshed afterwards.
one_week_ago = (datetime.now(timezone.utc) - timedelta(days=7)).timestamp()
Expand Down
3 changes: 2 additions & 1 deletion x_notes/statuses.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
from typing import Any

from .helpers import get_generator, to_isoformat
from .helpers import to_isoformat
from .tsv import get_generator

# String constants for two rating states.
# NOTE(review): presumably these match status values found in the downloaded
# TSV data -- confirm against where they are compared.
helpful = "CURRENTLY_RATED_HELPFUL"
unhelpful = "CURRENTLY_RATED_NOT_HELPFUL"
Expand Down
34 changes: 34 additions & 0 deletions x_notes/tsv.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
import csv
from datetime import date, timedelta
from io import StringIO
from typing import Generator

import requests


def get_data(date: date, fname: str) -> Generator:
    """Request one day's TSV export and return a generator of row dicts.

    The HTTP request is made (and its status checked) immediately; the body is
    only downloaded and parsed as the returned generator is consumed.

    Args:
        date: Day whose export is requested; formatted as ``YYYY/MM/DD`` in
            the URL path.
        fname: Export name, used both as a path segment and as the file-name
            prefix (``{fname}/{fname}-00000.tsv``).

    Returns:
        A generator yielding one ``dict`` per data row, keyed by the values of
        the first non-blank line (the header row). ``zip`` pairs headers with
        values, so a row with a different column count is truncated to the
        shorter of the two.

    Raises:
        requests.RequestException: If the request fails or the server answers
            with an error status.
    """
    url = (
        "https://ton.twimg.com/birdwatch-public-data/"
        f"{date:%Y/%m/%d}/{fname}/{fname}-00000.tsv"
    )
    r = requests.get(url, stream=True)
    try:
        r.raise_for_status()
    except requests.HTTPError:
        # Release the streamed connection before reporting the failure.
        r.close()
        raise

    def _data_generator() -> Generator:
        # Close the streamed response once the generator is exhausted or
        # closed. A generator that is never started does not enter this block.
        with r:
            headers = None
            for raw in r.iter_lines():
                if not raw:
                    # A blank line has no record; calling next() on an empty
                    # csv.reader would raise StopIteration, which surfaces as
                    # RuntimeError inside a generator (PEP 479).
                    continue
                # Each line is parsed on its own, so a stray quote character
                # cannot swallow the lines that follow it.
                cols = next(csv.reader([raw.decode()], delimiter="\t"))
                if headers is None:
                    headers = cols
                    continue
                yield dict(zip(headers, cols))

    return _data_generator()


def get_generator(fname: str) -> Generator:
    """Return a row generator for the most recent available export of ``fname``.

    Today's export (per ``date.today()``) is tried first; if that request
    fails, yesterday's export is requested instead.

    Args:
        fname: Export name passed through to ``get_data``.

    Returns:
        The generator returned by ``get_data`` for the first day whose request
        succeeded.

    Raises:
        requests.RequestException: If the request for yesterday's export also
            fails. Errors raised later, while the returned generator is being
            consumed, are not handled here.
    """
    today = date.today()
    try:
        return get_data(today, fname)
    except requests.RequestException:
        # Only request/HTTP failures trigger the fallback; any other exception
        # is a bug and is allowed to propagate instead of being swallowed.
        pass
    yesterday = today - timedelta(days=1)
    return get_data(yesterday, fname)

0 comments on commit 2f6f825

Please sign in to comment.