Skip to content

Commit

Permalink
[WIP] Create Validation step (#10)
Browse files Browse the repository at this point in the history
* Add validation code and test case

* Update .gitignore
  • Loading branch information
webb-ben committed Jul 1, 2024
1 parent bf956d8 commit 7f87d8b
Show file tree
Hide file tree
Showing 7 changed files with 2,524 additions and 16 deletions.
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -133,3 +133,5 @@ dmypy.json
*.pyc
yourls.sql.gz
/namespaces/
bin/
pyvenv.cfg
40 changes: 40 additions & 0 deletions tests/data/namespaces/iow/links__0.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@

<?xml version='1.0' encoding='utf-8'?>
<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
<url>
<loc>https://geoconnex.us/iow/homepage</loc>
<lastmod>2023-09-01T05:34:10Z</lastmod>
</url><url>
<loc>https://geoconnex.us/iow/aboutus</loc>
<lastmod>2023-09-01T05:34:10Z</lastmod>
</url><url>
<loc>https://geoconnex.us/demo</loc>
<lastmod>2023-09-01T05:34:10Z</lastmod>
</url><url>
<loc>https://geoconnex.us/iow/nldi</loc>
<lastmod>2023-09-01T05:34:10Z</lastmod>
</url><url>
<loc>https://geoconnex.us/iow/nldi/demo</loc>
<lastmod>2023-09-01T05:34:10Z</lastmod>
</url><url>
<loc>https://geoconnex.us/iow/map</loc>
<lastmod>2023-09-01T05:34:10Z</lastmod>
</url><url>
<loc>https://geoconnex.us/iow/cr-demo</loc>
<lastmod>2023-09-01T05:34:10Z</lastmod>
</url><url>
<loc>https://geoconnex.us/iow/tracker</loc>
<lastmod>2023-09-01T05:34:10Z</lastmod>
</url><url>
<loc>https://geoconnex.us/iow/hyriver</loc>
<lastmod>2023-09-01T05:34:10Z</lastmod>
</url><url>
<loc>https://geoconnex.us/iow/principles</loc>
<lastmod>2023-09-01T05:34:10Z</lastmod>
</url><url>
<loc>https://geoconnex.us/iow/api-mockups</loc>
<lastmod>2023-09-01T05:34:10Z</lastmod>
</url><url>
<loc>https://geoconnex.us/iow/guidance</loc>
<lastmod>2023-09-01T05:34:10Z</lastmod>
</url></urlset>
3 changes: 3 additions & 0 deletions tests/data/namespaces/iow/test.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
id, target, creator, description
https://geoconnexus/iow/homepage, https://internetofwater.org, [email protected], Internet Of Water homepage
https://geoconnex.us//usgs/monitoring-location/([a-zA-Z0-9_]+).*$, https://waterdata.usgs.gov/monitoring-location/$1, [email protected], USGS Monitoring Locations
2,401 changes: 2,401 additions & 0 deletions tests/data/namespaces/ref/hu08/hu08.csv

Large diffs are not rendered by default.

3 changes: 2 additions & 1 deletion yourls-action/yourls_action/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@

import click

from yourls_action.cli import run
from yourls_action.cli import run, validate


@click.group()
Expand All @@ -42,3 +42,4 @@ def cli():


cli.add_command(run)
cli.add_command(validate)
71 changes: 56 additions & 15 deletions yourls-action/yourls_action/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,24 +55,33 @@ def url_join(*parts):

class yourls:
# https://stackoverflow.com/questions/60286623/python-loses-connection-to-mysql-database-after-about-a-day
mysql.connector.connect(
host=os.environ.get('YOURLS_DB_HOST') or 'mysql',
user=os.environ.get('YOURLS_DB_USER') or 'root',
password=os.environ.get('YOURLS_DB_PASSWORD') or 'arootpassword',
database="yourls",
pool_name="yourls_loader",
pool_size=3
)
try:
mysql.connector.connect(
host=os.environ.get('YOURLS_DB_HOST') or 'mysql',
user=os.environ.get('YOURLS_DB_USER') or 'root',
password=os.environ.get('YOURLS_DB_PASSWORD') or 'arootpassword',
database="yourls",
pool_name="yourls_loader",
pool_size=3
)
connection = True
except mysql.connector.errors.DatabaseError as err:
connection = False
if all(os.environ.get(var) for var in
['YOURLS_DB_HOST', 'YOURLS_DB_USER', 'YOURLS_DB_PASSWORD']):
print(f'No SQL connection found: {err}')

def __init__(self, **kwargs):
self.kwargs = kwargs
mydb, cursor = connection()
sql_statement = 'DELETE FROM yourls_url WHERE ip = "0.0.0.0"'
cursor.execute(sql_statement)
mydb.commit()
print(cursor.rowcount, "was deleted.")
cursor.close()
mydb.close()
self.history = set()
if self.connection:
mydb, cursor = connection()
sql_statement = 'DELETE FROM yourls_url WHERE ip = "0.0.0.0"'
cursor.execute(sql_statement)
mydb.commit()
print(cursor.rowcount, "was deleted.")
cursor.close()
mydb.close()

def _check_kwargs(self, keys):
"""
Expand Down Expand Up @@ -163,6 +172,38 @@ def handle_csv(self, file):
for chunk in chunky_parsed:
self.post_mysql(file, chunk)

def _validate_csvs(self, files):
    """
    Validate each CSV file of a collection, one at a time.

    :param files: required, iterable of csv file names. Every entry is
                  handed to ``validate_csv`` in order.
    """
    for csv_file in files:
        self.validate_csv(csv_file)

def validate_csv(self, file):
    """
    Parse a CSV file and stop at the first duplicate identifier.

    A list of file names is delegated to ``_validate_csvs`` so each file
    is checked in turn. Identifiers are recorded in ``self.history``, so
    a duplicate is detected against everything this instance has already
    validated, not only within the current file.

    :param file: required, name of the csv to validate, or a list of
                 such names.
    :raises SystemExit: with exit code 1 when an identifier was already
                        seen (a message is printed first).
    :raises ValueError: when a row has fewer than two comma-separated
                        fields.
    """
    if isinstance(file, list):
        self._validate_csvs(file)
        return

    parsed_csv = self.parse_csv(file)

    # Chunk size 1: each chunk is handled below as a single
    # comma-separated row string -- presumably one CSV record per chunk;
    # confirm against chunkify/parse_csv.
    rows = self.chunkify(parsed_csv, 1)
    uri_stem = self.kwargs['uri_stem']
    for row in rows:
        # Only the identifier is inspected; unpacking two fields keeps
        # rejecting rows that have no second (target) column.
        pid_, _ = row.strip().split(',')[:2]
        if pid_ in self.history:
            print(f'Duplicate IRI detected at {uri_stem}{pid_}')
            # The bare ``exit`` builtin is installed by the ``site``
            # module for interactive use and is not guaranteed to exist;
            # raising SystemExit is the equivalent that always works.
            raise SystemExit(1)
        self.history.add(pid_)

def parse_csv(self, filename):
"""
Parse CSV file into yourls-friendly csv.
Expand Down
20 changes: 20 additions & 0 deletions yourls-action/yourls_action/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,5 +73,25 @@ def run(ctx, **kwargs):
urls.handle_csv(walk_path(p))


@click.command()
@click.pass_context
@click.argument('path', type=str, nargs=-1)
@click.option('-s', '--uri_stem', type=str, default='https://geoconnex.us/',
              help='uri stem to be removed from short url for keyword')
@click.option('-k', '--keyword', type=str, default='id',
              help='field in CSV to be used as keyword')
@click.option('-l', '--long_url', type=str, default='target',
              help='field in CSV to be used as long url')
@click.option('-t', '--title', type=str, default='description',
              help='field in CSV to be used as title')
def validate(ctx, **kwargs):
    # Validate every PATH argument with a single yourls instance; all
    # parsed CLI options (including 'path') are forwarded to it as kwargs.
    checker = yourls(**kwargs)
    for location in kwargs['path']:
        # A path ending in the CSV suffix is validated as-is; anything
        # else is first passed through walk_path (presumably yielding the
        # csv files beneath it -- confirm against walk_path).
        target = location if location.endswith(CSV) else walk_path(location)
        checker.validate_csv(target)


if __name__ == "__main__":
run()

0 comments on commit 7f87d8b

Please sign in to comment.