Skip to content

Commit

Permalink
fix: resolve some scraper issues
Browse files Browse the repository at this point in the history
  • Loading branch information
andylolz committed Mar 27, 2024
1 parent 10c9ade commit 835eace
Show file tree
Hide file tree
Showing 2 changed files with 5 additions and 1 deletion.
2 changes: 1 addition & 1 deletion orgidfinder/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ def parse_org(organisation):

if not org_name.get(default_lang):
if len(org_name) > 1:
- print('Unclear which lang should be default')
+ print('Unclear which lang should be default: {}'.format(org_name))
default_lang = list(org_name.keys())[0]

return {
Expand Down
4 changes: 4 additions & 0 deletions scraper.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,12 +50,16 @@ def zip_discard_compr(*iterables, sentinel=None):
for d in data:
default_lang = d['lang']
default_name = d['name'].get(default_lang)
+ if not default_name:
+     continue

text = d['org_id'].lower()
for subtext in set([text[i: j] for i in range(len(text)) for j in range(i + 1, len(text) + 1) if len(text[i:j]) == minlen]):
counter[subtext].add((default_name, d['org_id']))

for lang, name in d['name'].items():
+ if not name:
+     continue
text = name.lower()
if lang != default_lang:
name += ' ({})'.format(d['name'][default_lang])
Expand Down

0 comments on commit 835eace

Please sign in to comment.