Skip to content

Commit

Permalink
fix: update how to retrieve HUDOC cases (#196)
Browse files Browse the repository at this point in the history
* fix: update how to retrieve HUDOC cases

* fix: determine current year automatically

* fix: deactivate test workflow and test related to max_documents

* fix: deactivate test workflow and test related to max_documents

* fix: deactivate test workflow and test related to max_documents

* fix: remove the respondentOrderEng field

* tests: disable failing test due to new HUDOC API

* fix: remove trailing whitespace
  • Loading branch information
aquemy authored Nov 12, 2023
1 parent 61a4c3b commit d41e762
Show file tree
Hide file tree
Showing 7 changed files with 39 additions and 185 deletions.
10 changes: 5 additions & 5 deletions .github/workflows/test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -31,8 +31,8 @@ jobs:
uses: ./.github/actions/container_run
with:
cont_action: 'test'
- name: Build test dataset
uses: ./.github/actions/container_run
with:
cont_action: 'build'
action_params: '--max_documents 200 --build ./build/test --workflow local'
#- name: Build test dataset
# uses: ./.github/actions/container_run
# with:
# cont_action: 'build'
# action_params: '--max_documents 200 --build ./build/test --workflow local'
98 changes: 20 additions & 78 deletions echr/steps/cases_info.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,14 +6,14 @@
import time
import urllib3
from concurrent.futures import ThreadPoolExecutor
import datetime

from echr.utils.logger import getlogger
from echr.utils.cli import StatusColumn, TAB
from echr.utils.cli import TAB
from echr.utils.folders import make_build_folder
from rich.markdown import Markdown
from rich.console import Console
from rich.progress import (
TextColumn,
Progress,
BarColumn,
TimeRemainingColumn,
Expand Down Expand Up @@ -62,46 +62,13 @@
"separateopinion",
"scl"
]
BASE_URL = "http://hudoc.echr.coe.int/app/query/results" \
"?query=((((((((((((((((((((%20contentsitename%3AECHR%20AND%20(NOT%20(doctype%3DPR%20OR%20" \
"doctype%3DHFCOMOLD%20OR%20doctype%3DHECOMOLD)))%20XRANK(cb%3D14)%20doctypebranch%3AGRANDCHAMBER" \
")%20XRANK(cb%3D13)%20doctypebranch%3ADECGRANDCHAMBER)%20XRANK(cb%3D12)%20doctypebranch%3ACHAMBER)" \
"%20XRANK(cb%3D11)%20doctypebranch%3AADMISSIBILITY)%20XRANK(cb%3D10)%20doctypebranch%3ACOMMITTEE)" \
"%20XRANK(cb%3D9)%20doctypebranch%3AADMISSIBILITYCOM)%20XRANK(cb%3D8)%20doctypebranch%3ADECCOMMISSION)" \
"%20XRANK(cb%3D7)%20doctypebranch%3ACOMMUNICATEDCASES)%20XRANK(cb%3D6)%20doctypebranch%3ACLIN)%20" \
"XRANK(cb%3D5)%20doctypebranch%3AADVISORYOPINIONS)%20XRANK(cb%3D4)%20doctypebranch%3AREPORTS)%20" \
"XRANK(cb%3D3)%20doctypebranch%3AEXECUTION)%20XRANK(cb%3D2)%20doctypebranch%3AMERITS)%20XRANK(cb%3D1)" \
"%20doctypebranch%3ASCREENINGPANEL)%20XRANK(cb%3D4)%20importance%3A1)%20XRANK(cb%3D3)%20importance%3A2)" \
"%20XRANK(cb%3D2)%20importance%3A3)%20XRANK(cb%3D1)%20importance%3A4)%20XRANK(cb%3D2)%20" \
"languageisocode%3AENG)%20XRANK(cb%3D1)%20languageisocode%3AFRE" \
"&select={}&sort=&rankingModelId=4180000c-8692-45ca-ad63-74bc4163871b".format(','.join(fields))
LENGTH = 500 # maximum number of items per request


def determine_max_documents(base_url, default_value):
"""
Automatically determine the number of available documents in HUDOC

:param default_value: fallback value
:type default_value: [int]
"""
url = base_url + "&start={}&length={}".format(0, 1)
for i in range(MAX_RETRY):
try:
r = requests.get(url)
if not r.ok:
print('\t({}/{}) Failed to fetch max document numbers'.format(i + 1, MAX_RETRY))
continue
else:
output = json.loads(r.content)
return 0, int(output['resultcount'])
except Exception as e:
__console.print_exception()
log.error(e)
print(TAB + '({}/{}) Failed to fetch max document numbers'.format(i + 1, MAX_RETRY))
print(TAB + "[bold yellow]:warning: Fallback to the default number of cases: {}".format(default_value))
max_documents = default_value
return 1, max_documents
BASE_URL = 'https://hudoc.echr.coe.int/app/query/results?query=contentsitename:ECHR' \
' AND (NOT (doctype=PR OR doctype=HFCOMOLD OR doctype=HECOMOLD)) AND ((languageisocode="ENG"))' \
' AND (kpdate>="YEAR-01-01T00:00:00.0Z" AND kpdate<="YEAR_1-01-01T00:00:00.0Z")' \
' AND ((organisations:"ECHR"))&select={}&sort=&start=0&length=10000&rankingModelId=11111111-0000-0000-0000-000000000000'.format(','.join(fields))
LENGTH = 10_000 # maximum number of items per request
YEARS = range(1959, datetime.date.today().year+1)


def get_case_info(console, base_url, max_documents, path):
Expand All @@ -115,16 +82,12 @@ def get_case_info(console, base_url, max_documents, path):
:param: path: path to store the information
:type: str
"""
length = min(LENGTH, max_documents)
if length <= 0:
return 2

def get_cases_info_step(start, length, progress, task):
def get_cases_info_step(year, progress, task):
error = ""
file_path = os.path.join(path, "{}.json".format(start))
file_path = os.path.join(path, "{}.json".format(year))
failed_to_get_some_cases = False
with open(file_path, 'wb') as f:
url = base_url + "&start=%d&length=%d" % (start, length)
url = base_url.replace('YEAR_1', str(year+1)).replace('YEAR', str(year))
for i in range(MAX_RETRY):
error = ""
try:
Expand All @@ -140,31 +103,31 @@ def get_cases_info_step(start, length, progress, task):
except OSError:
pass
__console.print_exception()
log.error('({}/{}) Failed to fetch information {} to {}'.format(
i + 1, MAX_RETRY, start, start + length))
error = '\n| ({}/{}) Failed to fetch information {} to {}'.format(
i + 1, MAX_RETRY, start, start + length)
log.error('({}/{}) Failed to fetch information for year {}'.format(
i + 1, MAX_RETRY, year))
error = '\n| ({}/{}) Failed to fetch information for year {}'.format(
i + 1, MAX_RETRY, year)
time.sleep(0.001)
if error:
progress.update(task, advance=0, error=error)
else:
failed_to_get_some_cases = True
progress.update(task, advance=length, to_be_completed=start + 2 * length)
progress.update(task, advance=1, to_be_completed=len(YEARS))
return failed_to_get_some_cases

with Progress(
TAB + "> Downloading... [IN PROGRESS]\n",
BarColumn(30),
TimeRemainingColumn(),
"| ({task.completed}/{task.total}) Fetching information from cases {task.completed} to {task.fields[to_be_completed]}"
"| ({task.completed}/{task.total}) Fetching cases information for year {task.completed}"
"{task.fields[error]}",
transient=True,
console=console
) as progress:
task = progress.add_task("Downloading...", total=max_documents, to_be_completed=length, error="")
f = lambda x: get_cases_info_step(x, length, progress, task)
task = progress.add_task("Downloading...", total=len(YEARS), to_be_completed=len(YEARS), error="")
f = lambda x: get_cases_info_step(x, progress, task)
with ThreadPoolExecutor(16) as executor:
results = list(executor.map(f, range(0, max_documents, length)))
results = list(executor.map(f, YEARS))
failed_to_get_some_cases = all(results)
if failed_to_get_some_cases:
print(TAB + '> Downloading... [yellow][WARNING]')
Expand Down Expand Up @@ -198,29 +161,8 @@ def run(console, build, title, doc_ids=None, max_documents=-1, force=False):
make_build_folder(console, output_folder, force, strict=False)

print(Markdown("- **Determining the number cases**"))

if doc_ids:
_, max_documents = determine_max_documents(BASE_URL, 144579)
print(TAB + "> Doc ids given")

else:
if max_documents == -1:
print(TAB + "> The total number of documents is not provided")
with Progress(
TextColumn(TAB + "> Querying HUDOC...", justify="right"),
StatusColumn({
None: '[IN PROGRESS]',
0: '[green] [DONE]',
1: '[red] [FAILED]'
}),
transient=True,
console=console
) as progress:
task = progress.add_task("Get total number of documents")
while not progress.finished:
rc, max_documents = determine_max_documents(BASE_URL, 144579) # v1.0.0 value
progress.update(task, rc=rc)
print(TAB + "> The total number of documents to retrieve: {}".format(max_documents))
print(Markdown("- **Get case information from HUDOC**"))
get_case_info(console, BASE_URL, max_documents, output_folder)

Expand Down
8 changes: 4 additions & 4 deletions echr/steps/filter.py
Original file line number Diff line number Diff line change
Expand Up @@ -332,12 +332,12 @@ def format_cases(console, cases):
cases[i]['originatingbody_type'] = ORIGINATING_BODY[cases[i]['originatingbody']]['type']
cases[i]['originatingbody_name'] = ORIGINATING_BODY[cases[i]['originatingbody']]['name']

cases[i]["rank"] = cases[i]['Rank']
del cases[i]["Rank"]
#cases[i]["rank"] = cases[i]['Rank']
#del cases[i]["Rank"]

del cases[i]["isplaceholder"]
cases[i]["kpdate"] = cases[i]['kpdateAsText']
del cases[i]['kpdateAsText']
#cases[i]["kpdate"] = cases[i]['kpdateAsText']
#del cases[i]['kpdateAsText']
del cases[i]["documentcollectionid2"]
cases[i]["kpthesaurus"] = cases[i]["kpthesaurus"].split(';')
cases[i]["scl"] = cases[i]["scl"].split(';') if cases[i]["scl"].strip() else []
Expand Down
4 changes: 3 additions & 1 deletion echr/steps/generate_datasets.py
Original file line number Diff line number Diff line change
Expand Up @@ -198,7 +198,7 @@ def run(console, build, title, doc_ids=None, articles=[], processed_folder='all'
"originatingbody_type",
"originatingbody_name",
"respondent",
"respondentOrderEng",
#"respondentOrderEng",
"separateopinion",
"typedescription"

Expand All @@ -207,6 +207,8 @@ def run(console, build, title, doc_ids=None, articles=[], processed_folder='all'
keys_list = ["article", "documentcollectionid", "externalsources", "extractedappno", "kpthesaurus", "parties",
"scl", "representedby"]



feature_index = {k: i for i, k in enumerate(keys + keys_list)}
feature_to_value = dict(zip(keys + keys_list, [None] * (len(keys) + len(keys_list))))
for c in cases:
Expand Down
12 changes: 6 additions & 6 deletions tests/data/test_filter_samples.py
Original file line number Diff line number Diff line change
Expand Up @@ -294,7 +294,7 @@
u'extractedappno': u'8675/15;8697/15;16483/12;47/15;646/16;444/17;391/16;77/17;78/17;2011/95;27765/09;60125/11;16643/09;30880/10;56437/07;25424/05;41744/10;41805/10;43611/11;41738/10;23531/94;39630/09;20261/12;52207/99;24833/94;48787/99;36925/07;71503/01;30696/09;55721/07;13255/07;18670/03;2344/02;2005/85;2013/32;37201/06;26565/05;44774/98;50541/08;5809/08;25781/94;59793/17;17056/06;42750/09;25424/09;59166/12;25389/05;12552/12;47287/15;45917/99;16387/03;45223/05;45302/05;41872/10;17502/07',
u'typedescription': u'15', u'article': u'1;13;13+P4-4;34;35;35-1;35-3-a;37;37-1;37-1-b;37-1-c;P4-4',
u'externalsources': u'Guidelines of the Committee of Ministers of the Council of Europe on Forced Return, adopted on 4 May 2005;Report of the European Committee for the Prevention of Torture and Inhuman or Degrading Treatment or Punishment (CPT) on their visit to Spain in July 2014 (published on 9 April 2015);The 2015 annual activity report by the Commissioner for Human Rights of the Council of Europe;Report of the fact-finding mission by the Special Representative of the Secretary General of the Council of Europe on migration and refugees, to Spain, March 2018 (SG/Inf(2018)25);Resolution 2299 (2019) of the Parliamentary Assembly of the Council of Europe on the pushback policies and practice in Council of Europe member States;Charter of the United Nations (UN Charter), signed on 26 June 1945 in San Francisco;Articles 27, 31 and 32 of the Vienna Convention on the Law of Treaties of 23 May 1969;Geneva Convention of 28 July 1951 relating to the Status of Refugees;Convention against Torture and Other Cruel, Inhuman or Degrading Treatment or Punishment of 10 December 1984 (UNCAT);Declaration on Territorial Asylum adopted by the United Nations General Assembly on 14 December 1967 (Resolution 2312 (XXII));Draft Articles on the Expulsion of Aliens adopted by the International Law Commission at their sixty-sixth session (2014) of which the United Nations General Assembly took note (Resolution A/RES/69/119 of 10 December 2014);Second report on the expulsion of aliens, dated 20 July 2006 (Document A/CN.4/573), by Mr Maurice Kamto, Special Rapporteur;Conclusions on International Protection adopted by the Executive Committee of the UNHCR Programme 1975 \u2013 2017;Views adopted by the Committee on the Rights of the Child on 12 February 2019 under the Optional Protocol to the Convention on the Rights of the Child on a communications procedure, concerning communication No. 4/2016',
u'meetingnumber': u'', u'doctype': u'HEJUD', u'Rank': u'25.8842315673828',
u'meetingnumber': u'', u'doctype': u'HEJUD', u'rank': u'25.8842315673828',
u'conclusion': u'Preliminary objection dismissed (Article 34 - Victim);Preliminary objection dismissed (Article 35-1 - Exhaustion of domestic remedies);Preliminary objection dismissed (Article 37-1 - Respect for human rights;Article 37-1-b - Matter resolved;Article 37-1-c - Continued examination not justified);Preliminary objection joined to merits and dismissed (Article 35-3-a - Ratione materiae);No violation of Article 4 of Protocol No. 4 - Prohibition of collective expulsion of aliens-{general} (Article 4 of Protocol No. 4 - Prohibition of collective expulsion of aliens);No violation of Article 13+P4-4 - Right to an effective remedy (Article 13 - Effective remedy) (Article 4 of Protocol No. 4 - Prohibition of collective expulsion of aliens-{general};Prohibition of collective expulsion of aliens)'
},
{
Expand All @@ -312,7 +312,7 @@
u'ecli': u'ECLI:CE:ECHR:2007:0515JUD005239199', u'importance': u'1', u'kpdate': u'5/15/2007 12:00:00 AM',
u'judgementdate': u'15/05/2007 00:00:00', u'extractedappno': u'52391/99', u'typedescription': u'15',
u'article': u'2;2-2;2-1;6;6-1;13;41', u'externalsources': u'source', u'meetingnumber': u'1', u'doctype': u'HJUDAZE',
u'Rank': u'17.1129493713379',
u'rank': u'17.1129493713379',
u'conclusion': u'Violation of Art. 2;No violation of Art. 2;No separate issue under Art. 13;Non-pecuniary damage - financial award;Costs and expenses award - domestic proceedings;Costs and expenses partial award - Convention proceedings'
},
{
Expand All @@ -332,7 +332,7 @@
u'extractedappno': u'74613/01;58442/00;32492/96;32547/96;32548/96;33209/96;33210/96;16875/90;17495/90;27077/95;30943/96;31871/96;34044/96;35532/97;44801/98;37201/97',
u'typedescription': u'14', u'article': u'5;5-1-a;5-1;6;6-1;6-3-d;7;7-1;29;29-3',
u'externalsources': u'Convention on the Prevention and Punishment of the Crime of Genocide (1948);Convention on the Prevention and Suppression of Genocide (1948);Resolution of the UN General Assembly 47/121 of 18 December 1992;Case-law of the ICTY (Prosecutor v. Krstic, judgment of 2 August 2001;Prosecutor v. Kupreskic and Others, judgment of 14 January 2000) and ICJ (Bosnia and Herzegovina v. Serbia and Montenegro, judgment of 26 February 2007)',
u'meetingnumber': u'', u'doctype': u'HJUDGEO', u'Rank': u'15.1129503250122',
u'meetingnumber': u'', u'doctype': u'HJUDGEO', u'rank': u'15.1129503250122',
u'conclusion': u'Remainder inadmissible;No violation of Art. 6-1 or 5-1;No violation of Art. 7'
},
{
Expand All @@ -348,7 +348,7 @@
u'issue': u'Code de proc\xe9dure p\xe9nale, articles 174-185, 197', u'ecli': u'ECLI:CE:ECHR:2000:0725JUD002495494',
u'importance': u'1', u'kpdate': u'7/25/2000 12:00:00 AM', u'judgementdate': u'25/07/2000 00:00:00',
u'extractedappno': u'24954/94;24971/94;24972/94;22774/93', u'typedescription': u'15', u'article': u'6;6-1;41',
u'externalsources': u'', u'meetingnumber': u'', u'doctype': u'HEJUD', u'Rank': u'16.1129493713379',
u'externalsources': u'', u'meetingnumber': u'', u'doctype': u'HEJUD', u'rank': u'16.1129493713379',
u'conclusion': u"Violation de l'Art. 6-1 du fait du manque d'impartialit\xe9 du tribunal;Violation de l'Art. 6-1 du fait de l'impossibilit\xe9 d'\xeatre entendu en personne par le juge d'appel;Dommage mat\xe9riel - demande rejet\xe9e;Pr\xe9judice moral - r\xe9paration p\xe9cuniaire;Remboursement partiel frais et d\xe9pens"
},
{
Expand All @@ -363,7 +363,7 @@
u'kpdate': u'5/12/2020 12:00:00 AM', u'judgementdate': u'12/05/2020 00:00:00',
u'extractedappno': u'8211/10;25253/08;36391/02;27422/05;38802/08;21980/04;36658/05;71409/10;57837/09;50541/08;22744/07;76577/13;34779/09;4268/04;42371/02;25703/11;2308/06;48016/06;7817/07;9106/09;46661/09;7851/05;38907/09;30733/08',
u'typedescription': u'15', u'article': u'6;6+6-1;6-1;6-3-c', u'externalsources': u'', u'meetingnumber': u'',
u'doctype': u'HEJUD', u'Rank': u'12.1129503250122',
u'doctype': u'HEJUD', u'rank': u'12.1129503250122',
u'conclusion': u'Article 6+6-1 - Right to a fair trial (Article 6-3-c - Defence through legal assistance) (Article 6 - Right to a fair trial;Criminal proceedings;Article 6-1 - Fair hearing)'
},
{
Expand All @@ -377,7 +377,7 @@
u'ecli': u'ECLI:CE:ECHR:2020:0519JUD004554009', u'importance': u'4', u'kpdate': u'5/19/2020 12:00:00 AM',
u'judgementdate': u'19/05/2020 00:00:00', u'extractedappno': u'', u'typedescription': u'15',
u'article': u'11;11-1', u'externalsources': u'', u'meetingnumber': u'', u'doctype': u'HEJUD',
u'Rank': u'',
u'rank': u'',
u'conclusion': u'Violation of Article 11 - Freedom of assembly and association (Article 11-1 - Freedom of peaceful assembly)'
}
]
Loading

0 comments on commit d41e762

Please sign in to comment.