Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Localised organisation names - part 1 #388

Merged
merged 9 commits into from
Jan 18, 2023
8 changes: 6 additions & 2 deletions iati_datastore/iatilib/crawler.py
Original file line number Diff line number Diff line change
Expand Up @@ -224,7 +224,7 @@ def parse_resource(resource):
return resource # , new_identifiers


def update_activities(dataset_name):
def update_activities(dataset_name, ignore_hashes=False):
'''
Parses and stores the raw XML associated with a resource [see parse_resource()], or logs the invalid resource
:param resource_url:
Expand All @@ -239,6 +239,9 @@ def update_activities(dataset_name):

dataset = Dataset.query.get(dataset_name)
resource = dataset.resources[0]

if ignore_hashes: db.session._update_all_unique = True

try:
db.session.query(Log).filter(sa.and_(
Log.logger.in_(
Expand All @@ -262,6 +265,7 @@ def update_activities(dataset_name):
))
db.session.commit()

if ignore_hashes: db.session._update_all_unique = False

def update_dataset(dataset_name, ignore_hashes):
'''
Expand Down Expand Up @@ -307,7 +311,7 @@ def update_dataset(dataset_name, ignore_hashes):

if resource.last_status_code == 200 and not resource.last_parsed:
queue.enqueue(
update_activities, args=(dataset_name,),
update_activities, args=(dataset_name, ignore_hashes),
result_ttl=0, job_timeout=100000)


Expand Down
12 changes: 11 additions & 1 deletion iati_datastore/iatilib/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,13 @@ def _unique(session, cls, hashfunc, queryfunc, constructor, arg, kw):

key = (cls, hashfunc(*arg, **kw))
if key in cache:
return cache[key]
if getattr(session, '_update_all_unique', False):
obj = cache[key]
for name, value in kw.items():
setattr(obj, name, value)
return obj
else:
return cache[key]
else:
with session.no_autoflush:
q = session.query(cls)
Expand All @@ -46,6 +52,9 @@ def _unique(session, cls, hashfunc, queryfunc, constructor, arg, kw):
if not obj:
obj = constructor(*arg, **kw)
session.add(obj)
elif getattr(session, '_update_all_unique', False):
for name, value in kw.items():
setattr(obj, name, value)
cache[key] = obj
return obj

Expand Down Expand Up @@ -195,6 +204,7 @@ class Organisation(db.Model, UniqueMixin):
id = sa.Column(sa.Integer, primary_key=True, nullable=False)
ref = sa.Column(sa.Unicode, nullable=False)
name = sa.Column(sa.Unicode, default=u"", nullable=True)
name_all_values = sa.Column(JSONB, nullable=True)
type = sa.Column(codelists.OrganisationType.db_type())
__table_args__ = (sa.UniqueConstraint('ref', 'name', 'type'),)

Expand Down
37 changes: 28 additions & 9 deletions iati_datastore/iatilib/parse.py
Original file line number Diff line number Diff line change
Expand Up @@ -103,10 +103,26 @@ def xpath_decimal(xpath, xml, resource=None, major_version='1'):
return None


def parse_org(xml, resource=no_resource, major_version='1'):
def xvals_lang(xml, major_version, default_lang="default"):
    """Collect the text of an element into a dict keyed by language.

    When ``major_version`` is ``'1'`` the element itself is inspected;
    for any other value each ``./narrative`` child is inspected instead.
    The language key is read from ``@xml:lang`` with ``default_lang``
    handed to ``xval`` as its third argument (presumably the fallback
    used when the attribute is absent -- confirm against ``xval``).
    If the same language key occurs more than once, the last value wins.

    NOTE(review): the callers visible in this change pass
    ``TEXT_ELEMENT[major_version]`` as the second argument rather than
    ``major_version`` itself -- confirm that value compares equal to
    ``'1'`` where the version-1 branch is intended.

    :param xml: element exposing an ``xpath`` method
    :param major_version: IATI major version string
    :param default_lang: language key used when none is declared
    :return: dict mapping language key to text value
    """
    # Only the node selection differs between versions; the extraction
    # applied to each selected node is the same.
    node_path = "." if major_version == '1' else "./narrative"

    names_by_lang = {}
    for node in xml.xpath(node_path):
        language = xval(node, "@xml:lang", default_lang)
        names_by_lang[language] = xval(node, "text()")
    return names_by_lang


def parse_org(xml, resource=no_resource, major_version='1', default_lang="default"):
data = {
"ref": xval(xml, "@ref", u""),
"name": xval(xml, TEXT_ELEMENT[major_version], u""),
"name_all_values": xvals_lang(xml, TEXT_ELEMENT[major_version], default_lang=default_lang)
}
try:
data['type'] = codelists.by_major_version[major_version].OrganisationType.from_string(xval(xml, "@type"))
Expand All @@ -115,7 +131,7 @@ def parse_org(xml, resource=no_resource, major_version='1'):
return Organisation.as_unique(db.session, **data)


def reporting_org(element, resource=no_resource, major_version='1'):
def reporting_org(element, resource=no_resource, major_version='1', default_lang="default"):
try:
xml = element.xpath("./reporting-org")[0]
except IndexError:
Expand All @@ -125,6 +141,7 @@ def reporting_org(element, resource=no_resource, major_version='1'):
data = {
"ref": xval(xml, "@ref"),
"name": xval(xml, TEXT_ELEMENT[major_version], u""),
"name_all_values": xvals_lang(xml, TEXT_ELEMENT[major_version], default_lang=default_lang)
}
try:
data.update({
Expand All @@ -143,7 +160,7 @@ def reporting_org(element, resource=no_resource, major_version='1'):
return Organisation.as_unique(db.session, **data)


def participating_orgs(xml, resource=None, major_version='1'):
def participating_orgs(xml, resource=None, major_version='1', default_lang="default"):
ret = []
seen = set()
for ele in xml.xpath("./participating-org"):
Expand All @@ -164,7 +181,7 @@ def participating_orgs(xml, resource=None, major_version='1'):
role = codelists.by_major_version['1'].OrganisationRole.from_string(value)
else:
role = codelists.by_major_version[major_version].OrganisationRole.from_string(xval(ele, "@role").title())
organisation = parse_org(ele, major_version=major_version)
organisation = parse_org(ele, major_version=major_version, default_lang=default_lang)
if not (role, organisation.ref) in seen:
seen.add((role, organisation.ref))
ret.append(Participation(role=role, organisation=organisation))
Expand Down Expand Up @@ -310,14 +327,14 @@ def description_all_values(xml, resource=None, major_version='1'):
return ret


def transactions(xml, resource=no_resource, major_version='1'):
def transactions(xml, resource=no_resource, major_version='1', default_lang="default"):
def from_cl(code, codelist):
return codelist.from_string(code) if code is not None else None

def from_org(path, ele, resource=None, major_version='1'):
organisation = ele.xpath(path)
if organisation:
return parse_org(organisation[0], major_version=major_version)
return parse_org(organisation[0], major_version=major_version, default_lang=default_lang)
# return Organisation.as_unique(db.session, ref=org) if org else None

def process(ele):
Expand Down Expand Up @@ -537,6 +554,8 @@ def activity(xml, resource=no_resource, major_version='1', version=None):
Expects xml argument of type lxml.etree._Element
"""

default_lang = xval(xml, "@xml:lang", "default")

if major_version == '2':
start_planned = partial(xval_date, "./activity-date[@type='1']")
start_actual = partial(xval_date, "./activity-date[@type='2']")
Expand Down Expand Up @@ -568,12 +587,12 @@ def activity(xml, resource=no_resource, major_version='1', version=None):
"hierarchy": hierarchy,
"last_updated_datetime": last_updated_datetime,
"default_language": default_language,
"reporting_org": reporting_org,
"reporting_org": partial(reporting_org, default_lang=default_lang),
"websites": websites,
"participating_orgs": participating_orgs,
"participating_orgs": partial(participating_orgs, default_lang=default_lang),
"recipient_country_percentages": recipient_country_percentages,
"recipient_region_percentages": recipient_region_percentages,
"transactions": transactions,
"transactions": partial(transactions, default_lang=default_lang),
"start_planned": start_planned,
"end_planned": end_planned,
"start_actual": start_actual,
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
<iati-activities version="2.03">
<iati-activity default-currency="EUR" hierarchy="1" xml:lang="de">
<iati-identifier>DE-1-998966376</iati-identifier>
<reporting-org ref="DE-1" secondary-reporter="0" type="10">
<narrative>Bundesministerium für wirtschaftliche Zusammenarbeit und Entwicklung (BMZ)</narrative>
<narrative xml:lang="en">Federal Ministry for Economic Cooperation and Development (BMZ)</narrative>
</reporting-org>
<title>
<narrative xml:lang="pt">Esgotamento Sanitário Pernambuco</narrative>
<narrative>Abwasserentsorgung Pernambuco (Invest.)</narrative>
</title>
<participating-org ref="DE-1" role="1" type="10">
<narrative>Bundesministerium für wirtschaftliche Zusammenarbeit und Entwicklung (BMZ)</narrative>
<narrative xml:lang="en">Federal Ministry for Economic Cooperation and Development (BMZ)</narrative>
</participating-org>
<participating-org ref="XM-DAC-5-2" role="2">
<narrative>KfW Bankengruppe (KfW)</narrative>
</participating-org>
<participating-org ref="XM-DAC-5-2" role="3">
<narrative>KfW Bankengruppe (KfW)</narrative>
</participating-org>
<transaction>
<transaction-type code="11"/>
<transaction-date iso-date="2015-09-08"/>
<value value-date="2015-09-08">1749796.82</value>
<description>
<narrative xml:lang="en">Aid from other bilateral donors</narrative>
</description>
<provider-org ref="NO-BRC-971277882" type="10">
<narrative>Norwegische Agentur für Entwicklungszusammenarbeit (NORAD)</narrative>
</provider-org>
<receiver-org ref="DK-CVR-12921047" receiver-activity-id="DK-CVR-12921047-2018-21LotCivSPA" type="22">
<narrative>Pflege Dänemark</narrative>
</receiver-org>
</transaction>
<transaction>
<transaction-type code="11"/>
<transaction-date iso-date="2016-12-22"/>
<value value-date="2016-12-22">4555468.00</value>
<description>
<narrative xml:lang="en">Aid from other bilateral donors</narrative>
</description>
<provider-org ref="BD-NAB-0210" provider-activity-id="BD-NAB-0210-POWER" type="21">
<narrative>ActionAid Bangladesch</narrative>
</provider-org>
<receiver-org type="22">
<narrative>SKS-Stiftung</narrative>
</receiver-org>
</transaction>
</iati-activity>
</iati-activities>
51 changes: 51 additions & 0 deletions iati_datastore/iatilib/test/fixtures/localised-org-names.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
<iati-activities version="2.03">
<iati-activity default-currency="EUR" hierarchy="1" xml:lang="de">
<iati-identifier>DE-1-198966376</iati-identifier>
<reporting-org ref="DE-1" secondary-reporter="0" type="10">
<narrative xml:lang="de">Bundesministerium für wirtschaftliche Zusammenarbeit und Entwicklung (BMZ)</narrative>
<narrative xml:lang="en">Federal Ministry for Economic Cooperation and Development (BMZ)</narrative>
</reporting-org>
<title>
<narrative xml:lang="pt">Esgotamento Sanitário Pernambuco</narrative>
<narrative xml:lang="de">Abwasserentsorgung Pernambuco (Invest.)</narrative>
</title>
<participating-org ref="DE-1" role="1" type="10">
<narrative xml:lang="de">Bundesministerium für wirtschaftliche Zusammenarbeit und Entwicklung (BMZ)</narrative>
<narrative xml:lang="en">Federal Ministry for Economic Cooperation and Development (BMZ)</narrative>
</participating-org>
<participating-org ref="XM-DAC-5-2" role="2">
<narrative xml:lang="de">KfW Bankengruppe (KfW)</narrative>
</participating-org>
<participating-org ref="XM-DAC-5-2" role="3">
<narrative xml:lang="de">KfW Bankengruppe (KfW)</narrative>
</participating-org>
<transaction>
<transaction-type code="11"/>
<transaction-date iso-date="2015-09-08"/>
<value value-date="2015-09-08">1749796.82</value>
<description>
<narrative xml:lang="en">Aid from other bilateral donors</narrative>
</description>
<provider-org ref="NO-BRC-971277882" type="10">
<narrative xml:lang="en">Norwegian Agency for Development Cooperation (NORAD)</narrative>
</provider-org>
<receiver-org ref="DK-CVR-12921047" receiver-activity-id="DK-CVR-12921047-2018-21LotCivSPA" type="22">
<narrative xml:lang="en">Care Danmark</narrative>
</receiver-org>
</transaction>
<transaction>
<transaction-type code="11"/>
<transaction-date iso-date="2016-12-22"/>
<value value-date="2016-12-22">4555468.00</value>
<description>
<narrative xml:lang="en">Aid from other bilateral donors</narrative>
</description>
<provider-org ref="BD-NAB-0210" provider-activity-id="BD-NAB-0210-POWER" type="21">
<narrative xml:lang="en">ActionAid Bangladesh</narrative>
</provider-org>
<receiver-org type="22">
<narrative xml:lang="en">SKS Foundation</narrative>
</receiver-org>
</transaction>
</iati-activity>
</iati-activities>
92 changes: 92 additions & 0 deletions iati_datastore/iatilib/test/test_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -1099,3 +1099,95 @@ def test_budget_conversion_usd(self):
def test_budget_conversion_eur(self):
self.assertEquals(548485.69, self.act.budgets[0].value_eur) # 2011-08-01: GBP 480637

class TestLocalisedOrganisationNames(AppTestCase):
    """Check ``name_all_values`` / ``title_all_values`` parsed from the
    ``localised-org-names.xml`` fixture.

    Uses ``assertEqual`` throughout: the ``assertEquals`` alias is
    deprecated and no longer exists in Python 3.12.
    """

    def setUp(self):
        super().setUp()
        # Parse the fixture once per test; only the first activity is examined.
        self.activities = list(parse.document_from_file(fixture_filename("localised-org-names.xml")))
        self.act = self.activities[0]

    def test_title_all_values(self):
        self.assertEqual(
            {
                'pt': 'Esgotamento Sanitário Pernambuco',
                'de': "Abwasserentsorgung Pernambuco (Invest.)"
            },
            self.act.title_all_values
        )

    def test_reporting_org_name(self):
        self.assertEqual(
            self.act.reporting_org.name_all_values,
            {'de': 'Bundesministerium für wirtschaftliche Zusammenarbeit und Entwicklung (BMZ)',
             'en': 'Federal Ministry for Economic Cooperation and Development (BMZ)'}
        )

    def test_participating_orgs(self):
        self.assertEqual(
            self.act.participating_orgs[0].organisation.name_all_values,
            {"de": 'Bundesministerium für wirtschaftliche Zusammenarbeit und Entwicklung (BMZ)',
             "en": 'Federal Ministry for Economic Cooperation and Development (BMZ)'}
        )
        self.assertEqual(
            self.act.participating_orgs[1].organisation.name_all_values,
            {"de": 'KfW Bankengruppe (KfW)'}
        )
        self.assertEqual(
            self.act.participating_orgs[2].organisation.name_all_values,
            {"de": 'KfW Bankengruppe (KfW)'}
        )

    def test_transaction_reciever_org_name(self):
        # Covers both provider and receiver organisations of each transaction.
        self.assertEqual(
            self.act.transactions[0].provider_org.name_all_values,
            {"en": 'Norwegian Agency for Development Cooperation (NORAD)'}
        )
        self.assertEqual(
            self.act.transactions[0].receiver_org.name_all_values,
            {"en": 'Care Danmark'}
        )
        self.assertEqual(
            self.act.transactions[1].provider_org.name_all_values,
            {"en": 'ActionAid Bangladesh'}
        )
        self.assertEqual(
            self.act.transactions[1].receiver_org.name_all_values,
            {"en": 'SKS Foundation'}
        )

class TestLocalisedOrganisationNamesActivityDefault(AppTestCase):
    """Check ``name_all_values`` parsed from the
    ``localised-org-names-default.xml`` fixture.

    The expected dicts key several names under ``"de"``; presumably
    those names carry no language of their own in the fixture and pick
    up the activity-level language -- confirm against the fixture.

    Uses ``assertEqual`` throughout: the ``assertEquals`` alias is
    deprecated and no longer exists in Python 3.12.
    """

    def setUp(self):
        super().setUp()
        # Parse the fixture once per test; only the first activity is examined.
        self.activities = list(parse.document_from_file(fixture_filename("localised-org-names-default.xml")))
        self.act = self.activities[0]

    def test_reporting_org_name(self):
        self.assertEqual(
            self.act.reporting_org.name_all_values,
            {'de': 'Bundesministerium für wirtschaftliche Zusammenarbeit und Entwicklung (BMZ)',
             'en': 'Federal Ministry for Economic Cooperation and Development (BMZ)'}
        )

    def test_participating_orgs(self):
        self.assertEqual(
            self.act.participating_orgs[0].organisation.name_all_values,
            {"de": 'Bundesministerium für wirtschaftliche Zusammenarbeit und Entwicklung (BMZ)',
             "en": 'Federal Ministry for Economic Cooperation and Development (BMZ)'}
        )
        self.assertEqual(
            self.act.participating_orgs[1].organisation.name_all_values,
            {"de": 'KfW Bankengruppe (KfW)'}
        )
        self.assertEqual(
            self.act.participating_orgs[2].organisation.name_all_values,
            {"de": 'KfW Bankengruppe (KfW)'}
        )

    def test_transaction_reciever_org_name(self):
        # Covers both provider and receiver organisations of each transaction.
        self.assertEqual(
            self.act.transactions[0].provider_org.name_all_values,
            {"de": 'Norwegische Agentur für Entwicklungszusammenarbeit (NORAD)'}
        )
        self.assertEqual(
            self.act.transactions[0].receiver_org.name_all_values,
            {"de": 'Pflege Dänemark'}
        )
        self.assertEqual(
            self.act.transactions[1].provider_org.name_all_values,
            {"de": 'ActionAid Bangladesch'}
        )
        self.assertEqual(
            self.act.transactions[1].receiver_org.name_all_values,
            {"de": 'SKS-Stiftung'}
        )
Loading