Skip to content

Commit

Permalink
Merge pull request #388 from codeforIATI/localised_organisation_names…
Browse files Browse the repository at this point in the history
…_part1

Localised organisation names - part 1
  • Loading branch information
radix0000 authored Jan 18, 2023
2 parents 3f43b24 + f11595a commit 33fcb50
Show file tree
Hide file tree
Showing 8 changed files with 291 additions and 12 deletions.
8 changes: 6 additions & 2 deletions iati_datastore/iatilib/crawler.py
Original file line number Diff line number Diff line change
Expand Up @@ -224,7 +224,7 @@ def parse_resource(resource):
return resource # , new_identifiers


def update_activities(dataset_name):
def update_activities(dataset_name, ignore_hashes=False):
'''
Parses and stores the raw XML associated with a resource [see parse_resource()], or logs the invalid resource
:param resource_url:
Expand All @@ -239,6 +239,9 @@ def update_activities(dataset_name):

dataset = Dataset.query.get(dataset_name)
resource = dataset.resources[0]

if ignore_hashes: db.session._update_all_unique = True

try:
db.session.query(Log).filter(sa.and_(
Log.logger.in_(
Expand All @@ -262,6 +265,7 @@ def update_activities(dataset_name):
))
db.session.commit()

if ignore_hashes: db.session._update_all_unique = False

def update_dataset(dataset_name, ignore_hashes):
'''
Expand Down Expand Up @@ -307,7 +311,7 @@ def update_dataset(dataset_name, ignore_hashes):

if resource.last_status_code == 200 and not resource.last_parsed:
queue.enqueue(
update_activities, args=(dataset_name,),
update_activities, args=(dataset_name, ignore_hashes),
result_ttl=0, job_timeout=100000)


Expand Down
12 changes: 11 additions & 1 deletion iati_datastore/iatilib/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,13 @@ def _unique(session, cls, hashfunc, queryfunc, constructor, arg, kw):

key = (cls, hashfunc(*arg, **kw))
if key in cache:
return cache[key]
if getattr(session, '_update_all_unique', False):
obj = cache[key]
for name, value in kw.items():
setattr(obj, name, value)
return obj
else:
return cache[key]
else:
with session.no_autoflush:
q = session.query(cls)
Expand All @@ -46,6 +52,9 @@ def _unique(session, cls, hashfunc, queryfunc, constructor, arg, kw):
if not obj:
obj = constructor(*arg, **kw)
session.add(obj)
elif getattr(session, '_update_all_unique', False):
for name, value in kw.items():
setattr(obj, name, value)
cache[key] = obj
return obj

Expand Down Expand Up @@ -195,6 +204,7 @@ class Organisation(db.Model, UniqueMixin):
id = sa.Column(sa.Integer, primary_key=True, nullable=False)
ref = sa.Column(sa.Unicode, nullable=False)
name = sa.Column(sa.Unicode, default=u"", nullable=True)
name_all_values = sa.Column(JSONB, nullable=True)
type = sa.Column(codelists.OrganisationType.db_type())
__table_args__ = (sa.UniqueConstraint('ref', 'name', 'type'),)

Expand Down
37 changes: 28 additions & 9 deletions iati_datastore/iatilib/parse.py
Original file line number Diff line number Diff line change
Expand Up @@ -103,10 +103,26 @@ def xpath_decimal(xpath, xml, resource=None, major_version='1'):
return None


def parse_org(xml, resource=no_resource, major_version='1'):
def xvals_lang(xml, major_version, default_lang="default"):
    """Collect the text values of an element, keyed by language.

    When ``major_version`` is ``'1'`` the element itself is inspected;
    for any other value each ``narrative`` child is inspected instead.
    The key is looked up via ``xval(node, "@xml:lang", default_lang)`` and
    the value via ``xval(node, "text()")``.

    If several nodes resolve to the same language key, the last one wins.

    :param xml: element to read the values from
    :param major_version: major version string; only ``'1'`` is special-cased
    :param default_lang: handed to ``xval`` as the fallback for the language
        lookup
    :return: dict mapping language key to text value
    """
    # Only the node selection differs between the two layouts.
    node_path = "." if major_version == '1' else "./narrative"

    names_by_lang = {}
    for node in xml.xpath(node_path):
        language = xval(node, "@xml:lang", default_lang)
        names_by_lang[language] = xval(node, "text()")
    return names_by_lang


def parse_org(xml, resource=no_resource, major_version='1', default_lang="default"):
data = {
"ref": xval(xml, "@ref", u""),
"name": xval(xml, TEXT_ELEMENT[major_version], u""),
"name_all_values": xvals_lang(xml, TEXT_ELEMENT[major_version], default_lang=default_lang)
}
try:
data['type'] = codelists.by_major_version[major_version].OrganisationType.from_string(xval(xml, "@type"))
Expand All @@ -115,7 +131,7 @@ def parse_org(xml, resource=no_resource, major_version='1'):
return Organisation.as_unique(db.session, **data)


def reporting_org(element, resource=no_resource, major_version='1'):
def reporting_org(element, resource=no_resource, major_version='1', default_lang="default"):
try:
xml = element.xpath("./reporting-org")[0]
except IndexError:
Expand All @@ -125,6 +141,7 @@ def reporting_org(element, resource=no_resource, major_version='1'):
data = {
"ref": xval(xml, "@ref"),
"name": xval(xml, TEXT_ELEMENT[major_version], u""),
"name_all_values": xvals_lang(xml, TEXT_ELEMENT[major_version], default_lang=default_lang)
}
try:
data.update({
Expand All @@ -143,7 +160,7 @@ def reporting_org(element, resource=no_resource, major_version='1'):
return Organisation.as_unique(db.session, **data)


def participating_orgs(xml, resource=None, major_version='1'):
def participating_orgs(xml, resource=None, major_version='1', default_lang="default"):
ret = []
seen = set()
for ele in xml.xpath("./participating-org"):
Expand All @@ -164,7 +181,7 @@ def participating_orgs(xml, resource=None, major_version='1'):
role = codelists.by_major_version['1'].OrganisationRole.from_string(value)
else:
role = codelists.by_major_version[major_version].OrganisationRole.from_string(xval(ele, "@role").title())
organisation = parse_org(ele, major_version=major_version)
organisation = parse_org(ele, major_version=major_version, default_lang=default_lang)
if not (role, organisation.ref) in seen:
seen.add((role, organisation.ref))
ret.append(Participation(role=role, organisation=organisation))
Expand Down Expand Up @@ -310,14 +327,14 @@ def description_all_values(xml, resource=None, major_version='1'):
return ret


def transactions(xml, resource=no_resource, major_version='1'):
def transactions(xml, resource=no_resource, major_version='1', default_lang="default"):
def from_cl(code, codelist):
return codelist.from_string(code) if code is not None else None

def from_org(path, ele, resource=None, major_version='1'):
organisation = ele.xpath(path)
if organisation:
return parse_org(organisation[0], major_version=major_version)
return parse_org(organisation[0], major_version=major_version, default_lang=default_lang)
# return Organisation.as_unique(db.session, ref=org) if org else Nonejk

def process(ele):
Expand Down Expand Up @@ -537,6 +554,8 @@ def activity(xml, resource=no_resource, major_version='1', version=None):
Expects xml argument of type lxml.etree._Element
"""

default_lang = xval(xml, "@xml:lang", "default")

if major_version == '2':
start_planned = partial(xval_date, "./activity-date[@type='1']")
start_actual = partial(xval_date, "./activity-date[@type='2']")
Expand Down Expand Up @@ -568,12 +587,12 @@ def activity(xml, resource=no_resource, major_version='1', version=None):
"hierarchy": hierarchy,
"last_updated_datetime": last_updated_datetime,
"default_language": default_language,
"reporting_org": reporting_org,
"reporting_org": partial(reporting_org, default_lang=default_lang),
"websites": websites,
"participating_orgs": participating_orgs,
"participating_orgs": partial(participating_orgs, default_lang=default_lang),
"recipient_country_percentages": recipient_country_percentages,
"recipient_region_percentages": recipient_region_percentages,
"transactions": transactions,
"transactions": partial(transactions, default_lang=default_lang),
"start_planned": start_planned,
"end_planned": end_planned,
"start_actual": start_actual,
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
<iati-activities version="2.03">
<iati-activity default-currency="EUR" hierarchy="1" xml:lang="de">
<iati-identifier>DE-1-998966376</iati-identifier>
<reporting-org ref="DE-1" secondary-reporter="0" type="10">
<narrative>Bundesministerium für wirtschaftliche Zusammenarbeit und Entwicklung (BMZ)</narrative>
<narrative xml:lang="en">Federal Ministry for Economic Cooperation and Development (BMZ)</narrative>
</reporting-org>
<title>
<narrative xml:lang="pt">Esgotamento Sanitário Pernambuco</narrative>
<narrative>Abwasserentsorgung Pernambuco (Invest.)</narrative>
</title>
<participating-org ref="DE-1" role="1" type="10">
<narrative>Bundesministerium für wirtschaftliche Zusammenarbeit und Entwicklung (BMZ)</narrative>
<narrative xml:lang="en">Federal Ministry for Economic Cooperation and Development (BMZ)</narrative>
</participating-org>
<participating-org ref="XM-DAC-5-2" role="2">
<narrative>KfW Bankengruppe (KfW)</narrative>
</participating-org>
<participating-org ref="XM-DAC-5-2" role="3">
<narrative>KfW Bankengruppe (KfW)</narrative>
</participating-org>
<transaction>
<transaction-type code="11"/>
<transaction-date iso-date="2015-09-08"/>
<value value-date="2015-09-08">1749796.82</value>
<description>
<narrative xml:lang="en">Aid from other bilateral donors</narrative>
</description>
<provider-org ref="NO-BRC-971277882" type="10">
<narrative>Norwegische Agentur für Entwicklungszusammenarbeit (NORAD)</narrative>
</provider-org>
<receiver-org ref="DK-CVR-12921047" receiver-activity-id="DK-CVR-12921047-2018-21LotCivSPA" type="22">
<narrative>Pflege Dänemark</narrative>
</receiver-org>
</transaction>
<transaction>
<transaction-type code="11"/>
<transaction-date iso-date="2016-12-22"/>
<value value-date="2016-12-22">4555468.00</value>
<description>
<narrative xml:lang="en">Aid from other bilateral donors</narrative>
</description>
<provider-org ref="BD-NAB-0210" provider-activity-id="BD-NAB-0210-POWER" type="21">
<narrative>ActionAid Bangladesch</narrative>
</provider-org>
<receiver-org type="22">
<narrative>SKS-Stiftung</narrative>
</receiver-org>
</transaction>
</iati-activity>
</iati-activities>
51 changes: 51 additions & 0 deletions iati_datastore/iatilib/test/fixtures/localised-org-names.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
<iati-activities version="2.03">
<iati-activity default-currency="EUR" hierarchy="1" xml:lang="de">
<iati-identifier>DE-1-198966376</iati-identifier>
<reporting-org ref="DE-1" secondary-reporter="0" type="10">
<narrative xml:lang="de">Bundesministerium für wirtschaftliche Zusammenarbeit und Entwicklung (BMZ)</narrative>
<narrative xml:lang="en">Federal Ministry for Economic Cooperation and Development (BMZ)</narrative>
</reporting-org>
<title>
<narrative xml:lang="pt">Esgotamento Sanitário Pernambuco</narrative>
<narrative xml:lang="de">Abwasserentsorgung Pernambuco (Invest.)</narrative>
</title>
<participating-org ref="DE-1" role="1" type="10">
<narrative xml:lang="de">Bundesministerium für wirtschaftliche Zusammenarbeit und Entwicklung (BMZ)</narrative>
<narrative xml:lang="en">Federal Ministry for Economic Cooperation and Development (BMZ)</narrative>
</participating-org>
<participating-org ref="XM-DAC-5-2" role="2">
<narrative xml:lang="de">KfW Bankengruppe (KfW)</narrative>
</participating-org>
<participating-org ref="XM-DAC-5-2" role="3">
<narrative xml:lang="de">KfW Bankengruppe (KfW)</narrative>
</participating-org>
<transaction>
<transaction-type code="11"/>
<transaction-date iso-date="2015-09-08"/>
<value value-date="2015-09-08">1749796.82</value>
<description>
<narrative xml:lang="en">Aid from other bilateral donors</narrative>
</description>
<provider-org ref="NO-BRC-971277882" type="10">
<narrative xml:lang="en">Norwegian Agency for Development Cooperation (NORAD)</narrative>
</provider-org>
<receiver-org ref="DK-CVR-12921047" receiver-activity-id="DK-CVR-12921047-2018-21LotCivSPA" type="22">
<narrative xml:lang="en">Care Danmark</narrative>
</receiver-org>
</transaction>
<transaction>
<transaction-type code="11"/>
<transaction-date iso-date="2016-12-22"/>
<value value-date="2016-12-22">4555468.00</value>
<description>
<narrative xml:lang="en">Aid from other bilateral donors</narrative>
</description>
<provider-org ref="BD-NAB-0210" provider-activity-id="BD-NAB-0210-POWER" type="21">
<narrative xml:lang="en">ActionAid Bangladesh</narrative>
</provider-org>
<receiver-org type="22">
<narrative xml:lang="en">SKS Foundation</narrative>
</receiver-org>
</transaction>
</iati-activity>
</iati-activities>
92 changes: 92 additions & 0 deletions iati_datastore/iatilib/test/test_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -1099,3 +1099,95 @@ def test_budget_conversion_usd(self):
def test_budget_conversion_eur(self):
self.assertEquals(548485.69, self.act.budgets[0].value_eur) # 2011-08-01: GBP 480637

class TestLocalisedOrganisationNames(AppTestCase):
    """Check ``name_all_values`` / ``title_all_values`` for the
    ``localised-org-names.xml`` fixture.

    Every expected dict below is keyed by an explicit language code
    ("de", "en", "pt").
    """

    def setUp(self):
        super().setUp()
        self.activities = list(parse.document_from_file(fixture_filename("localised-org-names.xml")))
        # All assertions below look at the first parsed activity only.
        self.act = self.activities[0]

    def test_title_all_values(self):
        """The activity title is exposed per language."""
        self.assertEqual(
            {
                'pt': 'Esgotamento Sanitário Pernambuco',
                'de': "Abwasserentsorgung Pernambuco (Invest.)"
            },
            self.act.title_all_values
        )

    def test_reporting_org_name(self):
        """The reporting organisation carries both its German and English name."""
        self.assertEqual(
            self.act.reporting_org.name_all_values,
            {'de': 'Bundesministerium für wirtschaftliche Zusammenarbeit und Entwicklung (BMZ)',
             'en': 'Federal Ministry for Economic Cooperation and Development (BMZ)'}
        )

    def test_participating_orgs(self):
        """Each participating organisation carries its localised names."""
        self.assertEqual(
            self.act.participating_orgs[0].organisation.name_all_values,
            {"de": 'Bundesministerium für wirtschaftliche Zusammenarbeit und Entwicklung (BMZ)',
             "en": 'Federal Ministry for Economic Cooperation and Development (BMZ)'}
        )
        self.assertEqual(
            self.act.participating_orgs[1].organisation.name_all_values,
            {"de": 'KfW Bankengruppe (KfW)'}
        )
        self.assertEqual(
            self.act.participating_orgs[2].organisation.name_all_values,
            {"de": 'KfW Bankengruppe (KfW)'}
        )

    def test_transaction_reciever_org_name(self):
        """Provider and receiver organisations of both transactions carry
        their localised names."""
        self.assertEqual(
            self.act.transactions[0].provider_org.name_all_values,
            {"en": 'Norwegian Agency for Development Cooperation (NORAD)'}
        )
        self.assertEqual(
            self.act.transactions[0].receiver_org.name_all_values,
            {"en": 'Care Danmark'}
        )
        self.assertEqual(
            self.act.transactions[1].provider_org.name_all_values,
            {"en": 'ActionAid Bangladesh'}
        )
        self.assertEqual(
            self.act.transactions[1].receiver_org.name_all_values,
            {"en": 'SKS Foundation'}
        )

class TestLocalisedOrganisationNamesActivityDefault(AppTestCase):
    """Check ``name_all_values`` for the ``localised-org-names-default.xml``
    fixture.

    NOTE(review): the fixture presumably omits ``xml:lang`` on some
    narratives and relies on the activity-level language ("de"); the
    expected dicts below are keyed by "de" for those names. Confirm against
    the fixture file.
    """

    def setUp(self):
        super().setUp()
        self.activities = list(parse.document_from_file(fixture_filename("localised-org-names-default.xml")))
        # All assertions below look at the first parsed activity only.
        self.act = self.activities[0]

    def test_reporting_org_name(self):
        """The reporting organisation carries both its German and English name."""
        self.assertEqual(
            self.act.reporting_org.name_all_values,
            {'de': 'Bundesministerium für wirtschaftliche Zusammenarbeit und Entwicklung (BMZ)',
             'en': 'Federal Ministry for Economic Cooperation and Development (BMZ)'}
        )

    def test_participating_orgs(self):
        """Each participating organisation carries its localised names."""
        self.assertEqual(
            self.act.participating_orgs[0].organisation.name_all_values,
            {"de": 'Bundesministerium für wirtschaftliche Zusammenarbeit und Entwicklung (BMZ)',
             "en": 'Federal Ministry for Economic Cooperation and Development (BMZ)'}
        )
        self.assertEqual(
            self.act.participating_orgs[1].organisation.name_all_values,
            {"de": 'KfW Bankengruppe (KfW)'}
        )
        self.assertEqual(
            self.act.participating_orgs[2].organisation.name_all_values,
            {"de": 'KfW Bankengruppe (KfW)'}
        )

    def test_transaction_reciever_org_name(self):
        """Provider and receiver organisations of both transactions are
        keyed by "de"."""
        self.assertEqual(
            self.act.transactions[0].provider_org.name_all_values,
            {"de": 'Norwegische Agentur für Entwicklungszusammenarbeit (NORAD)'}
        )
        self.assertEqual(
            self.act.transactions[0].receiver_org.name_all_values,
            {"de": 'Pflege Dänemark'}
        )
        self.assertEqual(
            self.act.transactions[1].provider_org.name_all_values,
            {"de": 'ActionAid Bangladesch'}
        )
        self.assertEqual(
            self.act.transactions[1].receiver_org.name_all_values,
            {"de": 'SKS-Stiftung'}
        )
24 changes: 24 additions & 0 deletions migrations/versions/864375812164_merging_two_heads.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
"""merging two heads
Revision ID: 864375812164
Revises: c6fe2cf16adb, 3daa1d4ab046
Create Date: 2022-12-13 17:03:56.861063
"""
from alembic import op
import sqlalchemy as sa


# revision identifiers, used by Alembic.
revision = '864375812164'
# A tuple of two parent revisions: this revision merges both of them
# (the module docstring describes it as "merging two heads").
down_revision = ('c6fe2cf16adb', '3daa1d4ab046')
branch_labels = None
depends_on = None


def upgrade():
    """Do nothing: this revision performs no operations when upgrading."""
    return None


def downgrade():
    """Do nothing: this revision performs no operations when downgrading."""
    return None
Loading

0 comments on commit 33fcb50

Please sign in to comment.