diff --git a/iati_datastore/iatilib/crawler.py b/iati_datastore/iatilib/crawler.py
index 2f78a2d1..e60a8378 100644
--- a/iati_datastore/iatilib/crawler.py
+++ b/iati_datastore/iatilib/crawler.py
@@ -224,7 +224,7 @@ def parse_resource(resource):
return resource # , new_identifiers
-def update_activities(dataset_name):
+def update_activities(dataset_name, ignore_hashes=False):
'''
Parses and stores the raw XML associated with a resource [see parse_resource()], or logs the invalid resource
:param resource_url:
@@ -239,6 +239,9 @@ def update_activities(dataset_name):
dataset = Dataset.query.get(dataset_name)
resource = dataset.resources[0]
+
+ if ignore_hashes: db.session._update_all_unique = True
+
try:
db.session.query(Log).filter(sa.and_(
Log.logger.in_(
@@ -262,6 +265,7 @@ def update_activities(dataset_name):
))
db.session.commit()
+ if ignore_hashes: db.session._update_all_unique = False
def update_dataset(dataset_name, ignore_hashes):
'''
@@ -307,7 +311,7 @@ def update_dataset(dataset_name, ignore_hashes):
if resource.last_status_code == 200 and not resource.last_parsed:
queue.enqueue(
- update_activities, args=(dataset_name,),
+ update_activities, args=(dataset_name, ignore_hashes),
result_ttl=0, job_timeout=100000)
diff --git a/iati_datastore/iatilib/model.py b/iati_datastore/iatilib/model.py
index 5249c7cc..777c049e 100644
--- a/iati_datastore/iatilib/model.py
+++ b/iati_datastore/iatilib/model.py
@@ -37,7 +37,13 @@ def _unique(session, cls, hashfunc, queryfunc, constructor, arg, kw):
key = (cls, hashfunc(*arg, **kw))
if key in cache:
- return cache[key]
+ if getattr(session, '_update_all_unique', False):
+ obj = cache[key]
+ for name, value in kw.items():
+ setattr(obj, name, value)
+ return obj
+ else:
+ return cache[key]
else:
with session.no_autoflush:
q = session.query(cls)
@@ -46,6 +52,9 @@ def _unique(session, cls, hashfunc, queryfunc, constructor, arg, kw):
if not obj:
obj = constructor(*arg, **kw)
session.add(obj)
+ elif getattr(session, '_update_all_unique', False):
+ for name, value in kw.items():
+ setattr(obj, name, value)
cache[key] = obj
return obj
@@ -195,6 +204,7 @@ class Organisation(db.Model, UniqueMixin):
id = sa.Column(sa.Integer, primary_key=True, nullable=False)
ref = sa.Column(sa.Unicode, nullable=False)
name = sa.Column(sa.Unicode, default=u"", nullable=True)
+ name_all_values = sa.Column(JSONB, nullable=True)
type = sa.Column(codelists.OrganisationType.db_type())
__table_args__ = (sa.UniqueConstraint('ref', 'name', 'type'),)
diff --git a/iati_datastore/iatilib/parse.py b/iati_datastore/iatilib/parse.py
index 8a4cbd74..df462c48 100644
--- a/iati_datastore/iatilib/parse.py
+++ b/iati_datastore/iatilib/parse.py
@@ -103,10 +103,26 @@ def xpath_decimal(xpath, xml, resource=None, major_version='1'):
return None
-def parse_org(xml, resource=no_resource, major_version='1'):
+def xvals_lang(xml, major_version, default_lang="default"):
+    """Collect the element's text per language as a {lang: text} dict.
+
+    Version '1' reads ``xml`` itself, anything else each ``./narrative`` child;
+    ``default_lang`` is the xval fallback for ``@xml:lang``; last duplicate wins.
+    NOTE(review): pass the version string here -- an XPath such as
+    TEXT_ELEMENT[major_version] never equals '1', so that branch is skipped.
+    """
+    path = "." if major_version == '1' else "./narrative"
+    return {
+        xval(node, "@xml:lang", default_lang): xval(node, "text()")
+        for node in xml.xpath(path)
+    }
+
+
+def parse_org(xml, resource=no_resource, major_version='1', default_lang="default"):
     data = {
         "ref": xval(xml, "@ref", u""),
         "name": xval(xml, TEXT_ELEMENT[major_version], u""),
+        "name_all_values": xvals_lang(xml, major_version, default_lang=default_lang),  # xvals_lang branches on the version string, not the TEXT_ELEMENT XPath
     }
try:
data['type'] = codelists.by_major_version[major_version].OrganisationType.from_string(xval(xml, "@type"))
@@ -115,7 +131,7 @@ def parse_org(xml, resource=no_resource, major_version='1'):
return Organisation.as_unique(db.session, **data)
-def reporting_org(element, resource=no_resource, major_version='1'):
+def reporting_org(element, resource=no_resource, major_version='1', default_lang="default"):
     try:
         xml = element.xpath("./reporting-org")[0]
     except IndexError:
@@ -125,6 +141,7 @@ def reporting_org(element, resource=no_resource, major_version='1'):
     data = {
         "ref": xval(xml, "@ref"),
         "name": xval(xml, TEXT_ELEMENT[major_version], u""),
+        "name_all_values": xvals_lang(xml, major_version, default_lang=default_lang),  # xvals_lang branches on the version string, not the TEXT_ELEMENT XPath
     }
try:
data.update({
@@ -143,7 +160,7 @@ def reporting_org(element, resource=no_resource, major_version='1'):
return Organisation.as_unique(db.session, **data)
-def participating_orgs(xml, resource=None, major_version='1'):
+def participating_orgs(xml, resource=None, major_version='1', default_lang="default"):
ret = []
seen = set()
for ele in xml.xpath("./participating-org"):
@@ -164,7 +181,7 @@ def participating_orgs(xml, resource=None, major_version='1'):
role = codelists.by_major_version['1'].OrganisationRole.from_string(value)
else:
role = codelists.by_major_version[major_version].OrganisationRole.from_string(xval(ele, "@role").title())
- organisation = parse_org(ele, major_version=major_version)
+ organisation = parse_org(ele, major_version=major_version, default_lang=default_lang)
if not (role, organisation.ref) in seen:
seen.add((role, organisation.ref))
ret.append(Participation(role=role, organisation=organisation))
@@ -310,14 +327,14 @@ def description_all_values(xml, resource=None, major_version='1'):
return ret
-def transactions(xml, resource=no_resource, major_version='1'):
+def transactions(xml, resource=no_resource, major_version='1', default_lang="default"):
def from_cl(code, codelist):
return codelist.from_string(code) if code is not None else None
def from_org(path, ele, resource=None, major_version='1'):
organisation = ele.xpath(path)
if organisation:
- return parse_org(organisation[0], major_version=major_version)
+ return parse_org(organisation[0], major_version=major_version, default_lang=default_lang)
# return Organisation.as_unique(db.session, ref=org) if org else Nonejk
def process(ele):
@@ -537,6 +554,8 @@ def activity(xml, resource=no_resource, major_version='1', version=None):
Expects xml argument of type lxml.etree._Element
"""
+ default_lang = xval(xml, "@xml:lang", "default")
+
if major_version == '2':
start_planned = partial(xval_date, "./activity-date[@type='1']")
start_actual = partial(xval_date, "./activity-date[@type='2']")
@@ -568,12 +587,12 @@ def activity(xml, resource=no_resource, major_version='1', version=None):
"hierarchy": hierarchy,
"last_updated_datetime": last_updated_datetime,
"default_language": default_language,
- "reporting_org": reporting_org,
+ "reporting_org": partial(reporting_org, default_lang=default_lang),
"websites": websites,
- "participating_orgs": participating_orgs,
+ "participating_orgs": partial(participating_orgs, default_lang=default_lang),
"recipient_country_percentages": recipient_country_percentages,
"recipient_region_percentages": recipient_region_percentages,
- "transactions": transactions,
+ "transactions": partial(transactions, default_lang=default_lang),
"start_planned": start_planned,
"end_planned": end_planned,
"start_actual": start_actual,
diff --git a/iati_datastore/iatilib/test/fixtures/localised-org-names-default.xml b/iati_datastore/iatilib/test/fixtures/localised-org-names-default.xml
new file mode 100644
index 00000000..7a1e5254
--- /dev/null
+++ b/iati_datastore/iatilib/test/fixtures/localised-org-names-default.xml
@@ -0,0 +1,51 @@
+
+
+ DE-1-998966376
+
+ Bundesministerium für wirtschaftliche Zusammenarbeit und Entwicklung (BMZ)
+ Federal Ministry for Economic Cooperation and Development (BMZ)
+
+
+ Esgotamento Sanitário Pernambuco
+ Abwasserentsorgung Pernambuco (Invest.)
+
+
+ Bundesministerium für wirtschaftliche Zusammenarbeit und Entwicklung (BMZ)
+ Federal Ministry for Economic Cooperation and Development (BMZ)
+
+
+ KfW Bankengruppe (KfW)
+
+
+ KfW Bankengruppe (KfW)
+
+
+
+
+ 1749796.82
+
+ Aid from other bilateral donors
+
+
+ Norwegische Agentur für Entwicklungszusammenarbeit (NORAD)
+
+
+ Pflege Dänemark
+
+
+
+
+
+ 4555468.00
+
+ Aid from other bilateral donors
+
+
+ ActionAid Bangladesch
+
+
+ SKS-Stiftung
+
+
+
+
diff --git a/iati_datastore/iatilib/test/fixtures/localised-org-names.xml b/iati_datastore/iatilib/test/fixtures/localised-org-names.xml
new file mode 100644
index 00000000..acbe19f2
--- /dev/null
+++ b/iati_datastore/iatilib/test/fixtures/localised-org-names.xml
@@ -0,0 +1,51 @@
+
+
+ DE-1-198966376
+
+ Bundesministerium für wirtschaftliche Zusammenarbeit und Entwicklung (BMZ)
+ Federal Ministry for Economic Cooperation and Development (BMZ)
+
+
+ Esgotamento Sanitário Pernambuco
+ Abwasserentsorgung Pernambuco (Invest.)
+
+
+ Bundesministerium für wirtschaftliche Zusammenarbeit und Entwicklung (BMZ)
+ Federal Ministry for Economic Cooperation and Development (BMZ)
+
+
+ KfW Bankengruppe (KfW)
+
+
+ KfW Bankengruppe (KfW)
+
+
+
+
+ 1749796.82
+
+ Aid from other bilateral donors
+
+
+ Norwegian Agency for Development Cooperation (NORAD)
+
+
+ Care Danmark
+
+
+
+
+
+ 4555468.00
+
+ Aid from other bilateral donors
+
+
+ ActionAid Bangladesh
+
+
+ SKS Foundation
+
+
+
+
diff --git a/iati_datastore/iatilib/test/test_parser.py b/iati_datastore/iatilib/test/test_parser.py
index d14b1f3f..1d8c5300 100644
--- a/iati_datastore/iatilib/test/test_parser.py
+++ b/iati_datastore/iatilib/test/test_parser.py
@@ -1099,3 +1099,95 @@ def test_budget_conversion_usd(self):
def test_budget_conversion_eur(self):
self.assertEquals(548485.69, self.act.budgets[0].value_eur) # 2011-08-01: GBP 480637
+class TestLocalisedOrganisationNames(AppTestCase):
+    """Check the per-language name dicts parsed from localised-org-names.xml."""
+
+    def setUp(self):
+        super().setUp()
+        # Only the first activity in the fixture is inspected.
+        self.activities = list(parse.document_from_file(fixture_filename("localised-org-names.xml")))
+        self.act = self.activities[0]
+
+    def test_title_all_values(self):
+        self.assertEqual(
+            {
+                'pt': 'Esgotamento Sanitário Pernambuco',
+                'de': "Abwasserentsorgung Pernambuco (Invest.)"
+            },
+            self.act.title_all_values
+        )
+
+    def test_reporting_org_name(self):
+        self.assertEqual(
+            self.act.reporting_org.name_all_values,
+            {'de': 'Bundesministerium für wirtschaftliche Zusammenarbeit und Entwicklung (BMZ)',
+             'en': 'Federal Ministry for Economic Cooperation and Development (BMZ)'}
+        )
+
+    def test_participating_orgs(self):
+        self.assertEqual(
+            self.act.participating_orgs[0].organisation.name_all_values,
+            {"de": 'Bundesministerium für wirtschaftliche Zusammenarbeit und Entwicklung (BMZ)',
+             "en": 'Federal Ministry for Economic Cooperation and Development (BMZ)'}
+        )
+        self.assertEqual(
+            self.act.participating_orgs[1].organisation.name_all_values,
+            {"de": 'KfW Bankengruppe (KfW)'}
+        )
+        self.assertEqual(
+            self.act.participating_orgs[2].organisation.name_all_values,
+            {"de": 'KfW Bankengruppe (KfW)'}
+        )
+
+    def test_transaction_reciever_org_name(self):
+        # Covers both the provider and the receiver organisation of each transaction.
+        self.assertEqual(self.act.transactions[0].provider_org.name_all_values,
+                         {"en": 'Norwegian Agency for Development Cooperation (NORAD)'})
+        self.assertEqual(self.act.transactions[0].receiver_org.name_all_values,
+                         {"en": 'Care Danmark'})
+        self.assertEqual(self.act.transactions[1].provider_org.name_all_values,
+                         {"en": 'ActionAid Bangladesh'})
+        self.assertEqual(self.act.transactions[1].receiver_org.name_all_values,
+                         {"en": 'SKS Foundation'})
+
+class TestLocalisedOrganisationNamesActivityDefault(AppTestCase):
+    """Check the per-language name dicts parsed from localised-org-names-default.xml."""
+
+    def setUp(self):
+        super().setUp()
+        # Only the first activity in the fixture is inspected.
+        self.activities = list(parse.document_from_file(fixture_filename("localised-org-names-default.xml")))
+        self.act = self.activities[0]
+
+    def test_reporting_org_name(self):
+        self.assertEqual(
+            self.act.reporting_org.name_all_values,
+            {'de': 'Bundesministerium für wirtschaftliche Zusammenarbeit und Entwicklung (BMZ)',
+             'en': 'Federal Ministry for Economic Cooperation and Development (BMZ)'}
+        )
+
+    def test_participating_orgs(self):
+        self.assertEqual(
+            self.act.participating_orgs[0].organisation.name_all_values,
+            {"de": 'Bundesministerium für wirtschaftliche Zusammenarbeit und Entwicklung (BMZ)',
+             "en": 'Federal Ministry for Economic Cooperation and Development (BMZ)'}
+        )
+        self.assertEqual(
+            self.act.participating_orgs[1].organisation.name_all_values,
+            {"de": 'KfW Bankengruppe (KfW)'}
+        )
+        self.assertEqual(
+            self.act.participating_orgs[2].organisation.name_all_values,
+            {"de": 'KfW Bankengruppe (KfW)'}
+        )
+
+    def test_transaction_reciever_org_name(self):
+        # Covers both the provider and the receiver organisation of each transaction.
+        self.assertEqual(self.act.transactions[0].provider_org.name_all_values,
+                         {"de": 'Norwegische Agentur für Entwicklungszusammenarbeit (NORAD)'})
+        self.assertEqual(self.act.transactions[0].receiver_org.name_all_values,
+                         {"de": 'Pflege Dänemark'})
+        self.assertEqual(self.act.transactions[1].provider_org.name_all_values,
+                         {"de": 'ActionAid Bangladesch'})
+        self.assertEqual(self.act.transactions[1].receiver_org.name_all_values,
+                         {"de": 'SKS-Stiftung'})
diff --git a/migrations/versions/864375812164_merging_two_heads.py b/migrations/versions/864375812164_merging_two_heads.py
new file mode 100644
index 00000000..071dcd65
--- /dev/null
+++ b/migrations/versions/864375812164_merging_two_heads.py
@@ -0,0 +1,24 @@
+"""merging two heads
+
+Revision ID: 864375812164
+Revises: c6fe2cf16adb, 3daa1d4ab046
+Create Date: 2022-12-13 17:03:56.861063
+
+"""
+from alembic import op
+import sqlalchemy as sa
+
+
+# revision identifiers, used by Alembic.
+revision = '864375812164'
+down_revision = ('c6fe2cf16adb', '3daa1d4ab046')
+branch_labels = None
+depends_on = None
+
+
+def upgrade():
+    pass  # merge-only revision: joins the two heads, no schema changes
+
+
+def downgrade():
+    pass  # nothing to undo: this merge revision changes no schema
diff --git a/migrations/versions/c6fe2cf16adb_add_name_all_values_column_to_.py b/migrations/versions/c6fe2cf16adb_add_name_all_values_column_to_.py
new file mode 100644
index 00000000..575b32c5
--- /dev/null
+++ b/migrations/versions/c6fe2cf16adb_add_name_all_values_column_to_.py
@@ -0,0 +1,28 @@
+"""Add name_all_values column to Organisation table
+
+Revision ID: c6fe2cf16adb
+Revises: fdc7716bb5ea
+Create Date: 2022-11-02 10:36:23.526062
+
+"""
+from alembic import op
+import sqlalchemy as sa
+from sqlalchemy.dialects import postgresql
+
+# revision identifiers, used by Alembic.
+revision = 'c6fe2cf16adb'
+down_revision = 'fdc7716bb5ea'
+branch_labels = None
+depends_on = None
+
+
+def upgrade():
+    # Adds the nullable JSONB column organisation.name_all_values (auto-generated by Alembic).
+    op.add_column('organisation', sa.Column('name_all_values', postgresql.JSONB(astext_type=sa.Text()), nullable=True))
+    # ### end Alembic commands ###
+
+
+def downgrade():
+    # Drops organisation.name_all_values again, discarding any stored values (auto-generated by Alembic).
+    op.drop_column('organisation', 'name_all_values')
+    # ### end Alembic commands ###