diff --git a/iati_datastore/iatilib/crawler.py b/iati_datastore/iatilib/crawler.py index 2f78a2d1..e60a8378 100644 --- a/iati_datastore/iatilib/crawler.py +++ b/iati_datastore/iatilib/crawler.py @@ -224,7 +224,7 @@ def parse_resource(resource): return resource # , new_identifiers -def update_activities(dataset_name): +def update_activities(dataset_name, ignore_hashes=False): ''' Parses and stores the raw XML associated with a resource [see parse_resource()], or logs the invalid resource :param resource_url: @@ -239,6 +239,9 @@ def update_activities(dataset_name): dataset = Dataset.query.get(dataset_name) resource = dataset.resources[0] + + if ignore_hashes: db.session._update_all_unique = True + try: db.session.query(Log).filter(sa.and_( Log.logger.in_( @@ -262,6 +265,7 @@ def update_activities(dataset_name): )) db.session.commit() + if ignore_hashes: db.session._update_all_unique = False def update_dataset(dataset_name, ignore_hashes): ''' @@ -307,7 +311,7 @@ def update_dataset(dataset_name, ignore_hashes): if resource.last_status_code == 200 and not resource.last_parsed: queue.enqueue( - update_activities, args=(dataset_name,), + update_activities, args=(dataset_name, ignore_hashes), result_ttl=0, job_timeout=100000) diff --git a/iati_datastore/iatilib/model.py b/iati_datastore/iatilib/model.py index 5249c7cc..777c049e 100644 --- a/iati_datastore/iatilib/model.py +++ b/iati_datastore/iatilib/model.py @@ -37,7 +37,13 @@ def _unique(session, cls, hashfunc, queryfunc, constructor, arg, kw): key = (cls, hashfunc(*arg, **kw)) if key in cache: - return cache[key] + if getattr(session, '_update_all_unique', False): + obj = cache[key] + for name, value in kw.items(): + setattr(obj, name, value) + return obj + else: + return cache[key] else: with session.no_autoflush: q = session.query(cls) @@ -46,6 +52,9 @@ def _unique(session, cls, hashfunc, queryfunc, constructor, arg, kw): if not obj: obj = constructor(*arg, **kw) session.add(obj) + elif getattr(session, '_update_all_unique', False): + for name, value in kw.items(): + setattr(obj, name, value) cache[key] = obj return obj @@ -195,6 +204,7 @@ class Organisation(db.Model, UniqueMixin): id = sa.Column(sa.Integer, primary_key=True, nullable=False) ref = sa.Column(sa.Unicode, nullable=False) name = sa.Column(sa.Unicode, default=u"", nullable=True) + name_all_values = sa.Column(JSONB, nullable=True) type = sa.Column(codelists.OrganisationType.db_type()) __table_args__ = (sa.UniqueConstraint('ref', 'name', 'type'),) diff --git a/iati_datastore/iatilib/parse.py b/iati_datastore/iatilib/parse.py index 8a4cbd74..df462c48 100644 --- a/iati_datastore/iatilib/parse.py +++ b/iati_datastore/iatilib/parse.py @@ -103,10 +103,26 @@ def xpath_decimal(xpath, xml, resource=None, major_version='1'): return None -def parse_org(xml, resource=no_resource, major_version='1'): +def xvals_lang(xml, major_version, default_lang="default"): + ret = {} + if major_version == '1': + for ele in xml.xpath("."): + lang = xval(ele, "@xml:lang", default_lang) + value = xval(ele, "text()") + ret[lang] = value + else: + for ele in xml.xpath("./narrative"): + lang = xval(ele, "@xml:lang", default_lang) + value = xval(ele, "text()") + ret[lang] = value + return ret + + +def parse_org(xml, resource=no_resource, major_version='1', default_lang="default"): data = { "ref": xval(xml, "@ref", u""), "name": xval(xml, TEXT_ELEMENT[major_version], u""), + "name_all_values": xvals_lang(xml, TEXT_ELEMENT[major_version], default_lang=default_lang) } try: data['type'] = codelists.by_major_version[major_version].OrganisationType.from_string(xval(xml, "@type")) @@ -115,7 +131,7 @@ def parse_org(xml, resource=no_resource, major_version='1'): return Organisation.as_unique(db.session, **data) -def reporting_org(element, resource=no_resource, major_version='1'): +def reporting_org(element, resource=no_resource, major_version='1', default_lang="default"): try: xml = element.xpath("./reporting-org")[0] except IndexError: @@ -125,6 +141,7 @@ def reporting_org(element, resource=no_resource, major_version='1'): data = { "ref": xval(xml, "@ref"), "name": xval(xml, TEXT_ELEMENT[major_version], u""), + "name_all_values": xvals_lang(xml, TEXT_ELEMENT[major_version], default_lang=default_lang) } try: data.update({ @@ -143,7 +160,7 @@ def reporting_org(element, resource=no_resource, major_version='1'): return Organisation.as_unique(db.session, **data) -def participating_orgs(xml, resource=None, major_version='1'): +def participating_orgs(xml, resource=None, major_version='1', default_lang="default"): ret = [] seen = set() for ele in xml.xpath("./participating-org"): @@ -164,7 +181,7 @@ def participating_orgs(xml, resource=None, major_version='1'): role = codelists.by_major_version['1'].OrganisationRole.from_string(value) else: role = codelists.by_major_version[major_version].OrganisationRole.from_string(xval(ele, "@role").title()) - organisation = parse_org(ele, major_version=major_version) + organisation = parse_org(ele, major_version=major_version, default_lang=default_lang) if not (role, organisation.ref) in seen: seen.add((role, organisation.ref)) ret.append(Participation(role=role, organisation=organisation)) @@ -310,14 +327,14 @@ def description_all_values(xml, resource=None, major_version='1'): return ret -def transactions(xml, resource=no_resource, major_version='1'): +def transactions(xml, resource=no_resource, major_version='1', default_lang="default"): def from_cl(code, codelist): return codelist.from_string(code) if code is not None else None def from_org(path, ele, resource=None, major_version='1'): organisation = ele.xpath(path) if organisation: - return parse_org(organisation[0], major_version=major_version) + return parse_org(organisation[0], major_version=major_version, default_lang=default_lang) # return Organisation.as_unique(db.session, ref=org) if org else Nonejk def process(ele): @@ -537,6 +554,8 @@ def activity(xml, resource=no_resource, major_version='1', version=None): Expects xml argument of type lxml.etree._Element """ + default_lang = xval(xml, "@xml:lang", "default") + if major_version == '2': start_planned = partial(xval_date, "./activity-date[@type='1']") start_actual = partial(xval_date, "./activity-date[@type='2']") @@ -568,12 +587,12 @@ def activity(xml, resource=no_resource, major_version='1', version=None): "hierarchy": hierarchy, "last_updated_datetime": last_updated_datetime, "default_language": default_language, - "reporting_org": reporting_org, + "reporting_org": partial(reporting_org, default_lang=default_lang), "websites": websites, - "participating_orgs": participating_orgs, + "participating_orgs": partial(participating_orgs, default_lang=default_lang), "recipient_country_percentages": recipient_country_percentages, "recipient_region_percentages": recipient_region_percentages, - "transactions": transactions, + "transactions": partial(transactions, default_lang=default_lang), "start_planned": start_planned, "end_planned": end_planned, "start_actual": start_actual, diff --git a/iati_datastore/iatilib/test/fixtures/localised-org-names-default.xml b/iati_datastore/iatilib/test/fixtures/localised-org-names-default.xml new file mode 100644 index 00000000..7a1e5254 --- /dev/null +++ b/iati_datastore/iatilib/test/fixtures/localised-org-names-default.xml @@ -0,0 +1,51 @@ + + + DE-1-998966376 + + Bundesministerium für wirtschaftliche Zusammenarbeit und Entwicklung (BMZ) + Federal Ministry for Economic Cooperation and Development (BMZ) + + + <narrative xml:lang="pt">Esgotamento Sanitário Pernambuco</narrative> + <narrative>Abwasserentsorgung Pernambuco (Invest.)</narrative> + + + Bundesministerium für wirtschaftliche Zusammenarbeit und Entwicklung (BMZ) + Federal Ministry for Economic Cooperation and Development (BMZ) + + + KfW Bankengruppe (KfW) + + + KfW Bankengruppe (KfW) + + + + + 1749796.82 + + Aid from other bilateral donors + + + Norwegische Agentur für Entwicklungszusammenarbeit (NORAD) + + + Pflege Dänemark + + + + + + 4555468.00 + + Aid from other bilateral donors + + + ActionAid Bangladesch + + + SKS-Stiftung + + + + diff --git a/iati_datastore/iatilib/test/fixtures/localised-org-names.xml b/iati_datastore/iatilib/test/fixtures/localised-org-names.xml new file mode 100644 index 00000000..acbe19f2 --- /dev/null +++ b/iati_datastore/iatilib/test/fixtures/localised-org-names.xml @@ -0,0 +1,51 @@ + + + DE-1-198966376 + + Bundesministerium für wirtschaftliche Zusammenarbeit und Entwicklung (BMZ) + Federal Ministry for Economic Cooperation and Development (BMZ) + + + <narrative xml:lang="pt">Esgotamento Sanitário Pernambuco</narrative> + <narrative xml:lang="de">Abwasserentsorgung Pernambuco (Invest.)</narrative> + + + Bundesministerium für wirtschaftliche Zusammenarbeit und Entwicklung (BMZ) + Federal Ministry for Economic Cooperation and Development (BMZ) + + + KfW Bankengruppe (KfW) + + + KfW Bankengruppe (KfW) + + + + + 1749796.82 + + Aid from other bilateral donors + + + Norwegian Agency for Development Cooperation (NORAD) + + + Care Danmark + + + + + + 4555468.00 + + Aid from other bilateral donors + + + ActionAid Bangladesh + + + SKS Foundation + + + + diff --git a/iati_datastore/iatilib/test/test_parser.py b/iati_datastore/iatilib/test/test_parser.py index d14b1f3f..1d8c5300 100644 --- a/iati_datastore/iatilib/test/test_parser.py +++ b/iati_datastore/iatilib/test/test_parser.py @@ -1099,3 +1099,95 @@ def test_budget_conversion_usd(self): def test_budget_conversion_eur(self): self.assertEquals(548485.69, self.act.budgets[0].value_eur) # 2011-08-01: GBP 480637 +class TestLocalisedOrganisationNames(AppTestCase): + def setUp(self): + super().setUp() + self.activities = list(parse.document_from_file(fixture_filename("localised-org-names.xml"))) + self.act = self.activities[0] + + def test_title_all_values(self): + self.assertEquals( + { + 'pt': 'Esgotamento Sanitário Pernambuco', + 'de': "Abwasserentsorgung Pernambuco (Invest.)" + }, + self.act.title_all_values + ) + + def test_reporting_org_name(self): + self.assertEquals( + self.act.reporting_org.name_all_values, + {'de': 'Bundesministerium für wirtschaftliche Zusammenarbeit und Entwicklung (BMZ)', + 'en': 'Federal Ministry for Economic Cooperation and Development (BMZ)'} + ) + + def test_participating_orgs(self): + self.assertEquals( + self.act.participating_orgs[0].organisation.name_all_values, + {"de": 'Bundesministerium für wirtschaftliche Zusammenarbeit und Entwicklung (BMZ)', + "en": 'Federal Ministry for Economic Cooperation and Development (BMZ)'} + ) + self.assertEquals( + self.act.participating_orgs[1].organisation.name_all_values, + {"de": 'KfW Bankengruppe (KfW)'} + ) + self.assertEquals( + self.act.participating_orgs[2].organisation.name_all_values, + {"de": 'KfW Bankengruppe (KfW)'} + ) + + def test_transaction_reciever_org_name(self): + self.assertEquals(self.act.transactions[0].provider_org.name_all_values, + {"en": 'Norwegian Agency for Development Cooperation (NORAD)'} + ) + self.assertEquals(self.act.transactions[0].receiver_org.name_all_values, + {"en": 'Care Danmark'} + ) + self.assertEquals(self.act.transactions[1].provider_org.name_all_values, + {"en": 'ActionAid Bangladesh'} + ) + self.assertEquals(self.act.transactions[1].receiver_org.name_all_values, + {"en": 'SKS Foundation'} + ) + +class TestLocalisedOrganisationNamesActivityDefault(AppTestCase): + def setUp(self): + super().setUp() + self.activities = list(parse.document_from_file(fixture_filename("localised-org-names-default.xml"))) + self.act = self.activities[0] + + def test_reporting_org_name(self): + self.assertEquals( + self.act.reporting_org.name_all_values, + {'de': 'Bundesministerium für wirtschaftliche Zusammenarbeit und Entwicklung (BMZ)', + 'en': 'Federal Ministry for Economic Cooperation and Development (BMZ)'} + ) + + def test_participating_orgs(self): + self.assertEquals( + self.act.participating_orgs[0].organisation.name_all_values, + {"de": 'Bundesministerium für wirtschaftliche Zusammenarbeit und Entwicklung (BMZ)', + "en": 'Federal Ministry for Economic Cooperation and Development (BMZ)'} + ) + self.assertEquals( + self.act.participating_orgs[1].organisation.name_all_values, + {"de": 'KfW Bankengruppe (KfW)'} + ) + self.assertEquals( + self.act.participating_orgs[2].organisation.name_all_values, + {"de": 'KfW Bankengruppe (KfW)'} + ) + + def test_transaction_reciever_org_name(self): + self.assertEquals(self.act.transactions[0].provider_org.name_all_values, + {"de": 'Norwegische Agentur für Entwicklungszusammenarbeit (NORAD)'} + ) + self.assertEquals(self.act.transactions[0].receiver_org.name_all_values, + {"de": 'Pflege Dänemark'} + ) + self.assertEquals(self.act.transactions[1].provider_org.name_all_values, + {"de": 'ActionAid Bangladesch'} + ) + self.assertEquals(self.act.transactions[1].receiver_org.name_all_values, + {"de": 'SKS-Stiftung'} + ) diff --git a/migrations/versions/864375812164_merging_two_heads.py b/migrations/versions/864375812164_merging_two_heads.py new file mode 100644 index 00000000..071dcd65 --- /dev/null +++ b/migrations/versions/864375812164_merging_two_heads.py @@ -0,0 +1,24 @@ +"""merging two heads + +Revision ID: 864375812164 +Revises: c6fe2cf16adb, 3daa1d4ab046 +Create Date: 2022-12-13 17:03:56.861063 + +""" +from alembic import op +import sqlalchemy as sa + + +# revision identifiers, used by Alembic. +revision = '864375812164' +down_revision = ('c6fe2cf16adb', '3daa1d4ab046') +branch_labels = None +depends_on = None + + +def upgrade(): + pass + + +def downgrade(): + pass diff --git a/migrations/versions/c6fe2cf16adb_add_name_all_values_column_to_.py b/migrations/versions/c6fe2cf16adb_add_name_all_values_column_to_.py new file mode 100644 index 00000000..575b32c5 --- /dev/null +++ b/migrations/versions/c6fe2cf16adb_add_name_all_values_column_to_.py @@ -0,0 +1,28 @@ +"""Add name_all_values column to Organisation table + +Revision ID: c6fe2cf16adb +Revises: fdc7716bb5ea +Create Date: 2022-11-02 10:36:23.526062 + +""" +from alembic import op +import sqlalchemy as sa +from sqlalchemy.dialects import postgresql + +# revision identifiers, used by Alembic. +revision = 'c6fe2cf16adb' +down_revision = 'fdc7716bb5ea' +branch_labels = None +depends_on = None + + +def upgrade(): + # ### commands auto generated by Alembic - please adjust! ### + op.add_column('organisation', sa.Column('name_all_values', postgresql.JSONB(astext_type=sa.Text()), nullable=True)) + # ### end Alembic commands ### + + +def downgrade(): + # ### commands auto generated by Alembic - please adjust! ### + op.drop_column('organisation', 'name_all_values') + # ### end Alembic commands ###