diff --git a/iati_datastore/iatilib/crawler.py b/iati_datastore/iatilib/crawler.py
index e60a8378..91c6f2aa 100644
--- a/iati_datastore/iatilib/crawler.py
+++ b/iati_datastore/iatilib/crawler.py
@@ -157,8 +157,8 @@ def hash(string):
return m.digest()
-def parse_activity(new_identifiers, old_xml, resource):
- for activity in parse.document_from_bytes(resource.document, resource):
+def parse_activity(new_identifiers, old_xml, resource, ignore_hashes=False):
+ for activity in parse.document_from_bytes(resource.document, resource, ignore_hashes):
activity.resource = resource
if activity.iati_identifier not in new_identifiers:
@@ -184,7 +184,7 @@ def parse_activity(new_identifiers, old_xml, resource):
db.session.commit()
-def parse_resource(resource):
+def parse_resource(resource, ignore_hashes=False):
db.session.add(resource)
current = Activity.query.filter_by(resource_url=resource.url)
current_identifiers = set([i.iati_identifier for i in current.all()])
@@ -197,7 +197,7 @@ def parse_resource(resource):
db.session.query(Activity).filter_by(resource_url=resource.url).delete()
new_identifiers = set()
- parse_activity(new_identifiers, old_xml, resource)
+ parse_activity(new_identifiers, old_xml, resource, ignore_hashes)
resource.version = parse.document_metadata(resource.document)
@@ -240,7 +240,7 @@ def update_activities(dataset_name, ignore_hashes=False):
dataset = Dataset.query.get(dataset_name)
resource = dataset.resources[0]
- if ignore_hashes: db.session._update_all_unique = True
+ #if ignore_hashes: db.session._update_all_unique = True
try:
db.session.query(Log).filter(sa.and_(
@@ -248,7 +248,7 @@ def update_activities(dataset_name, ignore_hashes=False):
['activity_importer', 'failed_activity', 'xml_parser']),
Log.resource == dataset_name,
)).delete(synchronize_session=False)
- parse_resource(resource)
+ parse_resource(resource, ignore_hashes)
db.session.commit()
except parse.ParserError as exc:
db.session.rollback()
@@ -265,7 +265,7 @@ def update_activities(dataset_name, ignore_hashes=False):
))
db.session.commit()
- if ignore_hashes: db.session._update_all_unique = False
+ #if ignore_hashes: db.session._update_all_unique = False
def update_dataset(dataset_name, ignore_hashes):
'''
diff --git a/iati_datastore/iatilib/model.py b/iati_datastore/iatilib/model.py
index ba4e5428..bcde9851 100644
--- a/iati_datastore/iatilib/model.py
+++ b/iati_datastore/iatilib/model.py
@@ -31,15 +31,16 @@
# The "Unique Object" pattern
# http://www.sqlalchemy.org/trac/wiki/UsageRecipes/UniqueObject
def _unique(session, cls, hashfunc, queryfunc, constructor, arg, kw):
+ kwargs = {k: kw[k] for k in kw if k != 'ignore_hashes'}
cache = getattr(session, '_unique_cache', None)
if cache is None:
session._unique_cache = cache = {}
- key = (cls, hashfunc(*arg, **kw))
+ key = (cls, hashfunc(*arg, **kwargs))
if key in cache:
- if getattr(session, '_update_all_unique', False):
+ if kw.get('ignore_hashes', False):  # optional flag: as_unique callers that omit it must not hit KeyError
obj = cache[key]
- for name, value in kw.items():
+ for name, value in kwargs.items():
setattr(obj, name, value)
return obj
else:
@@ -47,10 +48,10 @@ def _unique(session, cls, hashfunc, queryfunc, constructor, arg, kw):
else:
with session.no_autoflush:
q = session.query(cls)
- q = queryfunc(q, *arg, **kw)
+ q = queryfunc(q, *arg, **kwargs)
obj = q.first()
if not obj:
- obj = constructor(*arg, **kw)
+ obj = constructor(*arg, **kwargs)
session.add(obj)
cache[key] = obj
return obj
diff --git a/iati_datastore/iatilib/parse.py b/iati_datastore/iatilib/parse.py
index 3f10f4d3..7307565f 100644
--- a/iati_datastore/iatilib/parse.py
+++ b/iati_datastore/iatilib/parse.py
@@ -118,7 +118,7 @@ def xvals_lang(xml, major_version):
return ret
-def parse_org(xml, resource=no_resource, major_version='1'):
+def parse_org(ignore_hashes, xml, resource=no_resource, major_version='1'):
data = {
"ref": xval(xml, "@ref", u""),
"name": xval(xml, TEXT_ELEMENT[major_version], u""),
@@ -128,10 +128,10 @@ def parse_org(xml, resource=no_resource, major_version='1'):
data['type'] = codelists.by_major_version[major_version].OrganisationType.from_string(xval(xml, "@type"))
except (MissingValue, ValueError):
data['type'] = None
- return Organisation.as_unique(db.session, **data)
+ return Organisation.as_unique(db.session, **data, ignore_hashes=ignore_hashes)
-def reporting_org(element, resource=no_resource, major_version='1'):
+def reporting_org(ignore_hashes, element, resource=no_resource, major_version='1'):
try:
xml = element.xpath("./reporting-org")[0]
except IndexError:
@@ -157,10 +157,10 @@ def reporting_org(element, resource=no_resource, major_version='1'):
exc_info=exe
)
- return Organisation.as_unique(db.session, **data)
+ return Organisation.as_unique(db.session, **data, ignore_hashes=ignore_hashes)
-def participating_orgs(xml, resource=None, major_version='1'):
+def participating_orgs(ignore_hashes, xml, resource=None, major_version='1'):
ret = []
seen = set()
for ele in xml.xpath("./participating-org"):
@@ -181,7 +181,7 @@ def participating_orgs(xml, resource=None, major_version='1'):
role = codelists.by_major_version['1'].OrganisationRole.from_string(value)
else:
role = codelists.by_major_version[major_version].OrganisationRole.from_string(xval(ele, "@role").title())
- organisation = parse_org(ele, major_version=major_version)
+ organisation = parse_org(ignore_hashes, ele, major_version=major_version)
if not (role, organisation.ref) in seen:
seen.add((role, organisation.ref))
ret.append(Participation(role=role, organisation=organisation))
@@ -327,14 +327,14 @@ def description_all_values(xml, resource=None, major_version='1'):
return ret
-def transactions(xml, resource=no_resource, major_version='1'):
+def transactions(ignore_hashes, xml, resource=no_resource, major_version='1'):
def from_cl(code, codelist):
return codelist.from_string(code) if code is not None else None
def from_org(path, ele, resource=None, major_version='1'):
organisation = ele.xpath(path)
if organisation:
- return parse_org(organisation[0], major_version=major_version)
+ return parse_org(ignore_hashes, organisation[0], major_version=major_version)
# return Organisation.as_unique(db.session, ref=org) if org else Nonejk
def process(ele):
@@ -549,7 +549,7 @@ def from_codelist_with_major_version(codelist_name, path, xml, resource, major_v
return from_codelist(getattr(codelists.by_major_version[major_version], codelist_name), path, xml, resource)
-def activity(xml, resource=no_resource, major_version='1', version=None):
+def activity(xml, resource=no_resource, major_version='1', version=None, ignore_hashes=False):
"""
Expects xml argument of type lxml.etree._Element
"""
@@ -585,12 +585,12 @@ def activity(xml, resource=no_resource, major_version='1', version=None):
"hierarchy": hierarchy,
"last_updated_datetime": last_updated_datetime,
"default_language": default_language,
- "reporting_org": reporting_org,
+ "reporting_org": partial(reporting_org, ignore_hashes),
"websites": websites,
- "participating_orgs": participating_orgs,
+ "participating_orgs": partial(participating_orgs, ignore_hashes),
"recipient_country_percentages": recipient_country_percentages,
"recipient_region_percentages": recipient_region_percentages,
- "transactions": transactions,
+ "transactions": partial(transactions, ignore_hashes),
"start_planned": start_planned,
"end_planned": end_planned,
"start_actual": start_actual,
@@ -637,15 +637,15 @@ def activity(xml, resource=no_resource, major_version='1', version=None):
return Activity(**data)
-def document_from_bytes(xml_resource, resource=no_resource):
- return activities(BytesIO(xml_resource), resource)
+def document_from_bytes(xml_resource, resource=no_resource, ignore_hashes=False):
+ return activities(BytesIO(xml_resource), resource, ignore_hashes)
def document_from_file(xml_resource, resource=no_resource):
return activities(open(xml_resource, 'rb'), resource)
-def activities(xmlfile, resource=no_resource):
+def activities(xmlfile, resource=no_resource, ignore_hashes=False):
major_version = '1'
version = None
try:
@@ -656,7 +656,7 @@ def activities(xmlfile, resource=no_resource):
major_version = '2'
elif event == 'end' and elem.tag == 'iati-activity':
try:
- yield activity(elem, resource=resource, major_version=major_version, version=version)
+ yield activity(elem, resource=resource, major_version=major_version, version=version, ignore_hashes=ignore_hashes)
except MissingValue as exe:
log.error(_("Failed to import a valid Activity error was: {0}".format(exe),
logger='failed_activity', dataset=resource.dataset_id, resource=resource.url),
diff --git a/iati_datastore/iatilib/test/test_parser.py b/iati_datastore/iatilib/test/test_parser.py
index 5c731ecb..106cf312 100644
--- a/iati_datastore/iatilib/test/test_parser.py
+++ b/iati_datastore/iatilib/test/test_parser.py
@@ -696,7 +696,7 @@ class TestOrganisation(AppTestCase):
def test_org_role_looseness(self):
# organisationrole should be "Implementing" but can be "implementing".
# This also tests role V1->V2 mapping.
- orgrole = parse.participating_orgs(ET.XML(
+ orgrole = parse.participating_orgs(False, ET.XML(
u''
))[0]
self.assertEquals(
@@ -705,13 +705,13 @@ def test_org_role_looseness(self):
)
def test_org_type(self):
- orgtype = parse.reporting_org(ET.XML(
+ orgtype = parse.reporting_org(False, ET.XML(
u""""""
))
self.assertEquals(cl.OrganisationType.international_ngo, orgtype.type)
def test_org_type_missing(self):
- orgtype = parse.reporting_org(ET.XML(
+ orgtype = parse.reporting_org(False, ET.XML(
u""""""
))
self.assertEquals(None, orgtype.type)
@@ -720,7 +720,7 @@ def test_org_type_missing(self):
class TestParticipation(AppTestCase):
def test_repeated_participation(self):
# Identical participations should be filtered
- participations = parse.participating_orgs(
+ participations = parse.participating_orgs(False,
ET.XML(u"""
Concern Universal
@@ -731,7 +731,7 @@ def test_repeated_participation(self):
self.assertEquals(1, len(participations))
def test_same_org_different_role(self):
- participations = parse.participating_orgs(
+ participations = parse.participating_orgs(False,
ET.XML(u"""
Concern Universal
Concern Universal
@@ -794,7 +794,7 @@ def __init__(self, methodName='runTest'):
"""
def test_missing_code(self):
- transactions = parse.transactions(
+ transactions = parse.transactions(False,
ET.XML(u'''
test
@@ -805,7 +805,7 @@ def test_missing_code(self):
self.assertEquals(1, len(transactions))
def test_big_value(self):
- transaction = parse.transactions(
+ transaction = parse.transactions(False,
ET.XML(u'''
test
@@ -817,7 +817,7 @@ def test_big_value(self):
@mock.patch('iatilib.parse.iati_decimal')
def test_iati_int_called(self, mock):
- parse.transactions(
+ parse.transactions(False,
ET.XML(u'''
test
@@ -837,7 +837,7 @@ def test_provider_activity_id(self):
"""
- transaction = parse.transactions(ET.XML(sample))[0]
+ transaction = parse.transactions(False, ET.XML(sample))[0]
self.assertEquals(u'GB-1-202907', transaction.provider_org_activity_id)
def test_provider_org_text(self):
@@ -848,7 +848,7 @@ def test_provider_org_text(self):
"""
- transaction = parse.transactions(ET.XML(sample))[0]
+ transaction = parse.transactions(False, ET.XML(sample))[0]
self.assertEquals(u'DFID', transaction.provider_org_text)
def test_receiver_activity_id(self):
@@ -859,7 +859,7 @@ def test_receiver_activity_id(self):
"""
- transaction = parse.transactions(ET.XML(sample))[0]
+ transaction = parse.transactions(False, ET.XML(sample))[0]
self.assertEquals(u'GB-CHC-1068839-dfid_ag_11-13', transaction.receiver_org_activity_id)
def test_receiver_org_text(self):
@@ -870,7 +870,7 @@ def test_receiver_org_text(self):
"""
- transaction = parse.transactions(ET.XML(sample))[0]
+ transaction = parse.transactions(False, ET.XML(sample))[0]
self.assertEquals(u'Bond', transaction.receiver_org_text)
def test_description(self):
@@ -880,30 +880,30 @@ def test_description(self):
Funds received from DFID for activities in Aug- Sept 2011
"""
- transaction = parse.transactions(ET.XML(sample))[0]
+ transaction = parse.transactions(False, ET.XML(sample))[0]
self.assertEquals(
u'Funds received from DFID for activities in Aug- Sept 2011',
transaction.description
)
def test_flow_type(self):
- transaction = parse.transactions(ET.XML(self.codelists))[0]
+ transaction = parse.transactions(False, ET.XML(self.codelists))[0]
self.assertEquals(u'30', transaction.flow_type.value)
def test_finance_type(self):
- transaction = parse.transactions(ET.XML(self.codelists))[0]
+ transaction = parse.transactions(False, ET.XML(self.codelists))[0]
self.assertEquals(u'110', transaction.finance_type.value)
def test_aid_type(self):
- transaction = parse.transactions(ET.XML(self.codelists))[0]
+ transaction = parse.transactions(False, ET.XML(self.codelists))[0]
self.assertEquals(u'B01', transaction.aid_type.value)
def test_tied_status(self):
- transaction = parse.transactions(ET.XML(self.codelists))[0]
+ transaction = parse.transactions(False, ET.XML(self.codelists))[0]
self.assertEquals(u'5', transaction.tied_status.value)
def test_disbursement_channel(self):
- transaction = parse.transactions(ET.XML(self.codelists))[0]
+ transaction = parse.transactions(False, ET.XML(self.codelists))[0]
self.assertEquals(u'2', transaction.disbursement_channel.value)