diff --git a/api/schemas/views.py b/api/schemas/views.py index aeedc0c4f..57051f55f 100644 --- a/api/schemas/views.py +++ b/api/schemas/views.py @@ -31,7 +31,7 @@ def format_link(model): schema_models.add(model) link = '- [{0}](/api/schema/{0})'.format(model.__name__) if model.__doc__ and not re.fullmatch(DEFAULT_DOC.format(model.__name__), model.__doc__): - link += ': ' + next(l for l in model.__doc__.splitlines() if l) + link += ': ' + next(line for line in model.__doc__.splitlines() if line) return link @@ -40,7 +40,7 @@ def subclass_links(base_model, include_base=True): for model in sorted(base_model.__subclasses__(), key=lambda m: m.__name__): subclasses = subclass_links(model) if include_base: - subclasses = [INDENT + l for l in subclasses] + subclasses = [INDENT + link for link in subclasses] links.extend(subclasses) return links diff --git a/share/harvesters/gov_nih.py b/share/harvesters/gov_nih.py index 4b220a8ae..90d82d6e5 100644 --- a/share/harvesters/gov_nih.py +++ b/share/harvesters/gov_nih.py @@ -100,7 +100,7 @@ def parse_row(self, row, day_of_week): if month_column.lower() == u"all": return (None, fiscal_year, url) - elif re.match('[A-Za-z\s]* [0-9]{4}, WEEK \d+', month_column): + elif re.match(r'[A-Za-z\s]* [0-9]{4}, WEEK \d+', month_column): date = self.parse_month_column(month_column, day_of_week) return (date, fiscal_year, url) else: diff --git a/share/harvesters/oai.py b/share/harvesters/oai.py index f008d03ab..d4d423c05 100644 --- a/share/harvesters/oai.py +++ b/share/harvesters/oai.py @@ -68,7 +68,7 @@ def fetch_records(self, url: furl) -> list: if not token or not records: break - def fetch_page(self, url: furl, token: str=None) -> (list, str): + def fetch_page(self, url: furl, token: str = None) -> (list, str): if token: url.args = {'resumptionToken': token, 'verb': 'ListRecords'} diff --git a/share/harvesters/org_biorxiv_html.py b/share/harvesters/org_biorxiv_html.py index 49344463a..30eeef39d 100644 --- a/share/harvesters/org_biorxiv_html.py +++ b/share/harvesters/org_biorxiv_html.py @@ -73,7 +73,7 @@ def fetch_records(self, url, start_date, end_date): el.extract() # Links have PKs and dates in them. /content/early/YYYY/MM/DD/PK or /content/early/YYYY/MM/DD/PK.REV - identifier = re.match('/content/early/\d{4}/\d{2}/\d{2}/(\d+)(?:\.\d+)?$', link.decode()).group(1) + identifier = re.match(r'/content/early/\d{4}/\d{2}/\d{2}/(\d+)(?:\.\d+)?$', link.decode()).group(1) yield identifier, str(soup) diff --git a/share/harvesters/org_swbiodiversity.py b/share/harvesters/org_swbiodiversity.py index aa40b0c18..014d35071 100644 --- a/share/harvesters/org_swbiodiversity.py +++ b/share/harvesters/org_swbiodiversity.py @@ -31,7 +31,7 @@ def fetch_records(self, list_url): record_list = [] for record in records: - record_content = re.findall('collid=(\d+)', record.get('href')) + record_content = re.findall(r'collid=(\d+)', record.get('href')) if record_content and record_content[0] not in record_list: record_list.append(record_content[0]) total = len(record_list) diff --git a/share/management/commands/forceingest.py b/share/management/commands/forceingest.py index fc9de0038..ba31f5f85 100644 --- a/share/management/commands/forceingest.py +++ b/share/management/commands/forceingest.py @@ -130,8 +130,8 @@ def merge_agents(self, dupes): self.describe_smash(winner, losers) if self.interactive: safe_to_assume_yes = all( - loser.name == winner.name and - len(loser.identifiers.all()) == 0 + loser.name == winner.name + and len(loser.identifiers.all()) == 0 for loser in losers ) if not (safe_to_assume_yes or self.command.input_confirm('OK? (y/n) ')): diff --git a/share/models/fields.py b/share/models/fields.py index feb4c4796..47b24f9ed 100644 --- a/share/models/fields.py +++ b/share/models/fields.py @@ -279,8 +279,8 @@ def _check_relationship_model(self, from_model=None, **kwargs): self_referential = from_model == to_model # Check symmetrical attribute. - if (self_referential and self.remote_field.symmetrical and - not self.remote_field.through._meta.auto_created): + if (self_referential and self.remote_field.symmetrical + and not self.remote_field.through._meta.auto_created): errors.append( checks.Error( 'Many-to-many fields with intermediate tables must not be symmetrical.', @@ -374,8 +374,9 @@ def _check_relationship_model(self, from_model=None, **kwargs): if self.remote_field.through_fields is not None: # Validate that we're given an iterable of at least two items # and that none of them is "falsy". - if not (len(self.remote_field.through_fields) >= 2 and - self.remote_field.through_fields[0] and self.remote_field.through_fields[1]): + if not (len(self.remote_field.through_fields) >= 2 + and self.remote_field.through_fields[0] + and self.remote_field.through_fields[1]): errors.append( checks.Error( ("Field specifies 'through_fields' but does not " @@ -429,8 +430,8 @@ def _check_relationship_model(self, from_model=None, **kwargs): ) ) else: - if not (hasattr(field, 'remote_field') and - getattr(field.remote_field, 'model', None) == related_model): + if not (hasattr(field, 'remote_field') + and getattr(field.remote_field, 'model', None) == related_model): errors.append( checks.Error( "'%s.%s' is not a foreign key to '%s'." % ( @@ -477,41 +478,41 @@ def _get_m2m_reverse_attr(self, related, attr): return getattr(self, cache_attr) def contribute_to_class(self, cls, name, **kwargs): - # To support multiple relations to self, it's useful to have a non-None - # related name on symmetrical relations for internal reasons. The - # concept doesn't make a lot of sense externally ("you want me to - # specify *what* on my non-reversible relation?!"), so we set it up - # automatically. The funky name reduces the chance of an accidental - # clash. - if self.remote_field.symmetrical and ( - self.remote_field.model == "self" or self.remote_field.model == cls._meta.object_name): - self.remote_field.related_name = "%s_rel_+" % name - elif self.remote_field.is_hidden(): - # If the backwards relation is disabled, replace the original - # related_name with one generated from the m2m field name. Django - # still uses backwards relations internally and we need to avoid - # clashes between multiple m2m fields with related_name == '+'. - self.remote_field.related_name = "_%s_%s_+" % (cls.__name__.lower(), name) - - super(models.ManyToManyField, self).contribute_to_class(cls, name, **kwargs) - - # The intermediate m2m model is not auto created if: - # 1) There is a manually specified intermediate, or - # 2) The class owning the m2m field is abstract. - # 3) The class owning the m2m field has been swapped out. - if not cls._meta.abstract: - if self.remote_field.through: - def resolve_through_model(_, model, field): - field.remote_field.through = model - lazy_related_operation(resolve_through_model, cls, self.remote_field.through, field=self) - elif not cls._meta.swapped: - self.remote_field.through = create_many_to_many_intermediary_model(self, cls) - - # Add the descriptor for the m2m relation. - setattr(cls, self.name, ManyToManyDescriptor(self.remote_field, reverse=False)) - - # Set up the accessor for the m2m table name for the relation. - self.m2m_db_table = curry(self._get_m2m_db_table, cls._meta) + # To support multiple relations to self, it's useful to have a non-None + # related name on symmetrical relations for internal reasons. The + # concept doesn't make a lot of sense externally ("you want me to + # specify *what* on my non-reversible relation?!"), so we set it up + # automatically. The funky name reduces the chance of an accidental + # clash. + if self.remote_field.symmetrical and ( + self.remote_field.model == "self" or self.remote_field.model == cls._meta.object_name): + self.remote_field.related_name = "%s_rel_+" % name + elif self.remote_field.is_hidden(): + # If the backwards relation is disabled, replace the original + # related_name with one generated from the m2m field name. Django + # still uses backwards relations internally and we need to avoid + # clashes between multiple m2m fields with related_name == '+'. + self.remote_field.related_name = "_%s_%s_+" % (cls.__name__.lower(), name) + + super(models.ManyToManyField, self).contribute_to_class(cls, name, **kwargs) + + # The intermediate m2m model is not auto created if: + # 1) There is a manually specified intermediate, or + # 2) The class owning the m2m field is abstract. + # 3) The class owning the m2m field has been swapped out. + if not cls._meta.abstract: + if self.remote_field.through: + def resolve_through_model(_, model, field): + field.remote_field.through = model + lazy_related_operation(resolve_through_model, cls, self.remote_field.through, field=self) + elif not cls._meta.swapped: + self.remote_field.through = create_many_to_many_intermediary_model(self, cls) + + # Add the descriptor for the m2m relation. + setattr(cls, self.name, ManyToManyDescriptor(self.remote_field, reverse=False)) + + # Set up the accessor for the m2m table name for the relation. + self.m2m_db_table = curry(self._get_m2m_db_table, cls._meta) class ShareManyToManyField(ShareRelatedField, TypedManyToManyField): diff --git a/share/models/jobs.py b/share/models/jobs.py index 0eca794c2..abaff8fdb 100644 --- a/share/models/jobs.py +++ b/share/models/jobs.py @@ -302,7 +302,7 @@ class Meta: class LockableQuerySet(models.QuerySet): - LOCK_ACQUIRED = re.sub('\s\s+', ' ', ''' + LOCK_ACQUIRED = re.sub(r'\s\s+', ' ', ''' pg_try_advisory_lock(%s::REGCLASS::INTEGER, "{0.model._meta.db_table}"."{0.column}") ''').strip() diff --git a/share/models/validators.py b/share/models/validators.py index a372c3677..32602e84d 100644 --- a/share/models/validators.py +++ b/share/models/validators.py @@ -56,7 +56,7 @@ def __call__(self, value): self.validate_node(node, refs, nodes) except exceptions.ValidationError as e: e.path.appendleft(i) # Hack to add in a leading slash - raise ValidationError('{} at /@graph/{}'.format(e.message, i, '/'.join(str(x) for x in e.path))) + raise ValidationError('{} at /@graph/{}'.format(e.message, '/'.join(str(x) for x in e.path))) if refs['blank'] - nodes['blank']: raise ValidationError('Unresolved references {}'.format(json.dumps([ diff --git a/share/regulate/steps/normalize_iris.py b/share/regulate/steps/normalize_iris.py index c41cb2caf..30de9732f 100644 --- a/share/regulate/steps/normalize_iris.py +++ b/share/regulate/steps/normalize_iris.py @@ -74,6 +74,6 @@ def regulate_node(self, node): self.info('Discarding identifier based on invalid authority "{}"'.format(ret['authority']), node.id) node.delete() - except InvalidIRI as e: + except InvalidIRI: self.info('Discarding identifier based on unrecognized IRI "{}"'.format(old_iri), node.id) node.delete() diff --git a/share/search/__init__.py b/share/search/__init__.py index f91415e31..761a83a02 100644 --- a/share/search/__init__.py +++ b/share/search/__init__.py @@ -58,13 +58,11 @@ def pks_to_reindex(self, model, pks): Q( outgoing_creative_work_relations__type=parent_relation, outgoing_creative_work_relations__related_id__in=pks - ) | - Q( + ) | Q( outgoing_creative_work_relations__type=parent_relation, outgoing_creative_work_relations__related__outgoing_creative_work_relations__type=parent_relation, outgoing_creative_work_relations__related__outgoing_creative_work_relations__related_id__in=pks - ) | - Q( + ) | Q( outgoing_creative_work_relations__type=parent_relation, outgoing_creative_work_relations__related__outgoing_creative_work_relations__type=parent_relation, outgoing_creative_work_relations__related__outgoing_creative_work_relations__related__outgoing_creative_work_relations__type=parent_relation, diff --git a/share/search/daemon.py b/share/search/daemon.py index 41e796468..5cfb54a1f 100644 --- a/share/search/daemon.py +++ b/share/search/daemon.py @@ -126,7 +126,7 @@ def _action_loop(self, model_name, timeout=5): logger.info('%r: Prepared %d %ss to be indexed in %.02fs', self, len(msgs), model_name, time.time() - start) except Exception as e: client.captureException() - logger.exception('%r: _action_loop(%s) encountered an unexpected error', self, model_name) + logger.exception('%r: _action_loop(%s) encountered an unexpected error (%s)', self, model_name, e) self.stop() def _actions(self, msgs, timeout=5): @@ -168,7 +168,7 @@ def _index_loop(self): logger.debug('%r: Recieved no messages for %.02fs', self, time.time() - start) except Exception as e: client.captureException() - logger.exception('%r: _index_loop encountered an unexpected error', self) + logger.exception('%r: _index_loop encountered an unexpected error (%s)', self, e) self.stop() def __repr__(self): diff --git a/share/tasks/jobs.py b/share/tasks/jobs.py index b96006e13..c86362d37 100644 --- a/share/tasks/jobs.py +++ b/share/tasks/jobs.py @@ -377,7 +377,7 @@ def _apply_changes(self, job, graph, normalized_datum): matches = change_set_builder.matches # Retry if it was just the wrong place at the wrong time - except (exceptions.IngestConflict, OperationalError) as e: + except (exceptions.IngestConflict, OperationalError): job.retries = (job.retries or 0) + 1 job.save(update_fields=('retries',)) if job.retries > self.MAX_RETRIES: diff --git a/share/transform/chain/links.py b/share/transform/chain/links.py index ddc78cc3c..c12de0c83 100644 --- a/share/transform/chain/links.py +++ b/share/transform/chain/links.py @@ -716,7 +716,7 @@ def checksum(cls, digits): for lower, upper in cls.BOUNDS: if (not lower or lower < literal) and (not upper or upper > literal): return - raise InvalidIRI('\'{0}\' is outside reserved {1} range.'.format(digits, cls.FORMAT, lower, upper)) + raise InvalidIRI('\'{0}\' is outside reserved {1} range.'.format(digits, cls.FORMAT)) def _parse(self, obj): match = re.search(self.ISNI_RE, obj.upper()) @@ -924,7 +924,7 @@ class InfoURILink(AbstractIRILink): # https://tools.ietf.org/html/rfc4452 SCHEME = 'info' - INFO_RE = re.compile('^\s*info:([\w-]+)(/\S+)\s*$') + INFO_RE = re.compile(r'^\s*info:([\w-]+)(/\S+)\s*$') @classmethod def hint(cls, obj): @@ -946,8 +946,8 @@ def _parse(self, obj): class ISBNLink(AbstractIRILink): SCHEME = 'urn' AUTHORITY = 'isbn' - ISBN10_RE = re.compile('^(?:urn:\/\/isbn\/|ISBN:? ?)?(\d\d?)-(\d{3,7})-(\d{1,6})-(\d|x)$', re.I) - ISBN13_RE = re.compile('^(?:urn://isbn/|ISBN:? ?)?(978|979)-(\d\d?)-(\d{3,5})-(\d{2,5})-(\d)$', re.I) + ISBN10_RE = re.compile(r'^(?:urn:\/\/isbn\/|ISBN:? ?)?(\d\d?)-(\d{3,7})-(\d{1,6})-(\d|x)$', re.I) + ISBN13_RE = re.compile(r'^(?:urn://isbn/|ISBN:? ?)?(978|979)-(\d\d?)-(\d{3,5})-(\d{2,5})-(\d)$', re.I) @classmethod def hint(cls, obj): diff --git a/share/transform/chain/utils.py b/share/transform/chain/utils.py index 400163449..35365701f 100644 --- a/share/transform/chain/utils.py +++ b/share/transform/chain/utils.py @@ -123,9 +123,9 @@ def get_emails(s): # Removing lines that start with '//' because the regular expression # mistakenly matches patterns like 'http://foo@bar.com' as '//foo@bar.com'. # Adopted from code by Dennis Ideler ideler.dennis@gmail.com - regex = re.compile(("([a-z0-9!#$%&'*+\/=?^_`{|}~-]+(?:\.[a-z0-9!#$%&'*+\/=?^_`" - "{|}~-]+)*(@|\sat\s)(?:[a-z0-9](?:[a-z0-9-]*[a-z0-9])?(\.|" - "\sdot\s))+[a-z0-9](?:[a-z0-9-]*[a-z0-9])?)")) + regex = re.compile((r"([a-z0-9!#$%&'*+\/=?^_`{|}~-]+(?:\.[a-z0-9!#$%&'*+\/=?^_`" + r"{|}~-]+)*(@|\sat\s)(?:[a-z0-9](?:[a-z0-9-]*[a-z0-9])?(\.|" + r"\sdot\s))+[a-z0-9](?:[a-z0-9-]*[a-z0-9])?)")) s = s.lower() result = re.findall(regex, s) if result: diff --git a/share/transformers/edu_ageconsearch.py b/share/transformers/edu_ageconsearch.py index 4052ae71f..27606b6df 100644 --- a/share/transformers/edu_ageconsearch.py +++ b/share/transformers/edu_ageconsearch.py @@ -121,7 +121,7 @@ def get_agent_emails(self, ctx, agent_key, email_key): agent_email = next((x for x in emails if agent in x), None) if agent_email: - agent_object['email'] = re.compile('\((\S+?)\)').search(agent_email).group(1) + agent_object['email'] = re.compile(r'\((\S+?)\)').search(agent_email).group(1) agent_objects.append(agent_object) return agent_objects diff --git a/share/transformers/oai.py b/share/transformers/oai.py index 617b4623a..d8a47db0f 100644 --- a/share/transformers/oai.py +++ b/share/transformers/oai.py @@ -310,7 +310,7 @@ class RootParser(OAICreativeWork): type_map = root_type_map if property_list: - logger.debug('Attaching addition properties %s to transformer for %s'.format(property_list, self.config.label)) + logger.debug('Attaching addition properties %s to transformer for %s', property_list, self.config.label) for prop in property_list: if prop in RootParser._extra: logger.warning('Skipping property %s, it already exists', prop) diff --git a/share/transformers/org_neurovault.py b/share/transformers/org_neurovault.py index 4691578bc..684e7f9e1 100644 --- a/share/transformers/org_neurovault.py +++ b/share/transformers/org_neurovault.py @@ -39,7 +39,7 @@ class CreativeWork(Parser): def parse_names(self, authors): if not authors: return [] - return re.split(',\s|\sand\s', authors) + return re.split(r',\s|\sand\s', authors) class NeurovaultTransformer(ChainTransformer): diff --git a/tests/api/test_sources_endpoint.py b/tests/api/test_sources_endpoint.py index 80eebc4c8..6695190c1 100644 --- a/tests/api/test_sources_endpoint.py +++ b/tests/api/test_sources_endpoint.py @@ -18,8 +18,8 @@ def exceptionCallback(request, uri, headers): - time.sleep(6) - return (400, headers, uri) + time.sleep(6) + return (400, headers, uri) @pytest.fixture diff --git a/tests/api/test_validator.py b/tests/api/test_validator.py index f771d59ad..1740cbdfb 100644 --- a/tests/api/test_validator.py +++ b/tests/api/test_validator.py @@ -264,7 +264,7 @@ class TestValidator: "'INSTITUTION', 'Institution', 'ORGANIZATION', " "'Organization', 'PERSON', 'Person', 'agent', " "'consortium', 'department', 'institution', 'organization', 'person'" - "] at /@graph/1", + "] at /@graph/1/agent/@type", 'source': {'pointer': '/data/attributes/data'}, 'status': '400' }] diff --git a/tests/share/harvesters/test_swbiodiversity_harvester.py b/tests/share/harvesters/test_swbiodiversity_harvester.py index 5cb02ec2c..ded09aae2 100644 --- a/tests/share/harvesters/test_swbiodiversity_harvester.py +++ b/tests/share/harvesters/test_swbiodiversity_harvester.py @@ -107,7 +107,7 @@ def test_swbiodiversity_harvester(): body=main_page, content_type='text/html', match_querystring=True) collection = furl(url) collection.args['collid'] = 223 - httpretty.register_uri(httpretty.GET, url + ';collid=(\d+)', + httpretty.register_uri(httpretty.GET, url + r';collid=(\d+)', body=collection_page, content_type='text/html', match_querystring=True) start = pendulum.utcnow() - timedelta(days=3) end = pendulum.utcnow() diff --git a/tests/validation/test_changeset.py b/tests/validation/test_changeset.py index ccfc79529..f5414f15b 100644 --- a/tests/validation/test_changeset.py +++ b/tests/validation/test_changeset.py @@ -153,7 +153,7 @@ class TestJSONLDValidator: }] } }, { - 'out': "1 is not of type 'string' at /@graph/0", + 'out': "1 is not of type 'string' at /@graph/0/name", 'in': { '@graph': [{ '@id': '_:123', @@ -185,7 +185,7 @@ class TestJSONLDValidator: }] } }, { - 'out': "'throughtugs' is not one of ['THROUGHTAGS', 'ThroughTags', 'throughtags'] at /@graph/0", + 'out': "'throughtugs' is not one of ['THROUGHTAGS', 'ThroughTags', 'throughtags'] at /@graph/0/tags/0/@type", 'in': { '@graph': [{ '@id': '_:123', @@ -208,7 +208,7 @@ class TestJSONLDValidator: }] } }, { - 'out': "'giraffe' is not a 'uri' at /@graph/0", + 'out': "'giraffe' is not a 'uri' at /@graph/0/uri", 'in': { '@graph': [{ '@id': '_:123',