From 5c3a25b2d19cac8ae9b0af2ef5f5929de7d9cc06 Mon Sep 17 00:00:00 2001 From: Virginia Dooley Date: Fri, 7 Jun 2024 13:56:25 +0100 Subject: [PATCH 1/2] Remove inactive person links --- ...test_remove_inactive_person_identifiers.py | 65 ++++++++++++++++ .../commands/remove_inactive_person_links.py | 74 +++++++++++++++++++ 2 files changed, 139 insertions(+) create mode 100644 ynr/apps/candidates/tests/test_remove_inactive_person_identifiers.py create mode 100644 ynr/apps/people/management/commands/remove_inactive_person_links.py diff --git a/ynr/apps/candidates/tests/test_remove_inactive_person_identifiers.py b/ynr/apps/candidates/tests/test_remove_inactive_person_identifiers.py new file mode 100644 index 000000000..0aaa962d3 --- /dev/null +++ b/ynr/apps/candidates/tests/test_remove_inactive_person_identifiers.py @@ -0,0 +1,65 @@ +from unittest import TestCase + +import pytest +from candidates.models.popolo_extra import Ballot +from candidates.tests.factories import ( + ElectionFactory, + MembershipFactory, + PostFactory, +) +from django.core.management import call_command +from parties.tests.factories import PartyFactory +from people.tests.factories import PersonFactory, PersonIdentifierFactory + + +class TestPersonIdentifiers(TestCase): + def setUp(self): + self.person = PersonFactory.create() + # 200 example + PersonIdentifierFactory.create( + person=self.person, + value="https://en.wikipedia.org/wiki/Rishi_Sunak", + value_type="https://en.wikipedia_url", + ) + # 404 example + PersonIdentifierFactory.create( + person=self.person, + value="http://www.conservatives.com/about/our-team/example.com", + value_type="party_ppc_page_url", + ) + post = PostFactory.create(slug="parl.2024-07-04") + + election = ElectionFactory.create( + slug="parl.2024-07-04", + election_date="2024-07-04", + name="2024 General Election", + ) + ballot = Ballot.objects.create( + election=election, post=post, ballot_paper_id="parl.2024-07-04" + ) + party = PartyFactory.create() + MembershipFactory.create( + person=self.person, + post=post, + party=party, + ballot=ballot, + ) + + @pytest.mark.django_db + def test_remove_inactive_person_identifiers(self): + self.assertEqual(len(self.person.get_all_identifiers), 2) + self.assertEqual( + self.person.get_all_identifiers[0].value, + "https://en.wikipedia.org/wiki/Rishi_Sunak", + ) + self.assertEqual( + self.person.get_all_identifiers[1].value, + "http://www.conservatives.com/about/our-team/example.com", + ) + call_command("remove_inactive_person_links") + self.person.refresh_from_db() + self.assertEqual(len(self.person.get_all_identifiers), 1) + self.assertEqual( + self.person.get_all_identifiers[0].value, + "https://en.wikipedia.org/wiki/Rishi_Sunak", + ) diff --git a/ynr/apps/people/management/commands/remove_inactive_person_links.py b/ynr/apps/people/management/commands/remove_inactive_person_links.py new file mode 100644 index 000000000..e20ee3771 --- /dev/null +++ b/ynr/apps/people/management/commands/remove_inactive_person_links.py @@ -0,0 +1,74 @@ +from typing import List +from urllib.parse import urlparse + +import requests +from django.core.management.base import BaseCommand +from people.models import Person +from popolo.models import Membership + + +def get_domain(url): + parsed_url = urlparse(url) + return parsed_url.netloc + + +def is_facebook_url(url): + domain = get_domain(url) + return "facebook.com" in domain or "fb.com" in domain + + +class Command(BaseCommand): + """ + Test and remove inactive or dead links from Person objects. + """ + + def handle(self, *args, **options): + """ + Iterate over all Person objects and check if the + person identifier urls return a 200 status code. + """ + inactive_links: List[List] = [] + # facebook_url is any url with facebook or fb in the url + memberships = Membership.objects.filter( + ballot__election__slug="parl.2024-07-04" + ) + + people = Person.objects.all().filter(memberships__in=memberships) + for person in people: + person_identifiers = person.get_all_identifiers + person_identifiers = [ + identifier + for identifier in person_identifiers + if identifier.value.startswith("http") + ] + + if not person_identifiers: + continue + for identifier in person_identifiers: + resp = None + try: + resp = requests.get(identifier.value, timeout=2).status_code + except requests.exceptions.RequestException as e: + self.stdout.write( + f"Request exception: {e} for {person.name}" + ) + pass + if resp == 404 and not is_facebook_url(identifier.value): + self.stdout.write( + f"Status code: {resp} for {person.name} {identifier.value}" + ) + inactive_links.append( + [ + str(person.pk), + person.name, + identifier.value, + str(resp), + ] + ) + # delete the identifier from the person identifiers + identifier.person.get_all_identifiers.remove(identifier) + identifier.delete() + identifier.person.save() + print( + f"Deleted {identifier.value_type}:{identifier.value} from {person.name}" + ) From 5395206207a4cc892c9ed6c429ff84ee3335273b Mon Sep 17 00:00:00 2001 From: Virginia Dooley Date: Wed, 12 Jun 2024 08:17:35 +0100 Subject: [PATCH 2/2] fixup! Remove inactive person links --- ynr/apps/candidatebot/helpers.py | 8 ++++++++ .../candidatebot_remove_inactive_person_links.py} | 14 ++++++++++---- .../test_remove_inactive_person_identifiers.py | 7 ++++++- 3 files changed, 24 insertions(+), 5 deletions(-) rename ynr/apps/{people/management/commands/remove_inactive_person_links.py => candidatebot/management/commands/candidatebot_remove_inactive_person_links.py} (84%) diff --git a/ynr/apps/candidatebot/helpers.py b/ynr/apps/candidatebot/helpers.py index 46bf2ac64..a9eec573b 100644 --- a/ynr/apps/candidatebot/helpers.py +++ b/ynr/apps/candidatebot/helpers.py @@ -1,3 +1,4 @@ +import contextlib import re import pypandoc @@ -196,3 +197,10 @@ def add_theyworkforyou_id(self, twfy_id): value = f"https://www.theyworkforyou.com/mp/{twfy_id}/" internal_id = f"uk.org.publicwhip/person/{twfy_id}" self.edit_field("theyworkforyou", value, internal_id=internal_id) + + def remove_person_identifier(self, identifier): + with contextlib.suppress(PersonIdentifier.DoesNotExist): + self.person.get_all_identifiers.remove(identifier) + self.person.save() + identifier.delete() + return self.person diff --git a/ynr/apps/people/management/commands/remove_inactive_person_links.py b/ynr/apps/candidatebot/management/commands/candidatebot_remove_inactive_person_links.py similarity index 84% rename from ynr/apps/people/management/commands/remove_inactive_person_links.py rename to ynr/apps/candidatebot/management/commands/candidatebot_remove_inactive_person_links.py index e20ee3771..7fa83f887 100644 --- a/ynr/apps/people/management/commands/remove_inactive_person_links.py +++ b/ynr/apps/candidatebot/management/commands/candidatebot_remove_inactive_person_links.py @@ -2,6 +2,7 @@ from urllib.parse import urlparse import requests +from candidatebot.helpers import CandidateBot from django.core.management.base import BaseCommand from people.models import Person from popolo.models import Membership @@ -22,6 +23,12 @@ class Command(BaseCommand): Test and remove inactive or dead links from Person objects. """ + def add_arguments(self, parser): + parser.add_argument( + "--person-id", + help="Person ID to test", + ) + def handle(self, *args, **options): """ Iterate over all Person objects and check if the @@ -66,9 +73,8 @@ def handle(self, *args, **options): ] ) # delete the identifier from the person identifiers - identifier.person.get_all_identifiers.remove(identifier) - identifier.delete() - identifier.person.save() + bot = CandidateBot(person.pk, ignore_errors=True) + bot.remove_person_identifier(identifier) print( - f"Deleted {identifier.value_type}:{identifier.value} from {person.name}" + f"Candidatebot deleted {identifier.value_type}:{identifier.value} from {person.name}" ) diff --git a/ynr/apps/candidates/tests/test_remove_inactive_person_identifiers.py b/ynr/apps/candidates/tests/test_remove_inactive_person_identifiers.py index 0aaa962d3..fe572d47f 100644 --- a/ynr/apps/candidates/tests/test_remove_inactive_person_identifiers.py +++ b/ynr/apps/candidates/tests/test_remove_inactive_person_identifiers.py @@ -56,7 +56,12 @@ def test_remove_inactive_person_identifiers(self): self.person.get_all_identifiers[1].value, "http://www.conservatives.com/about/our-team/example.com", ) - call_command("remove_inactive_person_links") + + call_command( + "candidatebot_remove_inactive_person_links", + "--person-id", + self.person.id, + ) self.person.refresh_from_db() self.assertEqual(len(self.person.get_all_identifiers), 1) self.assertEqual(