From 74221915ce21e1c7adbaec8b3d6a172bd193fb4b Mon Sep 17 00:00:00 2001
From: Chad Whitacre
Date: Sat, 30 Apr 2016 18:06:45 -0400
Subject: [PATCH] implement encrypted national identities

---
 gratipay/models/participant/__init__.py       |   3 +-
 .../models/participant/mixins/__init__.py     |   3 +
 .../models/participant/mixins/identity.py     | 186 ++++++++++++++++++
 gratipay/wireup.py                            |   3 +-
 sql/branch.sql                                |   9 +
 tests/py/test_participant_identities.py       | 146 ++++++++++++++++
 tests/py/test_security.py                     |  10 +-
 7 files changed, 353 insertions(+), 7 deletions(-)
 create mode 100644 gratipay/models/participant/mixins/__init__.py
 create mode 100644 gratipay/models/participant/mixins/identity.py
 create mode 100644 tests/py/test_participant_identities.py

diff --git a/gratipay/models/participant/__init__.py b/gratipay/models/participant/__init__.py
index 440a1a9a03..5271d84459 100644
--- a/gratipay/models/participant/__init__.py
+++ b/gratipay/models/participant/__init__.py
@@ -38,6 +38,7 @@
 from gratipay.models.account_elsewhere import AccountElsewhere
 from gratipay.models.exchange_route import ExchangeRoute
 from gratipay.models.team import Team
+from gratipay.models.participant import mixins
 from gratipay.security.crypto import constant_time_compare
 from gratipay.utils import (
     i18n,
@@ -60,7 +61,7 @@
 USERNAME_MAX_SIZE = 32
 
 
-class Participant(Model):
+class Participant(Model, mixins.Identity):
     """Represent a Gratipay participant.
     """
 
diff --git a/gratipay/models/participant/mixins/__init__.py b/gratipay/models/participant/mixins/__init__.py
new file mode 100644
index 0000000000..b05ab6ff13
--- /dev/null
+++ b/gratipay/models/participant/mixins/__init__.py
@@ -0,0 +1,3 @@
+from .identity import IdentityMixin as Identity
+
+__all__ = ['Identity']
diff --git a/gratipay/models/participant/mixins/identity.py b/gratipay/models/participant/mixins/identity.py
new file mode 100644
index 0000000000..3ae4d41146
--- /dev/null
+++ b/gratipay/models/participant/mixins/identity.py
@@ -0,0 +1,186 @@
+from __future__ import absolute_import, division, print_function, unicode_literals
+
+from psycopg2 import IntegrityError
+from gratipay.models import add_event
+
+
+class ParticipantIdentityError(StandardError): pass
+class ParticipantIdentitySchemaUnknown(ParticipantIdentityError): pass
+class ParticipantIdentityInfoInvalid(ParticipantIdentityError): pass
+
+
+# Maps a schema name to a callable that takes the ``info`` dictionary and
+# raises ParticipantIdentityInfoInvalid if it doesn't conform to that schema.
+schema_validators = {'nothing-enforced': lambda info: None}
+
+
+def _validate_info(schema_name, info):
+    """Check ``info`` against the validator registered for ``schema_name``.
+
+    :raises ParticipantIdentitySchemaUnknown: if ``schema_name`` isn't a key
+        in ``schema_validators``
+
+    Whatever the validator itself raises is propagated unchanged.
+
+    """
+    if schema_name not in schema_validators:
+        raise ParticipantIdentitySchemaUnknown("unknown schema '{}'".format(schema_name))
+    validate_schema = schema_validators[schema_name]
+    validate_schema(info)
+    return None
+
+
+class IdentityMixin(object):
+    """This mixin provides management of national identities for
+    :py:class:`~gratipay.models.participant.Participant` objects.
+
+    A participant may have zero or more national identities on file with
+    Gratipay, with at most one for any given country at any given time. When at
+    least one of a participant's national identities has been verified, then
+    they may join the payroll of one or more Teams.
+
+    Since national identity information is more sensitive than other
+    information in our database, we encrypt it in the application layer before
+    passing it to the database in :py:meth:`store_identity_info`. We then limit
+    access to the information to a single method,
+    :py:meth:`retrieve_identity_info`.
+
+    """
+
+    def store_identity_info(self, country_id, schema_name, info):
+        """Store the participant's national identity information for a given country.
+
+        :param int country_id: an ``id`` from the ``countries`` table
+        :param unicode schema_name: the name of the schema of the identity information
+        :param dict info: a dictionary of identity information
+
+        :returns: the ``id`` of the identity info's record in the
+            ``participant_identities`` table
+
+        :raises ParticipantIdentitySchemaUnknown: if ``schema_name`` doesn't
+            name a known schema
+        :raises ParticipantIdentityInfoInvalid: if the ``info`` dictionary does
+            not conform to the schema named by ``schema_name``
+        :raises IntegrityError: if the record can be neither inserted nor
+            updated (e.g., ``country_id`` is not in the ``countries`` table)
+
+        The ``info`` dictionary will be serialized to JSON and then encrypted
+        with :py:class:`~gratipay.security.crypto.EncryptingPacker` before
+        being sent to the database. We anticipate multiple schemas evolving for
+        this dictionary, with enforcement in the application layer (since the
+        field is opaque in the database layer). For now there is only one
+        available schema: ``nothing-enforced``.
+
+        """
+        _validate_info(schema_name, info)
+        info = self.encrypting_packer.pack(info)
+
+        def _add_event(cursor, identity_id, action):
+            payload = dict( id=self.id
+                          , country_id=country_id
+                          , identity_id=identity_id
+                          , action=action + ' identity'
+                           )
+            add_event(cursor, 'participant', payload)
+
+        params = dict( participant_id=self.id
+                     , country_id=country_id
+                     , info=info
+                     , schema_name=schema_name
+                      )
+
+        # Optimistically INSERT; the UNIQUE(participant_id, country_id)
+        # constraint tells us when to UPDATE the existing record instead.
+        try:
+            with self.db.get_cursor() as cursor:
+                identity_id = cursor.one("""
+
+                    INSERT INTO participant_identities
+                                (participant_id, country_id, schema_name, info)
+                         VALUES (%(participant_id)s, %(country_id)s, %(schema_name)s, %(info)s)
+                      RETURNING id
+
+                """, params)
+                _add_event(cursor, identity_id, 'insert')
+
+        except IntegrityError:
+            with self.db.get_cursor() as cursor:
+                identity_id = cursor.one("""
+
+                    UPDATE participant_identities
+                       SET schema_name=%(schema_name)s, info=%(info)s
+                     WHERE participant_id=%(participant_id)s
+                       AND country_id=%(country_id)s
+                 RETURNING id
+
+                """, params)
+                if identity_id is None:
+                    # There is no existing identity for this country, so the
+                    # INSERT failed for some other reason (e.g., a foreign
+                    # key violation). Don't mask that error; re-raise it.
+                    raise
+                _add_event(cursor, identity_id, 'update')
+
+        return identity_id
+
+
+    def retrieve_identity_info(self, country_id):
+        """Return the participant's national identity information for a given country.
+
+        :param int country_id: an ``id`` from the ``countries`` table
+
+        :returns: a dictionary of identity information, or ``None``
+
+        A ``retrieve identity`` event is logged on every call, whether or not
+        any identity information was found.
+
+        """
+        with self.db.get_cursor() as cursor:
+            identity_id, info = cursor.one("""
+
+                SELECT id, info
+                  FROM participant_identities
+                 WHERE participant_id=%s
+                   AND country_id=%s
+
+            """, (self.id, country_id), default=(None, None))
+
+            if info is not None:
+                info = bytes(info) # psycopg2 returns bytea as buffer; we want bytes
+                info = self.encrypting_packer.unpack(info)
+
+            payload = dict( id=self.id
+                          , identity_id=identity_id
+                          , country_id=country_id
+                          , action='retrieve identity'
+                           )
+
+            add_event(cursor, 'participant', payload)
+
+        return info
+
+
+    def list_identity_metadata(self):
+        """Return a list of identity metadata records, sorted by country name.
+
+        Identity metadata records have the following attributes:
+
+        :var int id: the record's primary key in the ``participant_identities`` table
+        :var Country country: the country this identity applies to
+        :var unicode schema_name: the name of the schema that the data itself conforms to
+
+        The national identity information itself is not included, only
+        metadata. Use :py:meth:`retrieve_identity_info` to get the actual data.
+
+        """
+        return self.db.all("""
+
+            SELECT pi.id
+                 , c.*::countries AS country
+                 , schema_name
+              FROM participant_identities pi
+              JOIN countries c ON pi.country_id=c.id
+             WHERE participant_id=%s
+          ORDER BY c.name
+
+        """, (self.id,))
diff --git a/gratipay/wireup.py b/gratipay/wireup.py
index 6b41d89334..b5df7bdc40 100644
--- a/gratipay/wireup.py
+++ b/gratipay/wireup.py
@@ -34,6 +34,7 @@
 from gratipay.models.country import Country
 from gratipay.models.exchange_route import ExchangeRoute
 from gratipay.models.participant import Participant
+from gratipay.models.participant.mixins import Identity
 from gratipay.models.team import Team
 from gratipay.models import GratipayDB
 from gratipay.security.crypto import EncryptingPacker
@@ -63,7 +64,7 @@ def db(env):
 
 def crypto(env):
     keys = [k.encode('ASCII') for k in env.crypto_keys.split()]
-    Participant.encrypting_packer = EncryptingPacker(*keys)
+    Identity.encrypting_packer = EncryptingPacker(*keys)
 
 def mail(env, project_root='.'):
     if env.aws_ses_access_key_id and env.aws_ses_secret_access_key and env.aws_ses_default_region:
diff --git a/sql/branch.sql b/sql/branch.sql
index a4c2fa3987..f50abdad88 100644
--- a/sql/branch.sql
+++ b/sql/branch.sql
@@ -6,3 +6,12 @@ CREATE TABLE countries -- http://www.iso.org/iso/country_codes
  );
 
 \i sql/countries.sql
+
+CREATE TABLE participant_identities
+( id                bigserial       primary key
+, participant_id    bigint          NOT NULL REFERENCES participants(id)
+, country_id        bigint          NOT NULL REFERENCES countries(id)
+, schema_name       text            NOT NULL
+, info              bytea           NOT NULL
+, UNIQUE(participant_id, country_id)
+ );
diff --git a/tests/py/test_participant_identities.py b/tests/py/test_participant_identities.py
new file mode 100644
index 0000000000..c1f251002b
--- /dev/null
+++ b/tests/py/test_participant_identities.py
@@ -0,0 +1,146 @@
+from __future__ import absolute_import, division, print_function, unicode_literals
+
+from gratipay.testing import Harness
+from gratipay.models.participant.mixins import identity, Identity
+from gratipay.models.participant.mixins.identity import _validate_info
+from gratipay.models.participant.mixins.identity import ParticipantIdentityInfoInvalid
+from gratipay.models.participant.mixins.identity import ParticipantIdentitySchemaUnknown
+from pytest import raises
+
+
+class Tests(Harness):
+
+    @classmethod
+    def setUpClass(cls):
+        Harness.setUpClass()
+        cls.TTO = cls.db.one("SELECT id FROM countries WHERE code3='TTO'")
+        cls.USA = cls.db.one("SELECT id FROM countries WHERE code3='USA'")
+
+        def _failer(info):
+            raise ParticipantIdentityInfoInvalid('You failed.')
+        identity.schema_validators['impossible'] = _failer
+
+    @classmethod
+    def tearDownClass(cls):
+        del identity.schema_validators['impossible']
+        Harness.tearDownClass()
+
+    def assert_events(self, crusher_id, identity_ids, country_ids, actions):
+        events = self.db.all("SELECT * FROM events ORDER BY ts ASC")
+        nevents = len(events)
+
+        assert [e.type for e in events] == ['participant'] * nevents
+        assert [e.payload['id'] for e in events] == [crusher_id] * nevents
+        assert [e.payload['identity_id'] for e in events] == identity_ids
+        assert [e.payload['country_id'] for e in events] == country_ids
+        assert [e.payload['action'] for e in events] == actions
+
+
+    # rii - retrieve_identity_info
+
+    def test_rii_retrieves_identity_info(self):
+        crusher = self.make_participant('crusher')
+        crusher.store_identity_info(self.USA, 'nothing-enforced', {'name': 'Crusher'})
+        assert crusher.retrieve_identity_info(self.USA)['name'] == 'Crusher'
+
+    def test_rii_retrieves_identity_when_there_are_multiple_identities(self):
+        crusher = self.make_participant('crusher')
+        crusher.store_identity_info(self.USA, 'nothing-enforced', {'name': 'Crusher'})
+        crusher.store_identity_info(self.TTO, 'nothing-enforced', {'name': 'Bruiser'})
+        assert crusher.retrieve_identity_info(self.USA)['name'] == 'Crusher'
+        assert crusher.retrieve_identity_info(self.TTO)['name'] == 'Bruiser'
+
+    def test_rii_returns_None_if_there_is_no_identity_info(self):
+        crusher = self.make_participant('crusher')
+        assert crusher.retrieve_identity_info(self.USA) is None
+
+    def test_rii_logs_event(self):
+        crusher = self.make_participant('crusher')
+        iid = crusher.store_identity_info(self.TTO, 'nothing-enforced', {'name': 'Crusher'})
+        crusher.retrieve_identity_info(self.TTO)
+        self.assert_events( crusher.id
+                          , [iid, iid]
+                          , [self.TTO, self.TTO]
+                          , ['insert identity', 'retrieve identity']
+                           )
+
+    def test_rii_still_logs_an_event_when_noop(self):
+        crusher = self.make_participant('crusher')
+        crusher.retrieve_identity_info(self.TTO)
+        self.assert_events( crusher.id
+                          , [None]
+                          , [self.TTO]
+                          , ['retrieve identity']
+                           )
+
+
+    # lim - list_identity_metadata
+
+    def test_lim_lists_identity_metadata(self):
+        crusher = self.make_participant('crusher')
+        crusher.store_identity_info(self.USA, 'nothing-enforced', {'name': 'Crusher'})
+        assert [x.country.code3 for x in crusher.list_identity_metadata()] == ['USA']
+
+    def test_lim_lists_metadata_for_multiple_identities(self):
+        crusher = self.make_participant('crusher')
+        for country in (self.USA, self.TTO):
+            crusher.store_identity_info(country, 'nothing-enforced', {'name': 'Crusher'})
+        assert [x.country.code3 for x in crusher.list_identity_metadata()] == ['TTO', 'USA']
+
+
+    # sii - store_identity_info
+
+    def test_sii_sets_identity_info(self):
+        crusher = self.make_participant('crusher')
+        crusher.store_identity_info(self.TTO, 'nothing-enforced', {'name': 'Crusher'})
+        assert [x.country.code3 for x in crusher.list_identity_metadata()] == ['TTO']
+
+    def test_sii_sets_a_second_identity(self):
+        crusher = self.make_participant('crusher')
+        crusher.store_identity_info(self.TTO, 'nothing-enforced', {'name': 'Crusher'})
+        crusher.store_identity_info(self.USA, 'nothing-enforced', {'name': 'Crusher'})
+        assert [x.country.code3 for x in crusher.list_identity_metadata()] == ['TTO', 'USA']
+
+    def test_sii_overwrites_first_identity(self):
+        crusher = self.make_participant('crusher')
+        crusher.store_identity_info(self.TTO, 'nothing-enforced', {'name': 'Crusher'})
+        crusher.store_identity_info(self.TTO, 'nothing-enforced', {'name': 'Bruiser'})
+        assert [x.country.code3 for x in crusher.list_identity_metadata()] == ['TTO']
+        assert crusher.retrieve_identity_info(self.TTO)['name'] == 'Bruiser'
+
+    def test_sii_validates_identity(self):
+        crusher = self.make_participant('crusher')
+        raises( ParticipantIdentityInfoInvalid
+              , crusher.store_identity_info
+              , self.TTO
+              , 'impossible'
+              , {'foo': 'bar'}
+               )
+
+    def test_sii_happily_overwrites_schema_name(self):
+        crusher = self.make_participant('crusher')
+        packed = Identity.encrypting_packer.pack({'name': 'Crusher'})
+        self.db.run( "INSERT INTO participant_identities "
+                     "(participant_id, country_id, schema_name, info) "
+                     "VALUES (%s, %s, %s, %s)"
+                   , (crusher.id, self.TTO, 'flah', packed)
+                    )
+        assert [x.schema_name for x in crusher.list_identity_metadata()] == ['flah']
+        crusher.store_identity_info(self.TTO, 'nothing-enforced', {'name': 'Crusher'})
+        assert [x.schema_name for x in crusher.list_identity_metadata()] == ['nothing-enforced']
+
+    def test_sii_logs_event(self):
+        crusher = self.make_participant('crusher')
+        iid = crusher.store_identity_info(self.TTO, 'nothing-enforced', {'name': 'Crusher'})
+        self.assert_events(crusher.id, [iid], [self.TTO], ['insert identity'])
+
+
+    # _vi - _validate_info
+
+    def test__vi_validates_info(self):
+        err = raises(ParticipantIdentityInfoInvalid, _validate_info, 'impossible', {'foo': 'bar'})
+        assert err.value.message == 'You failed.'
+
+    def test__vi_chokes_on_unknown_schema(self):
+        err = raises(ParticipantIdentitySchemaUnknown, _validate_info, 'floo-floo', {'foo': 'bar'})
+        assert err.value.message == "unknown schema 'floo-floo'"
diff --git a/tests/py/test_security.py b/tests/py/test_security.py
index d34120156b..6f6df64810 100644
--- a/tests/py/test_security.py
+++ b/tests/py/test_security.py
@@ -4,7 +4,7 @@
 from aspen.http.request import Request
 from base64 import urlsafe_b64decode
 from gratipay import security
-from gratipay.models.participant import Participant
+from gratipay.models.participant.mixins import Identity
 from gratipay.testing import Harness
 from pytest import raises
 
@@ -49,14 +49,14 @@ def test_ahtr_sets_x_xss_protection(self):
     # ep - EncryptingPacker
 
     def test_ep_packs_encryptingly(self):
-        packed = Participant.encrypting_packer.pack({"foo": "bar"})
+        packed = Identity.encrypting_packer.pack({"foo": "bar"})
         assert urlsafe_b64decode(packed)[0] == b'\x80'  # Frenet version
 
     def test_ep_unpacks_decryptingly(self):
         packed = b'gAAAAABXJMbdriJ984uMCMKfQ5p2UUNHB1vG43K_uJyzUffbu2Uwy0d71kAnqOKJ7Ww_FEQz9Dliw8'\
                  b'7UpM5TdyoJsll5nMAicg=='
-        assert Participant.encrypting_packer.unpack(packed) == {"foo": "bar"}
+        assert Identity.encrypting_packer.unpack(packed) == {"foo": "bar"}
 
     def test_ep_demands_bytes(self):
-        raises(TypeError, Participant.encrypting_packer.unpack, buffer('buffer'))
-        raises(TypeError, Participant.encrypting_packer.unpack, 'unicode')
+        raises(TypeError, Identity.encrypting_packer.unpack, buffer('buffer'))
+        raises(TypeError, Identity.encrypting_packer.unpack, 'unicode')