Skip to content
This repository has been archived by the owner on Feb 8, 2018. It is now read-only.

add schema to store national identity information for ~users #3976

Closed
wants to merge 14 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions bin/keygen.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
#!/usr/bin/env python2
from __future__ import absolute_import, division, print_function, unicode_literals
from cryptography.fernet import Fernet
# Generate a fresh Fernet key and write it to stdout.
new_key = Fernet.generate_key()
print(new_key)
14 changes: 14 additions & 0 deletions bin/rekey.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
#!/usr/bin/env python2
"""See gratipay.models.participant.mixins.identity.rekey for documentation.
"""
from __future__ import absolute_import, division, print_function, unicode_literals

from gratipay import wireup
from gratipay.models.participant.mixins import identity as participant_identities

# Wire up the environment, then the database and the packer that depend on it.
env = wireup.env()
db = wireup.db(env)
packer = wireup.crypto(env)

# Rekey the stored participant identities and report how many were processed.
rekeyed_count = participant_identities.rekey(db, packer)
summary = "Rekeyed {} participant identity record(s).".format(rekeyed_count)
print(summary)
2 changes: 2 additions & 0 deletions defaults.env
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@ PORT=8537
BASE_URL=http://localhost:8537
DATABASE_MAXCONN=10

CRYPTO_KEYS="1YrzmaGBUeFrwD9SpBOqv33a2ElGns9mUtldAmoU7hs="

GRATIPAY_ASSET_URL=/assets/
GRATIPAY_CACHE_STATIC=no
GRATIPAY_COMPRESS_ASSETS=no
Expand Down
1 change: 1 addition & 0 deletions gratipay/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,7 @@
tell_sentry = website.tell_sentry = gratipay.wireup.make_sentry_teller(env)
website.db = gratipay.wireup.db(env)
website.mailer = gratipay.wireup.mail(env, website.project_root)
gratipay.wireup.crypto(env)
gratipay.wireup.base_url(website, env)
gratipay.wireup.secure_cookies(env)
gratipay.wireup.billing(env)
Expand Down
18 changes: 18 additions & 0 deletions gratipay/models/country.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
from __future__ import absolute_import, division, print_function, unicode_literals

from postgres.orm import Model


class Country(Model):
    """Read-only model for the rows of our ``countries`` table.

    :var int id: primary key of the row in the ``countries`` table
    :var unicode name: the country's name
    :var unicode code2: the `ISO 3166-1 alpha-2`_ code for the country
    :var unicode code3: the `ISO 3166-1 alpha-3`_ code for the country

    .. _ISO 3166-1 alpha-2 : https://en.wikipedia.org/wiki/ISO_3166-1_alpha-2
    .. _ISO 3166-1 alpha-3 : https://en.wikipedia.org/wiki/ISO_3166-1_alpha-3

    """

    # Type name used by postgres.orm to associate this model with the
    # ``countries`` table.
    typname = 'countries'
4 changes: 3 additions & 1 deletion gratipay/models/participant/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@
from gratipay.models.account_elsewhere import AccountElsewhere
from gratipay.models.exchange_route import ExchangeRoute
from gratipay.models.team import Team
from gratipay.models.participant import mixins
from gratipay.security.crypto import constant_time_compare
from gratipay.utils import (
i18n,
Expand All @@ -60,7 +61,7 @@

USERNAME_MAX_SIZE = 32

class Participant(Model):
class Participant(Model, mixins.Identity):
"""Represent a Gratipay participant.
"""

Expand Down Expand Up @@ -355,6 +356,7 @@ def clear_personal_information(self, cursor):

DELETE FROM emails WHERE participant_id = %(participant_id)s;
DELETE FROM statements WHERE participant=%(participant_id)s;
DELETE FROM participant_identities WHERE participant_id=%(participant_id)s;

UPDATE participants
SET anonymous_giving=False
Expand Down
3 changes: 3 additions & 0 deletions gratipay/models/participant/mixins/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
from .identity import IdentityMixin as Identity

__all__ = ['Identity']
296 changes: 296 additions & 0 deletions gratipay/models/participant/mixins/identity.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,296 @@
from __future__ import absolute_import, division, print_function, unicode_literals

from psycopg2 import IntegrityError
from gratipay.models import add_event


class ParticipantIdentityError(StandardError):
    """Base class for errors concerning participant identity information."""


class ParticipantIdentitySchemaUnknown(ParticipantIdentityError):
    """Signals that a schema name has no entry in ``schema_validators``."""


class ParticipantIdentityInfoInvalid(ParticipantIdentityError):
    """Signals that identity info does not conform to its named schema."""


# Registry of known identity schemas: each schema name maps to a callable that
# is handed the ``info`` dictionary. ``nothing-enforced`` accepts any input.
schema_validators = {
    'nothing-enforced': lambda info: None,
}


def _validate_info(schema_name, info):
    """Run the validator registered for ``schema_name`` against ``info``.

    :param schema_name: key to look up in ``schema_validators``
    :param info: the identity information handed to the validator

    :raises ParticipantIdentitySchemaUnknown: if ``schema_name`` is not a key
        of ``schema_validators``

    Always returns ``None``; whatever the validator returns is discarded.

    """
    # Only the lookup is guarded, so a KeyError raised inside a validator is
    # never mistaken for an unknown schema.
    try:
        validator = schema_validators[schema_name]
    except KeyError:
        raise ParticipantIdentitySchemaUnknown("unknown schema '{}'".format(schema_name))
    validator(info)


class IdentityMixin(object):
    """This mixin provides management of national identities for
    :py:class:`~gratipay.models.participant.Participant` objects.

    A participant may have zero or more national identities on file with
    Gratipay, with at most one for any given country at any given time. When at
    least one of a participant's national identities has been verified, then
    they may join the payroll of one or more Teams.

    Since national identity information is more sensitive than other
    information in our database, we encrypt it in the application layer before
    passing it to the database in :py:meth:`store_identity_info`. We then limit
    access to the information to a single method,
    :py:meth:`retrieve_identity_info`.

    The methods here read ``self.id``, ``self.db`` and
    ``self.encrypting_packer``, which the class mixing this in must provide.

    """

    def store_identity_info(self, country_id, schema_name, info):
        """Store the participant's national identity information for a given country.

        :param int country_id: an ``id`` from the ``countries`` table
        :param unicode schema_name: the name of the schema of the identity information
        :param dict info: a dictionary of identity information

        :returns: the ``id`` of the identity info's record in the
            ``participant_identities`` table

        :raises ParticipantIdentitySchemaUnknown: if ``schema_name`` doesn't
            name a known schema
        :raises ParticipantIdentityInfoInvalid: if the ``info`` dictionary does
            not conform to the schema named by ``schema_name``

        The ``info`` dictionary will be serialized to JSON and then encrypted
        with :py:class:`~gratipay.security.crypto.EncryptingPacker` before
        being sent to the database. We anticipate multiple schemas evolving for
        this dictionary, with enforcement in the application layer (since the
        field is opaque in the database layer). For now there is only one
        available schema: ``nothing-enforced``.

        New participant identity information for a given country always starts
        out unverified. Storing information for a country that already has a
        record replaces that record's ``schema_name`` and ``info`` and resets
        ``is_verified`` to false.

        """
        _validate_info(schema_name, info)
        encrypted_info = self.encrypting_packer.pack(info)

        params = dict(
            participant_id=self.id,
            country_id=country_id,
            info=encrypted_info,
            schema_name=schema_name,
        )

        def _log(cursor, action, identity_id):
            # Log through the same cursor that performed the write.
            payload = dict(
                id=self.id,
                country_id=country_id,
                identity_id=identity_id,
                action=action + ' identity',
            )
            add_event(cursor, 'participant', payload)

        try:
            with self.db.get_cursor() as cursor:
                identity_id = cursor.one("""

                    INSERT INTO participant_identities
                                (participant_id, country_id, schema_name, info)
                         VALUES (%(participant_id)s, %(country_id)s, %(schema_name)s, %(info)s)
                      RETURNING id

                """, params)
                _log(cursor, 'insert', identity_id)

        except IntegrityError as exc:
            # 23505 is PostgreSQL's unique_violation: a record already exists
            # for this participant and country, so update it instead. Any
            # other integrity failure is propagated to the caller.
            if exc.pgcode != '23505':
                raise
            with self.db.get_cursor() as cursor:
                # RETURNING also yields the (new) schema_name; only id is needed.
                identity_id, _unused = cursor.one("""

                    UPDATE participant_identities
                       SET schema_name=%(schema_name)s, info=%(info)s, is_verified=false
                     WHERE participant_id=%(participant_id)s
                       AND country_id=%(country_id)s
                 RETURNING id, schema_name

                """, params)
                _log(cursor, 'update', identity_id)

        return identity_id


    def retrieve_identity_info(self, country_id):
        """Return the participant's national identity information for a given country.

        :param int country_id: an ``id`` from the ``countries`` table

        :returns: a dictionary of identity information, or ``None``

        A ``retrieve identity`` event is logged for the participant.

        """
        with self.db.get_cursor() as cursor:
            identity_id, info = cursor.one("""

                SELECT id, info
                  FROM participant_identities
                 WHERE participant_id=%s
                   AND country_id=%s

            """, (self.id, country_id), default=(None, None))

            if info is not None:
                info = bytes(info)  # psycopg2 returns bytea as buffer; we want bytes
                info = self.encrypting_packer.unpack(info)

            # NOTE(review): the event is assumed to be logged even when no
            # record was found (identity_id is then None), matching
            # set_identity_verification and clear_identity -- confirm.
            payload = dict(
                id=self.id,
                identity_id=identity_id,
                country_id=country_id,
                action='retrieve identity',
            )
            add_event(cursor, 'participant', payload)

        return info


    def list_identity_metadata(self, is_verified=None):
        """Return a list of identity metadata records, sorted by country name.

        :param bool is_verified: filter records by whether or not the
            information is verified; ``None`` returns both

        Identity metadata records have the following attributes:

        :var int id: the record's primary key in the ``participant_identities`` table
        :var Country country: the country this identity applies to
        :var unicode schema_name: the name of the schema that the data itself conforms to
        :var bool is_verified: whether or not the information has been verified

        The national identity information itself is not included, only
        metadata. Use :py:meth:`retrieve_identity_info` to get the actual data.

        """
        # The COALESCE lets us pass in is_verified instead of concatenating SQL
        # (recall that `* = null` evaluates to null, while `true = false` is
        # false).
        return self.db.all("""

            SELECT pi.id
                 , c.*::countries AS country
                 , schema_name
                 , is_verified
              FROM participant_identities pi
              JOIN countries c ON pi.country_id=c.id
             WHERE participant_id=%s
               AND COALESCE(is_verified = %s, true)
          ORDER BY c.name

        """, (self.id, is_verified))


    def set_identity_verification(self, country_id, is_verified):
        """Set the verification status of the participant's national identity for a given country.

        :param int country_id: an ``id`` from the ``countries`` table
        :param bool is_verified: whether the information has been verified or not

        If the participant has no identity on file for the given
        ``country_id`` then no record is changed, but a ``verify identity`` or
        ``unverify identity`` event is still logged, with ``identity_id`` and
        ``old_value`` set to ``None``.

        """
        is_verified = bool(is_verified)
        action = 'verify' if is_verified else 'unverify'

        # Name the SQL parameters explicitly rather than passing locals().
        params = dict(
            participant_id=self.id,
            country_id=country_id,
            is_verified=is_verified,
        )

        with self.db.get_cursor() as cursor:
            # Read the previous state first so the event can record old_value.
            old = cursor.one("""

                SELECT id, is_verified
                  FROM participant_identities
                 WHERE participant_id=%(participant_id)s
                   AND country_id=%(country_id)s

            """, params)

            cursor.run("""

                UPDATE participant_identities
                   SET is_verified=%(is_verified)s
                 WHERE participant_id=%(participant_id)s
                   AND country_id=%(country_id)s

            """, params)

            payload = dict(
                id=self.id,
                identity_id=old.id if old else None,
                country_id=country_id,
                new_value=is_verified,
                old_value=old.is_verified if old else None,
                action=action + ' identity',
            )
            add_event(cursor, 'participant', payload)


    def clear_identity(self, country_id):
        """Clear the participant's national identity record for a given country.

        :param int country_id: an ``id`` from the ``countries`` table

        A ``clear identity`` event is logged whether or not a record was
        deleted; its ``identity_id`` is whatever the ``DELETE ... RETURNING
        id`` statement yields.

        """
        # Name the SQL parameters explicitly rather than passing locals().
        params = dict(participant_id=self.id, country_id=country_id)

        with self.db.get_cursor() as cursor:
            identity_id = cursor.one("""

                DELETE
                  FROM participant_identities
                 WHERE participant_id=%(participant_id)s
                   AND country_id=%(country_id)s
             RETURNING id

            """, params)
            payload = dict(
                id=self.id,
                identity_id=identity_id,
                country_id=country_id,
                action='clear identity',
            )
            add_event(cursor, 'participant', payload)


# Rekeying
# ========

def rekey(db, packer):
    """Re-encrypt stale participant identity records; return how many were done.

    :param GratipayDB db: used to access the database
    :param EncryptingPacker packer: used to decrypt and encrypt data

    :returns: the total number of records rekeyed

    This function is central to the procedure for rekeying our encrypted data
    that is documented in the "`Keep Secrets`_" howto. Records in
    `participant_identities` that haven't been keyed in the present month are
    processed in batches of 100 until none remain. Each record's timestamp is
    updated in the same statement as its rekeyed `info`, so the function can
    safely be rerun after a network failure or similar interruption.

    .. _Keep Secrets: http://inside.gratipay.com/howto/keep-secrets

    """
    total = 0
    # Keep processing batches; iter() stops at the first batch of size 0.
    for batch_size in iter(lambda: _rekey_one_batch(db, packer), 0):
        total += batch_size
    return total


def _rekey_one_batch(db, packer):
batch = db.all("""

SELECT id, info
FROM participant_identities
WHERE _info_last_keyed < date_trunc('month', now())
ORDER BY _info_last_keyed ASC
LIMIT 100

""")
if not batch:
return 0

for rec in batch:
plaintext = packer.unpack(bytes(rec.info))
new_token = packer.pack(plaintext)
db.run( "UPDATE participant_identities SET info=%s, _info_last_keyed=now() WHERE id=%s"
, (new_token, rec.id)
)

return len(batch)
Loading