From 560ad059930c3b9861b438ec424d36b894e2abad Mon Sep 17 00:00:00 2001 From: John van Breda Date: Thu, 8 Oct 2020 17:49:21 +0100 Subject: [PATCH 1/8] Support for REST access to ES using JWT JWT authenticates as a user of a website so applies website and user ID permissions if configured to do so, unless making a valid alldata claim which overrides the user ID permission. See example config file for more info. --- modules/rest_api/config/rest.example.php | 17 ++++- .../rest_api/controllers/services/rest.php | 76 ++++++++++++++++++- modules/rest_api/i18n/en_GB/rest_api.php | 3 +- 3 files changed, 89 insertions(+), 7 deletions(-) diff --git a/modules/rest_api/config/rest.example.php b/modules/rest_api/config/rest.example.php index f487e4b05c..ae6a935d50 100644 --- a/modules/rest_api/config/rest.example.php +++ b/modules/rest_api/config/rest.example.php @@ -79,9 +79,15 @@ 'resource_options' => [ // Grants full access to all reports. Client configs can override this. 'reports' => [], - // Grant access to elasticsearch. Provide empty array to enable all - // end-points. Configure the clients which can access each index in - // the clients config entry. + // Grant access to elasticsearch via the listed endpoints. Either a + // simple array of endpoint names, or an associative array keyed by name + // containing config in the values. Set config option limit_to_website + // to TRUE to limit to data accessible to this website. Set + // limit_to_own_data to TRUE to restrict to the user's own data. Each + // endpoint needs to be added to the 'elasticsearch' configuration entry + // to define how it maps to Elasticsearch. If using directClient + // authentication, also configure the clients which can access each index + // in the clients config entry. 'elasticsearch' => ['es'], ], ], @@ -97,6 +103,11 @@ 'resource_options' => [ // Grants full access to all reports. Client configs can override this.
'reports' => ['featured' => TRUE, 'limit_to_own_data' => TRUE], + // Grant access to Elasticsearch but in this case, apply website and user ID filters. + // Limit to own data can be overridden by adding claim http://indicia.org.uk/alldata=true. + // Best practice is to set both of these to TRUE, then in the Indicia settings enable + // the option to allow users to access all data if appropriate for the website. + 'elasticsearch' => ['es' => ['limit_to_website' => TRUE, 'limit_to_own_data' => TRUE]], ], ], ]; diff --git a/modules/rest_api/controllers/services/rest.php b/modules/rest_api/controllers/services/rest.php index 250aea2ee6..bd9b8abb70 100644 --- a/modules/rest_api/controllers/services/rest.php +++ b/modules/rest_api/controllers/services/rest.php @@ -162,6 +162,13 @@ class Rest_Controller extends Controller { */ private $authenticated = FALSE; + /** + * Name of the authentication method. + * + * @var string + */ + private $authMethod; + /** * Config settings relating to the selected auth method. * @@ -169,6 +176,16 @@ class Rest_Controller extends Controller { */ private $authConfig; + /** + * Allow override of default ES filters on record created_by_id + * + * When using user based auth (jwtUser or oAuth2User), default is for ES to + * limit records in response to user's own data. + * + * @var bool + */ + private $allowAllData = FALSE; + /** * Config settings relating to the authenticated client if any. * @@ -1081,6 +1098,39 @@ private function getColumnsTemplate(&$postObj) { } } + /** + * Adds permissions filters to ES search, based on website ID and user ID. + * + * If the authentication method configuration (e.g. jwtUser) includes the + * option limit_to_website in the settings for the Elasticsearch endpoint, + * then automatically adds a terms filter on metadata.website.id. Also, + * if the settings include limit_to_own_data for the endpoint, then adds a + * terms filter on metadata.created_by_id.
This can be overridden by + * including the claim http://indicia.org.uk/alldata in the JWT access token. + */ + private function applyEsPermissionsQuery(&$postObj) { + $filters = []; + if ($this->esConfig['limit_to_own_data'] && !$this->allowAllData && RestObjects::$clientUserId) { + $filters[] = ['term' => ['metadata.created_by_id' => RestObjects::$clientUserId]]; + } + if ($this->esConfig['limit_to_website'] && RestObjects::$clientWebsiteId) { + // @todo Expand to include record sharing. + $filters[] = ['term' => ['metadata.website.id' => RestObjects::$clientWebsiteId]]; + } + if (count($filters) > 0) { + if (!isset($postObj->query)) { + $postObj->query = new stdClass(); + } + if (!isset($postObj->query->bool)) { + $postObj->query->bool = new stdClass(); + } + if (!isset($postObj->query->bool->must)) { + $postObj->query->bool->must = []; + } + $postObj->query->bool->must = array_merge($postObj->query->bool->must, $filters); + } + } + /** * Calculate the data to post to an Elasticsearch search. * @@ -1095,7 +1145,7 @@ private function getColumnsTemplate(&$postObj) { * @return string * Data to post. */ - private function getEsPostData($postObj, $format, $file) { + private function getEsPostData($postObj, $format, $file, $isSearch) { if ($this->pagingMode === 'scroll' && $this->pagingModeState === 'nextPage') { // A subsequent hit on a scrolled request. 
$postObj = [ @@ -1111,6 +1161,9 @@ private function getEsPostData($postObj, $format, $file) { elseif ($this->pagingMode === 'composite' && isset($file['after_key'])) { $postObj->aggs->_rows->composite->after = $file['after_key']; } + if ($isSearch) { + $this->applyEsPermissionsQuery($postObj); + } if ($format === 'csv') { $csvTemplate = $this->getEsCsvTemplate(); $fields = []; @@ -1361,7 +1414,7 @@ private function proxyToEs($url) { else { echo $this->getEsOutputHeader($format); } - $postData = $this->getEsPostData($postObj, $format, $file); + $postData = $this->getEsPostData($postObj, $format, $file, preg_match('/\/_search/', $url)); $actualUrl = $this->getEsActualUrl($url); $session = curl_init($actualUrl); if (!empty($postData) && $postData !== '[]') { @@ -3220,9 +3273,22 @@ private function authenticate() { // Try this authentication method. call_user_func(array($this, "authenticateUsing$method")); if ($this->authenticated) { + $this->authMethod = $method; // Double checking required for Elasticsearch proxy. if ($this->elasticProxy) { - if (empty($cfg['resource_options']['elasticsearch']) || !in_array($this->elasticProxy, $cfg['resource_options']['elasticsearch'])) { + if (empty($cfg['resource_options']['elasticsearch'])) { + kohana::log('debug', "Elasticsearch request to $this->elasticProxy not enabled for $method"); + RestObjects::$apiResponse->fail('Unauthorized', 401, 'Unable to authorise'); + } + if (in_array($this->elasticProxy, $cfg['resource_options']['elasticsearch'])) { + // Simple array of ES endpoints with no config. + $this->esConfig = []; + } + elseif (array_key_exists($this->elasticProxy, $cfg['resource_options']['elasticsearch'])) { + // Endpoints are keys with array values holding config. 
+ $this->esConfig = $cfg['resource_options']['elasticsearch'][$this->elasticProxy]; + } + else { kohana::log('debug', "Elasticsearch request to $this->elasticProxy not enabled for $method"); RestObjects::$apiResponse->fail('Unauthorized', 401, 'Unable to authorise'); } @@ -3372,6 +3438,10 @@ private function authenticateUsingJwtUser() { if (empty($payloadValues['iss']) || empty($payloadValues['http://indicia.org.uk/user:id'])) { RestObjects::$apiResponse->fail('Bad request', 400); } + // Check for claim that stops ES filtering to just user's own records. + if (!empty($payloadValues['http://indicia.org.uk/alldata'])) { + $this->allowAllData = TRUE; + } $website = $this->getWebsiteByUrl($payloadValues['iss']); if (!$website || empty($website->public_key)) { kohana::log('debug', 'Website has no public key'); diff --git a/modules/rest_api/i18n/en_GB/rest_api.php b/modules/rest_api/i18n/en_GB/rest_api.php index 4598bbacdc..3a3647fc24 100644 --- a/modules/rest_api/i18n/en_GB/rest_api.php +++ b/modules/rest_api/i18n/en_GB/rest_api.php @@ -38,7 +38,8 @@ '
  • Generate a public/private key pair and store the private key in the Warehouse website settings.
  • ' . '
  • Provide a JWT token signed with the public key which provides the following claims:'; $lang['jwtUserHelpHeader'] = 'Set the authorisation header to "Bearer "'; $lang['genericHelpHeader'] = 'Specify an authorisation header with a list of token name/value pairs, using colons as a ' . From 325445789d6715887010dba6cdceb8a6cae6faa6 Mon Sep 17 00:00:00 2001 From: John van Breda Date: Thu, 8 Oct 2020 21:15:48 +0100 Subject: [PATCH 2/8] Example config for minimal JWT setup --- modules/rest_api/config/rest.jwt-only.php | 68 +++++++++++++++++++++++ 1 file changed, 68 insertions(+) create mode 100644 modules/rest_api/config/rest.jwt-only.php diff --git a/modules/rest_api/config/rest.jwt-only.php b/modules/rest_api/config/rest.jwt-only.php new file mode 100644 index 0000000000..c71a69c49f --- /dev/null +++ b/modules/rest_api/config/rest.jwt-only.php @@ -0,0 +1,68 @@ + [ + // TRUE to allow CORS from any domain, or provide an array of domain regexes. + 'allow_cors' => TRUE, + 'resource_options' => [ + // Grants full access to all reports. Client configs can override this. + 'reports' => ['featured' => TRUE, 'limit_to_own_data' => TRUE], + // Grant access to Elasticsearch but in this case, apply website and user ID filters. + // Limit to own data can be overridden by adding claim http://indicia.org.uk/allow_full_dataset=true. + // Best practice is to set both of these to TRUE, then in the Indicia settings enable + // the option to allow users to access all data if appropriate for the website. + 'elasticsearch' => ['es' => ['limit_to_website' => TRUE, 'limit_to_own_data' => TRUE]], + ], + ], +]; + +/** + * Should authorisation tokens be allowed in the query parameters rather than the + * authorisation header? Recommended for development servers only. + */ +$config['allow_auth_tokens_in_url'] = FALSE; + +/** + * If this warehouse is configured to work with an Elasticsearch instance then + * the REST API can act as a proxy to avoid having to expose all the public + * APIs. 
The proxy can point to index aliases to limit the search filter. + */ +$config['elasticsearch'] = [ + 'es' => [ + 'open' => FALSE, + 'index' => 'occurrence', + 'url' => 'http://my.elastic.url:9200', + 'allowed' => [ + 'get' => [ + '/^_search/' => 'GET requests to the search API (/_search?...)', + '/^_mapping/' => 'GET requests to the mappings API (/_mapping?...)', + ], + 'post' => [ + '/^_search/' => 'POST requests to the search API (/_search?...)', + '/^doc\/.*\/_update/' => 'POSTed document updates', + ], + ], + ], +]; From deb53213dbaa68f956553c373345ae325ca77dc4 Mon Sep 17 00:00:00 2001 From: John van Breda Date: Thu, 8 Oct 2020 21:18:12 +0100 Subject: [PATCH 3/8] Small Rest documentation tweaks Showing Elasticsearch API docs doesn't require authentication --- .../rest_api/libraries/RestApiResponse.php | 21 ++++++++----------- 1 file changed, 9 insertions(+), 12 deletions(-) diff --git a/modules/rest_api/libraries/RestApiResponse.php b/modules/rest_api/libraries/RestApiResponse.php index f6a46084aa..4589c90c11 100644 --- a/modules/rest_api/libraries/RestApiResponse.php +++ b/modules/rest_api/libraries/RestApiResponse.php @@ -237,19 +237,16 @@ private function indexHtml($resourceConfig) { if ($es) { echo '

    Elasticsearch end-points

    '; foreach ($es as $endpoint => $esConfig) { - // Also allow if authentication provided. - if ($esConfig['open'] === TRUE) { - echo '

    ' . url::base() . "index.php/services/rest/$endpoint

    "; - echo ''; - echo ''; - echo ''; - foreach ($esConfig['allowed'] as $method => $patterns) { - foreach ($patterns as $expr => $desc) { - echo ""; - } + echo '

    ' . url::base() . "index.php/services/rest/$endpoint

    "; + echo '
    Allowed methods
    HTTP methodExpressionDescription
    $method$expr$desc
    '; + echo ''; + echo ''; + foreach ($esConfig['allowed'] as $method => $patterns) { + foreach ($patterns as $expr => $desc) { + echo ""; } - echo '
    Allowed methods
    HTTP methodExpressionDescription
    $method$expr$desc
    '; } + echo ''; } } echo str_replace('{{ base }}', url::base(), $this->htmlFooter); @@ -581,7 +578,7 @@ private function outputArrayAsHtml($array, $options = array()) { $this->outputArrayAsHtml($value, $options); } else { // a simple value to output. If it contains an internal link then process it to hide user/secret data. - if (preg_match('/http(s)?:\/\//', $value)) { + if (preg_match('/^http(s)?:\/\//', $value)) { $parts = explode('?', $value); $displayUrl = $parts[0]; if (count($parts)>1) { From e75013b61558dfe02589459a46b73634f20f0b43 Mon Sep 17 00:00:00 2001 From: John van Breda Date: Thu, 8 Oct 2020 21:18:27 +0100 Subject: [PATCH 4/8] Better handling if REST config file missing. --- .../rest_api/controllers/services/rest.php | 22 ++++++++++++++----- 1 file changed, 17 insertions(+), 5 deletions(-) diff --git a/modules/rest_api/controllers/services/rest.php b/modules/rest_api/controllers/services/rest.php index bd9b8abb70..5c07c2ba9a 100644 --- a/modules/rest_api/controllers/services/rest.php +++ b/modules/rest_api/controllers/services/rest.php @@ -762,11 +762,20 @@ public function __construct() { * Outputs help text to describe the available API resources. */ public function index() { - // A temporary array to simulate the arguments, which we can use to check - // for versioning. - $arguments = [$this->uri->last_segment()]; - $this->checkVersion($arguments); - RestObjects::$apiResponse->index($this->resourceConfig); + try { + if (!file_exists(MODPATH . 'rest_api/config/rest.php')) { + RestObjects::$apiResponse->fail('Internal Server Error', 500, + 'Missing config file. See https://indicia-docs.readthedocs.io/en/latest/administrating/warehouse/modules/rest-api.html for more info.'); + } + // A temporary array to simulate the arguments, which we can use to check + // for versioning. 
+ $arguments = [$this->uri->last_segment()]; + $this->checkVersion($arguments); + RestObjects::$apiResponse->index($this->resourceConfig); + } + catch (RestApiAbort $e) { + // No action if a proper abort. + } } /** @@ -843,6 +852,9 @@ public function token() { * @throws exception */ public function __call($name, $arguments) { + if (!file_exists(MODPATH . 'rest_api/config/rest.php')) { + $this->fail('Internal Server Error', 500, 'Missing config file.'); + } $tm = microtime(TRUE); try { // Undo router's conversion of hyphens and underscores. From 82b78f1407d2c1241ea697004314135de25dfab5 Mon Sep 17 00:00:00 2001 From: John van Breda Date: Thu, 8 Oct 2020 21:19:59 +0100 Subject: [PATCH 5/8] Make clear that this is a value to be replaced --- modules/rest_api/config/rest.jwt-only.php | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/modules/rest_api/config/rest.jwt-only.php b/modules/rest_api/config/rest.jwt-only.php index c71a69c49f..4c6dc01a8e 100644 --- a/modules/rest_api/config/rest.jwt-only.php +++ b/modules/rest_api/config/rest.jwt-only.php @@ -30,9 +30,6 @@ // Grants full access to all reports. Client configs can override this. 'reports' => ['featured' => TRUE, 'limit_to_own_data' => TRUE], // Grant access to Elasticsearch but in this case, apply website and user ID filters. - // Limit to own data can be overridden by adding claim http://indicia.org.uk/allow_full_dataset=true. - // Best practice is to set both of these to TRUE, then in the Indicia settings enable - // the option to allow users to access all data if appropriate for the website. 
'elasticsearch' => ['es' => ['limit_to_website' => TRUE, 'limit_to_own_data' => TRUE]], ], ], @@ -52,7 +49,7 @@ $config['elasticsearch'] = [ 'es' => [ 'open' => FALSE, - 'index' => 'occurrence', + 'index' => 'my-index', 'url' => 'http://my.elastic.url:9200', 'allowed' => [ 'get' => [ From 06cb99387d7b21946446c8f854e69989e8cce89d Mon Sep 17 00:00:00 2001 From: John van Breda Date: Fri, 9 Oct 2020 09:28:01 +0100 Subject: [PATCH 6/8] Version bump --- application/config/version.php | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/application/config/version.php b/application/config/version.php index 7ff6ff7e55..2eb8de52c5 100644 --- a/application/config/version.php +++ b/application/config/version.php @@ -29,14 +29,14 @@ * * @var string */ -$config['version'] = '4.6.0'; +$config['version'] = '4.7.0'; /** * Version release date. * * @var string */ -$config['release_date'] = '2020-10-01'; +$config['release_date'] = '2020-10-09'; /** * Link to the code repository downloads page. From 2577445b58803e1f71bb62929b6461ffee8ad40f Mon Sep 17 00:00:00 2001 From: John van Breda Date: Fri, 9 Oct 2020 09:34:06 +0100 Subject: [PATCH 7/8] Comment correction. --- modules/rest_api/controllers/services/rest.php | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/modules/rest_api/controllers/services/rest.php b/modules/rest_api/controllers/services/rest.php index 5c07c2ba9a..7a9217c883 100644 --- a/modules/rest_api/controllers/services/rest.php +++ b/modules/rest_api/controllers/services/rest.php @@ -179,8 +179,9 @@ class Rest_Controller extends Controller { /** * Allow override of default ES filters on record created_by_id * - * When using user based auth (jwtUser or oAuth2User), default is for ES to - * limit records in response to user's own data. 
+ * When using user based auth (jwtUser or oAuth2User), configuration can + * include limit_to_own_data which applies an automatic user filter unless + * the request access token includes a claim that alldata access is allowed. * * @var bool */ From b70096345ba3a733328db8cba929927b486ef81c Mon Sep 17 00:00:00 2001 From: John van Breda Date: Fri, 9 Oct 2020 09:55:21 +0100 Subject: [PATCH 8/8] Support reporting sharing mode for ES limit_to_website option Also a couple of small logic fixes. --- .../rest_api/controllers/services/rest.php | 46 ++++++++++++++++--- 1 file changed, 40 insertions(+), 6 deletions(-) diff --git a/modules/rest_api/controllers/services/rest.php b/modules/rest_api/controllers/services/rest.php index 7a9217c883..c2b32c087c 100644 --- a/modules/rest_api/controllers/services/rest.php +++ b/modules/rest_api/controllers/services/rest.php @@ -1111,6 +1111,40 @@ private function getColumnsTemplate(&$postObj) { } } + /** + * A cached lookup of the websites that are available for a sharing mode. + * + * @param integer $websiteId + * ID of the website that is receiving the shared data. + * + * @return array + * List of website IDs that will share their data. + */ + private function getSharedWebsiteList($websiteId, $sharing = 'reporting') { + $tag = "website-shares-$websiteId"; + $cacheId = "$tag-$sharing"; + $cache = Cache::instance(); + if ($cached = $cache->get($cacheId)) { + return explode(',', $cached); + } + $qry = $this->db->select('to_website_id') + ->from('index_websites_website_agreements') + ->where([ + "receive_for_$sharing" => 't', + 'from_website_id' => $websiteId + ]) + ->get()->result(); + $ids = array(); + foreach ($qry as $row) { + $ids[] = $row->to_website_id; + } + // Tag all cache entries for this website so they can be cleared together + // when changes are saved. Also note the cached entry is an imploded string + // so we benefit from sharing cache hits with the reporting engine.
+ $cache->set($cacheId, implode(',', $ids), $tag); + return $ids; + } + /** * Adds permissions filters to ES search, based on website ID and user ID. * @@ -1123,12 +1157,12 @@ private function getColumnsTemplate(&$postObj) { */ private function applyEsPermissionsQuery(&$postObj) { $filters = []; - if ($this->esConfig['limit_to_own_data'] && !$this->allowAllData && RestObjects::$clientUserId) { + if (!empty($this->esConfig['limit_to_own_data']) && !$this->allowAllData && RestObjects::$clientUserId) { $filters[] = ['term' => ['metadata.created_by_id' => RestObjects::$clientUserId]]; } - if ($this->esConfig['limit_to_website'] && RestObjects::$clientWebsiteId) { - // @todo Expand to include record sharing. - $filters[] = ['term' => ['metadata.website.id' => RestObjects::$clientWebsiteId]]; + if (!empty($this->esConfig['limit_to_website']) && RestObjects::$clientWebsiteId) { + // @todo Support for other sharing modes in JWT claims. + $filters[] = ['terms' => ['metadata.website.id' => $this->getSharedWebsiteList(RestObjects::$clientWebsiteId)]]; } if (count($filters) > 0) { if (!isset($postObj->query)) { @@ -3305,8 +3339,8 @@ private function authenticate() { kohana::log('debug', "Elasticsearch request to $this->elasticProxy not enabled for $method"); RestObjects::$apiResponse->fail('Unauthorized', 401, 'Unable to authorise'); } - if (!empty($this->clientConfig) && empty($this->clientConfig['elasticsearch']) || - !in_array($this->elasticProxy, $this->clientConfig['elasticsearch'])) { + if (!empty($this->clientConfig) && (empty($this->clientConfig['elasticsearch']) || + !in_array($this->elasticProxy, $this->clientConfig['elasticsearch']))) { kohana::log('debug', "Elasticsearch request to $this->elasticProxy not enabled for client"); RestObjects::$apiResponse->fail('Unauthorized', 401, 'Unable to authorise'); }