From 4b3e614e6e9a350a22075befa6c5ad783b18307c Mon Sep 17 00:00:00 2001 From: John van Breda Date: Tue, 21 Jan 2020 09:28:51 +0000 Subject: [PATCH 01/26] Trigger ensures child samples deleted Also sets updated metadata. --- .../202001210925_sample_delete.sql | 27 +++++++++++++++++++ 1 file changed, 27 insertions(+) create mode 100644 modules/indicia_setup/db/version_3_2_0/202001210925_sample_delete.sql diff --git a/modules/indicia_setup/db/version_3_2_0/202001210925_sample_delete.sql b/modules/indicia_setup/db/version_3_2_0/202001210925_sample_delete.sql new file mode 100644 index 0000000000..a3aecf343d --- /dev/null +++ b/modules/indicia_setup/db/version_3_2_0/202001210925_sample_delete.sql @@ -0,0 +1,27 @@ +CREATE OR REPLACE FUNCTION cascade_sample_delete() RETURNS TRIGGER AS $$ + BEGIN /* Row-level UPDATE trigger body (NOTE(review): presumably attached to samples; the CREATE TRIGGER is not in this script). When a row flips from deleted=false to deleted=true, soft-delete its occurrences, attribute values, comments, media and child samples, stamping them with the same updated_by_id. Rows already flagged deleted are skipped so their existing updated_on/updated_by_id values are not overwritten. */ + IF (OLD.deleted = false AND NEW.deleted = true) THEN + UPDATE occurrences + SET deleted = true, updated_on=now(), updated_by_id=new.updated_by_id + WHERE sample_id = OLD.id AND deleted = false; + + UPDATE sample_attribute_values + SET deleted = true, updated_on=now(), updated_by_id=new.updated_by_id + WHERE sample_id = OLD.id AND deleted = false; + + UPDATE sample_comments + SET deleted = true, updated_on=now(), updated_by_id=new.updated_by_id + WHERE sample_id = OLD.id AND deleted = false; + + UPDATE sample_media + SET deleted = true, updated_on=now(), updated_by_id=new.updated_by_id + WHERE sample_id = OLD.id AND deleted = false; + + UPDATE samples + SET deleted = true, updated_on=now(), updated_by_id=new.updated_by_id + WHERE parent_id = OLD.id AND deleted = false; + + END IF; + RETURN NEW; /* Ignored by AFTER triggers; returning OLD would discard the row change if attached BEFORE UPDATE. */ +END; +$$ LANGUAGE plpgsql; \ No newline at end of file From 310da3fc2004c28945565af7cbf2b259313deb7e Mon Sep 17 00:00:00 2001 From: John van Breda Date: Tue, 21 Jan 2020 14:44:58 +0000 Subject: [PATCH 02/26] Taxon path building changes * If several taxa in master list with same external key, then the allow_data_entry flag used to prioritise. * Introduces new temp table (master_list_paths) which simplifies the later queries slightly. 
--- .../cache_builder/config/cache_builder.php | 58 +++++++++++-------- 1 file changed, 34 insertions(+), 24 deletions(-) diff --git a/modules/cache_builder/config/cache_builder.php b/modules/cache_builder/config/cache_builder.php index 8c90f2b9a1..d1def318d5 100644 --- a/modules/cache_builder/config/cache_builder.php +++ b/modules/cache_builder/config/cache_builder.php @@ -285,7 +285,16 @@ INTO TEMPORARY ttl_path FROM q GROUP BY child_pref_ttl_id - ORDER BY child_pref_ttl_id;", + ORDER BY child_pref_ttl_id; + + SELECT DISTINCT ON (cttl.external_key) cttl.external_key, cttlall.id, tp.path + INTO TEMPORARY master_list_paths + FROM ttl_path tp + JOIN cache_taxa_taxon_lists cttl ON cttl.id=tp.child_pref_ttl_id + JOIN cache_taxa_taxon_lists cttlall ON cttlall.external_key=cttl.external_key + WHERE cttl.taxon_list_id=COALESCE(#master_list_id#, cttlall.taxon_list_id) + AND cttlall.preferred=true + ORDER BY cttl.external_key, cttl.allow_data_entry DESC;", 'Taxon paths' => " UPDATE cache_taxon_paths ctp SET path=tp.path, external_key=t.external_key @@ -313,38 +322,39 @@ 'Ranks' => " UPDATE cache_taxa_taxon_lists u SET family_taxa_taxon_list_id=cttlf.id, family_taxon=cttlf.taxon, - order_taxa_taxon_list_id=cttlo.id, order_taxon=cttlo.taxon, - kingdom_taxa_taxon_list_id=cttlk.id, kingdom_taxon=cttlk.taxon - FROM cache_taxa_taxon_lists cttl - -- Ensure only changed taxon concepts are updated - JOIN descendants nu ON nu.id=cttl.preferred_taxa_taxon_list_id - JOIN cache_taxon_paths ctp ON ctp.external_key=cttl.external_key AND ctp.taxon_list_id=#master_list_id# - LEFT JOIN cache_taxa_taxon_lists cttlf ON cttlf.taxon_meaning_id=ANY(ctp.path) and cttlf.taxon_rank='Family' and cttlf.taxon_list_id=#master_list_id# AND cttlf.preferred=true - LEFT JOIN cache_taxa_taxon_lists cttlo ON cttlo.taxon_meaning_id=ANY(ctp.path) and cttlo.taxon_rank='Order' and cttlo.taxon_list_id=#master_list_id# AND cttlo.preferred=true - LEFT JOIN cache_taxa_taxon_lists cttlk ON 
cttlk.taxon_meaning_id=ANY(ctp.path) and cttlk.taxon_rank='Kingdom' and cttlk.taxon_list_id=#master_list_id# AND cttlk.preferred=true - WHERE cttl.taxon_meaning_id=u.taxon_meaning_id + order_taxa_taxon_list_id=cttlo.id, order_taxon=cttlo.taxon, + kingdom_taxa_taxon_list_id=cttlk.id, kingdom_taxon=cttlk.taxon + FROM master_list_paths mlp + JOIN descendants nu ON nu.id=mlp.id + LEFT JOIN cache_taxa_taxon_lists cttlf ON cttlf.taxon_meaning_id=ANY(mlp.path) and cttlf.taxon_rank='Family' + AND cttlf.taxon_list_id=#master_list_id# AND cttlf.preferred=true AND cttlf.allow_data_entry=true + LEFT JOIN cache_taxa_taxon_lists cttlo ON cttlo.taxon_meaning_id=ANY(mlp.path) and cttlo.taxon_rank='Order' + AND cttlo.taxon_list_id=#master_list_id# AND cttlo.preferred=true AND cttlo.allow_data_entry=true + LEFT JOIN cache_taxa_taxon_lists cttlk ON cttlk.taxon_meaning_id=ANY(mlp.path) and cttlk.taxon_rank='Kingdom' + AND cttlk.taxon_list_id=#master_list_id# AND cttlk.preferred=true AND cttlk.allow_data_entry=true + WHERE mlp.external_key=u.external_key AND (COALESCE(u.family_taxa_taxon_list_id, 0)<>COALESCE(cttlf.id, 0) - OR COALESCE(u.family_taxon, '')<>COALESCE(cttlf.taxon, '') - OR COALESCE(u.order_taxa_taxon_list_id, 0)<>COALESCE(cttlo.id, 0) - OR COALESCE(u.order_taxon, '')<>COALESCE(cttlo.taxon, '') - OR COALESCE(u.kingdom_taxa_taxon_list_id, 0)<>COALESCE(cttlk.id, 0) - OR COALESCE(u.kingdom_taxon, '')<>COALESCE(cttlk.taxon, '') + OR COALESCE(u.family_taxon, '')<>COALESCE(cttlf.taxon, '') + OR COALESCE(u.order_taxa_taxon_list_id, 0)<>COALESCE(cttlo.id, 0) + OR COALESCE(u.order_taxon, '')<>COALESCE(cttlo.taxon, '') + OR COALESCE(u.kingdom_taxa_taxon_list_id, 0)<>COALESCE(cttlk.id, 0) + OR COALESCE(u.kingdom_taxon, '')<>COALESCE(cttlk.taxon, '') ); UPDATE cache_occurrences_functional u - SET family_taxa_taxon_list_id=cttlf.id, - taxon_path=ctp.path + SET family_taxa_taxon_list_id=cttl.family_taxa_taxon_list_id, + taxon_path=mlp.path FROM cache_taxa_taxon_lists cttl -- Ensure only 
changed taxon concepts are updated JOIN descendants nu ON nu.id=cttl.preferred_taxa_taxon_list_id - JOIN cache_taxon_paths ctp ON ctp.external_key=cttl.external_key AND ctp.taxon_list_id=COALESCE(#master_list_id#, cttl.taxon_list_id) - LEFT JOIN cache_taxa_taxon_lists cttlf ON ctp.path @> ARRAY[cttlf.taxon_meaning_id] and cttlf.taxon_rank='Family' and cttlf.taxon_list_id=#master_list_id# AND cttlf.preferred=true - WHERE cttl.taxon_meaning_id=u.taxon_meaning_id - AND (COALESCE(u.family_taxa_taxon_list_id, 0)<>COALESCE(cttlf.id, 0) - OR COALESCE(u.taxon_path, ARRAY[]::integer[])<>COALESCE(ctp.path, ARRAY[]::integer[]));", + JOIN master_list_paths mlp ON mlp.external_key=cttl.external_key; + WHERE cttl.id=u.taxa_taxon_list_id + AND (COALESCE(u.family_taxa_taxon_list_id, 0)<>COALESCE(cttl.family_taxa_taxon_list_id, 0) + OR COALESCE(u.taxon_path, ARRAY[]::integer[])<>COALESCE(mlp.path, ARRAY[]::integer[]));", "teardown" => " DROP TABLE descendants; - DROP TABLE ttl_path;", + DROP TABLE ttl_path; + DROP TABLE master_list_paths;", ); // -------------------------------------------------------------------------------------------------------------------------- From 4a72dbc2e65fdabecb510390e2dd9313739cb62f Mon Sep 17 00:00:00 2001 From: John van Breda Date: Tue, 28 Jan 2020 16:56:11 +0000 Subject: [PATCH 03/26] Adds number of problems to import errors file MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Helps ensure problems on several rows aren’t missed since Excel doesn’t display all the content after a linefeed by default. 
--- .../controllers/services/import.php | 42 +++++++++++++------ 1 file changed, 29 insertions(+), 13 deletions(-) diff --git a/modules/indicia_svc_import/controllers/services/import.php b/modules/indicia_svc_import/controllers/services/import.php index 3e089c2368..ac1ecfc905 100644 --- a/modules/indicia_svc_import/controllers/services/import.php +++ b/modules/indicia_svc_import/controllers/services/import.php @@ -381,11 +381,12 @@ public function upload() { // Create the file pointer, plus one for errors. $handle = fopen($csvTempFile, "r"); $this->checkIfUtf8($metadata, $handle); + $existingNoOfProblemsColIdx = FALSE; $existingProblemColIdx = FALSE; $existingErrorRowNoColIdx = FALSE; $existingImportGuidColIdx = FALSE; $errorHandle = $this->getErrorFileHandle($csvTempFile, $handle, $supportsImportGuid, - $existingProblemColIdx, $existingErrorRowNoColIdx, $existingImportGuidColIdx); + $existingNoOfProblemsColIdx, $existingProblemColIdx, $existingErrorRowNoColIdx, $existingImportGuidColIdx); $count = 0; $limit = (isset($_GET['limit']) ? $_GET['limit'] : FALSE); $filepos = (isset($_GET['filepos']) ? $_GET['filepos'] : 0); @@ -596,8 +597,8 @@ public function upload() { } if (!$ok) { $this->logError( - $data, 'ID specified in import row but not being used to lookup an existing record.', - $existingProblemColIdx, $existingErrorRowNoColIdx, + $data, 1, 'ID specified in import row but not being used to lookup an existing record.', + $existingNoOfProblemsColIdx, $existingProblemColIdx, $existingErrorRowNoColIdx, $errorHandle, $count + $offset + 1, $supportsImportGuid && $existingImportGuidColIdx === FALSE ? 
$metadata['guid'] : '', $metadata @@ -617,8 +618,8 @@ public function upload() { } catch (Exception $e) { $this->logError( - $data, $e->getMessage(), - $existingProblemColIdx, $existingErrorRowNoColIdx, + $data, 1, $e->getMessage(), + $existingNoOfProblemsColIdx, $existingProblemColIdx, $existingErrorRowNoColIdx, $errorHandle, $count + $offset + 1, $supportsImportGuid && $existingImportGuidColIdx === FALSE ? $metadata['guid'] : '', $metadata @@ -789,16 +790,16 @@ public function upload() { if (($id = $modelToSubmit->submit()) == NULL) { // Record has errors - now embedded in model, so dump them into the // error file. - $errors = array(); + $errors = []; foreach ($modelToSubmit->getAllErrors() as $field => $msg) { $fldTitle = array_search($field, $metadata['mappings']); $fldTitle = $fldTitle ? $fldTitle : $field; $errors[] = "$fldTitle: $msg"; } - $errors = implode("\n", array_unique($errors)); + $errors = array_unique($errors); $this->logError( - $data, $errors, - $existingProblemColIdx, $existingErrorRowNoColIdx, + $data, count($errors), implode("\n", $errors), + $existingNoOfProblemsColIdx, $existingProblemColIdx, $existingErrorRowNoColIdx, $errorHandle, $count + $offset + 1, $supportsImportGuid && $existingImportGuidColIdx === FALSE ? $metadata['guid'] : '', $metadata @@ -819,8 +820,8 @@ public function upload() { else { $error = "Could not identify whether record is main record or synonym : " . $saveArray['synonym:tracker']; $this->logError( - $data, $error, - $existingProblemColIdx, $existingErrorRowNoColIdx, + $data, 1, $error, + $existingNoOfProblemsColIdx, $existingProblemColIdx, $existingErrorRowNoColIdx, $errorHandle, $count + $offset + 1, $supportsImportGuid && $existingImportGuidColIdx === FALSE ? 
$metadata['guid'] : '', $metadata @@ -867,13 +868,21 @@ public function upload() { */ private function logError( $data, + $noOfProblems, $error, + $existingNoOfProblemsColIdx, $existingProblemColIdx, $existingErrorRowNoColIdx, $errorHandle, $total, $importGuidToAppend, &$metadata) { + if ($existingNoOfProblemsColIdx === FALSE) { + $data[] = $noOfProblems; + } + else { + $data[$existingNoOfProblemsColIdx] = $noOfProblems; /* Existing count column receives the count, not the message text. */ + } if ($existingProblemColIdx === FALSE) { $data[] = $error; } @@ -1284,6 +1293,8 @@ private function getMetadata($csvTempFile) { * @param bool $supportsImportGuid * True if the model supports tracking imports by GUID, therefore the error * file needs to link the error row to its original GUID. + * @param int $existingNoOfProblemsColIdx + * Returns the column index that the current row's number of problems is in. * @param int $existingProblemColIdx * Returns the column index that the current row's error message is in. * @param int $existingProblemRowNoColIdx @@ -1298,6 +1309,7 @@ private function getMetadata($csvTempFile) { private function getErrorFileHandle($csvTempFile, $handle, $supportsImportGuid, + &$existingNoOfProblemsColIdx, &$existingProblemColIdx, &$existingProblemRowNoColIdx, &$existingImportGuidColIdx) { @@ -1311,9 +1323,13 @@ private function getErrorFileHandle($csvTempFile, $headers = fgetcsv($handle, 1000, ","); $existingImportGuidColIdx = FALSE; if ($needHeaders) { - $existingProblemColIdx = array_search('Problem', $headers); + $existingNoOfProblemsColIdx = array_search('Number of problems', $headers); + if ($existingNoOfProblemsColIdx === FALSE) { + $headers[] = 'Number of problems'; + } + $existingProblemColIdx = array_search('Problem description', $headers); if ($existingProblemColIdx === FALSE) { - $headers[] = 'Problem'; + $headers[] = 'Problem description'; } $existingProblemRowNoColIdx = array_search('Row no.', $headers); if ($existingProblemRowNoColIdx === FALSE) { From c715441d434e7fede21fb9863b47aa855a24aeff Mon Sep 17 00:00:00 
2001 From: John van Breda Date: Tue, 4 Feb 2020 12:39:05 +0000 Subject: [PATCH 04/26] Corrects script positions so OK if data_cleaner not installed. --- .../db/version_0_1_2/201401010909_searchterm_id_difficulty.sql | 0 .../201611121607_cache_occurrences_id_diff_data.sql | 0 2 files changed, 0 insertions(+), 0 deletions(-) rename modules/{cache_builder => data_cleaner}/db/version_0_1_2/201401010909_searchterm_id_difficulty.sql (100%) rename modules/{cache_builder => data_cleaner}/db/version_1_15_0/201611121607_cache_occurrences_id_diff_data.sql (100%) diff --git a/modules/cache_builder/db/version_0_1_2/201401010909_searchterm_id_difficulty.sql b/modules/data_cleaner/db/version_0_1_2/201401010909_searchterm_id_difficulty.sql similarity index 100% rename from modules/cache_builder/db/version_0_1_2/201401010909_searchterm_id_difficulty.sql rename to modules/data_cleaner/db/version_0_1_2/201401010909_searchterm_id_difficulty.sql diff --git a/modules/cache_builder/db/version_1_15_0/201611121607_cache_occurrences_id_diff_data.sql b/modules/data_cleaner/db/version_1_15_0/201611121607_cache_occurrences_id_diff_data.sql similarity index 100% rename from modules/cache_builder/db/version_1_15_0/201611121607_cache_occurrences_id_diff_data.sql rename to modules/data_cleaner/db/version_1_15_0/201611121607_cache_occurrences_id_diff_data.sql From f5b91cd0391176d4cb364492725010f97fbb52e2 Mon Sep 17 00:00:00 2001 From: John van Breda Date: Wed, 5 Feb 2020 09:47:14 +0000 Subject: [PATCH 05/26] Wrap HTML emails in tags Helps reduce email spam scores. 
--- application/controllers/scheduled_tasks.php | 13 ++++++++----- .../plugins/notification_emails.php | 2 +- 2 files changed, 9 insertions(+), 6 deletions(-) diff --git a/application/controllers/scheduled_tasks.php b/application/controllers/scheduled_tasks.php index 422252c322..a3e53c1010 100644 --- a/application/controllers/scheduled_tasks.php +++ b/application/controllers/scheduled_tasks.php @@ -409,7 +409,7 @@ private function sendEmail($notificationIds, $swift, $userId, $emailContent, $cc kohana::lang('misc.notification_subject') : kohana::config('email.notification_subject'); $message = new Swift_Message( sprintf($subject, kohana::config('email.server_name')), - $emailContent, + "$emailContent", 'text/html' ); $recipients = new Swift_RecipientList(); @@ -419,8 +419,8 @@ private function sendEmail($notificationIds, $swift, $userId, $emailContent, $cc $recipients->addCc(trim($ccEmail)); } // Send the email. - $swift->send($message, $recipients, $email_config['address']); - kohana::log('info', 'Email notification sent to ' . $user->email_address); + $sent = $swift->send($message, $recipients, $email_config['address']); + kohana::log('info', "$sent email notification(s) sent to $user->email_address"); } } catch (Exception $e) { @@ -597,8 +597,11 @@ private function doRecordOwnerNotifications($swift) { $this->addArrayToEmailTable($email->occurrence_id, $attrArray, $emailContent); $emailContent .= ""; - $message = new Swift_Message(kohana::lang('misc.notification_subject', kohana::config('email.server_name')), $emailContent, - 'text/html'); + $message = new Swift_Message( + kohana::lang('misc.notification_subject', kohana::config('email.server_name')), + "$emailContent", + 'text/html' + ); $recipients = new Swift_RecipientList(); $recipients->addTo($email->email_address); // Send the email. 
diff --git a/modules/notification_emails/plugins/notification_emails.php b/modules/notification_emails/plugins/notification_emails.php index 89f551a627..3211e3b89c 100644 --- a/modules/notification_emails/plugins/notification_emails.php +++ b/modules/notification_emails/plugins/notification_emails.php @@ -568,7 +568,7 @@ function send_out_user_email( } $emailContent .= '' . $notificationsLinkText . '
'; } - $message = new Swift_Message($emailSubject, $emailContent, 'text/html'); + $message = new Swift_Message($emailSubject, "$emailContent", 'text/html'); if ($highPriority === TRUE) { $message->setPriority(2); } From dfa5adecf3387374b3cb03a164c21d19b5d794fb Mon Sep 17 00:00:00 2001 From: John van Breda Date: Wed, 5 Feb 2020 11:44:29 +0000 Subject: [PATCH 06/26] Email from name set Reduces spam assassin score. --- application/controllers/scheduled_tasks.php | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/application/controllers/scheduled_tasks.php b/application/controllers/scheduled_tasks.php index a3e53c1010..b359e1edf2 100644 --- a/application/controllers/scheduled_tasks.php +++ b/application/controllers/scheduled_tasks.php @@ -394,7 +394,7 @@ private function sendEmail($notificationIds, $swift, $userId, $emailContent, $cc ->update(); $email_config = Kohana::config('email'); $userResults = $this->db - ->select('people.email_address') + ->select('people.email_address, people.first_name, people.surname') ->from('people') ->join('users', 'users.person_id', 'people.id') ->where('users.id', $userId) @@ -413,7 +413,8 @@ private function sendEmail($notificationIds, $swift, $userId, $emailContent, $cc 'text/html' ); $recipients = new Swift_RecipientList(); - $recipients->addTo($user->email_address); + $name = empty($user->first_name) ? "$user->surname" : "$user->first_name $user->surname"; + $recipients->addTo($user->email_address, $name); $cc = explode(',', $cc); foreach ($cc as $ccEmail) { $recipients->addCc(trim($ccEmail)); From a4a77c5337249439a5da8ae12ca1b9469e046afa Mon Sep 17 00:00:00 2001 From: John van Breda Date: Wed, 5 Feb 2020 11:51:37 +0000 Subject: [PATCH 07/26] Split out slow script Schema changes done inline, so other later scripts not affected. 
--- ...10945_occurrence_record_status_updates.sql | 24 +++---------------- ..._occurrence_record_status_updates_data.sql | 17 +++++++++++++ 2 files changed, 20 insertions(+), 21 deletions(-) create mode 100644 modules/indicia_setup/db/version_0_9_1/201504210946_occurrence_record_status_updates_data.sql diff --git a/modules/indicia_setup/db/version_0_9_1/201504210945_occurrence_record_status_updates.sql b/modules/indicia_setup/db/version_0_9_1/201504210945_occurrence_record_status_updates.sql index 54aeae9083..aaf0468120 100644 --- a/modules/indicia_setup/db/version_0_9_1/201504210945_occurrence_record_status_updates.sql +++ b/modules/indicia_setup/db/version_0_9_1/201504210945_occurrence_record_status_updates.sql @@ -1,8 +1,8 @@ --- #slow script# ALTER TABLE occurrences - ADD COLUMN record_substatus smallint CONSTRAINT occurrences_record_substatus_check CHECK (record_substatus BETWEEN 1 AND 5), + ADD COLUMN record_substatus smallint CONSTRAINT occurrences_record_substatus_check CHECK (record_substatus BETWEEN 1 AND 5), ADD COLUMN record_decision_source character CONSTRAINT record_decision_source_check CHECK (record_decision_source IN ('H', 'M')); +COMMENT ON COLUMN occurrences.record_status IS 'Status of this record. I - in progress, C - completed, V - verified, R - rejected, D - dubious/queried (deprecated), T - test.'; COMMENT ON COLUMN occurrences.record_substatus IS 'Provides additional detail on the record status. Values are: 1=accepted as correct, 2=accepted as considered correct, 3=plausible, 4=not accepted as unable to verify, 5=not accepted, incorrect. 
Null for unchecked records.'; COMMENT ON COLUMN occurrences.record_decision_source IS 'Defines if the record status decision was by a human (H) or machine (M).'; @@ -11,26 +11,8 @@ ALTER TABLE occurrence_comments -- Add columns to log occurrence status changes with the comments ADD COLUMN record_status character(1) CONSTRAINT occurrence_comments_record_status_check CHECK (record_status = ANY (ARRAY['I'::bpchar, 'C'::bpchar, 'V'::bpchar, 'R'::bpchar, 'T'::bpchar, 'D'::bpchar])), ADD COLUMN record_substatus smallint CONSTRAINT occurrence_comments_record_substatus_check CHECK (record_substatus BETWEEN 1 AND 5); -UPDATE occurrence_comments SET query=FALSE; ALTER TABLE occurrence_comments ALTER query SET default false; COMMENT ON COLUMN occurrence_comments.query IS 'Set to true if this comment asks a question that needs a response.'; COMMENT ON COLUMN occurrence_comments.record_status IS 'If this comment relates to the changing of the status of a record, then determines the status it was changed to. Provides and audit trail of verification changes.'; -COMMENT ON COLUMN occurrence_comments.record_substatus IS 'As record_status but provides an audit trail of the occurrences.record_substatus field'; - -UPDATE occurrence_comments oc -SET query=true -FROM occurrences o -WHERE (oc.comment LIKE 'I emailed this record%' OR oc.comment LIKE 'Query%') -AND oc.occurrence_id=o.id -AND o.record_status IN ('S', 'C', 'D'); - --- Sent status is no longer valid. We track queries via comments instead. -UPDATE occurrences SET record_status='C' where record_status='S'; - -ALTER TABLE occurrences DROP CONSTRAINT occurrences_record_status_check; - -ALTER TABLE occurrences - ADD CONSTRAINT occurrences_record_status_check CHECK (record_status = ANY (ARRAY['I'::bpchar, 'C'::bpchar, 'V'::bpchar, 'R'::bpchar, 'T'::bpchar, 'D'::bpchar])); - -COMMENT ON COLUMN occurrences.record_status IS 'Status of this record. 
I - in progress, C - completed, V - verified, R - rejected, D - dubious/queried (deprecated), T - test.'; +COMMENT ON COLUMN occurrence_comments.record_substatus IS 'As record_status but provides an audit trail of the occurrences.record_substatus field'; \ No newline at end of file diff --git a/modules/indicia_setup/db/version_0_9_1/201504210946_occurrence_record_status_updates_data.sql b/modules/indicia_setup/db/version_0_9_1/201504210946_occurrence_record_status_updates_data.sql new file mode 100644 index 0000000000..5081fec447 --- /dev/null +++ b/modules/indicia_setup/db/version_0_9_1/201504210946_occurrence_record_status_updates_data.sql @@ -0,0 +1,17 @@ +-- #slow script# +UPDATE occurrence_comments SET query=FALSE; + +UPDATE occurrence_comments oc +SET query=true +FROM occurrences o +WHERE (oc.comment LIKE 'I emailed this record%' OR oc.comment LIKE 'Query%') +AND oc.occurrence_id=o.id +AND o.record_status IN ('S', 'C', 'D'); + +-- Sent status is no longer valid. We track queries via comments instead. +UPDATE occurrences SET record_status='C' where record_status='S'; + +ALTER TABLE occurrences DROP CONSTRAINT occurrences_record_status_check; + +ALTER TABLE occurrences + ADD CONSTRAINT occurrences_record_status_check CHECK (record_status = ANY (ARRAY['I'::bpchar, 'C'::bpchar, 'V'::bpchar, 'R'::bpchar, 'T'::bpchar, 'D'::bpchar])); From 4c90b85f647fe7523f65debec814cf890fc222fb Mon Sep 17 00:00:00 2001 From: John van Breda Date: Wed, 5 Feb 2020 14:05:35 +0000 Subject: [PATCH 08/26] Separate slow data update from schema changes Prevents failures in upgrade of already populated databases. 
--- .../db/version_1_28_0/201703171025_confidential.sql | 10 +--------- .../version_1_28_0/201703171026_confidential_data.sql | 7 +++++++ 2 files changed, 8 insertions(+), 9 deletions(-) create mode 100644 modules/cache_builder/db/version_1_28_0/201703171026_confidential_data.sql diff --git a/modules/cache_builder/db/version_1_28_0/201703171025_confidential.sql b/modules/cache_builder/db/version_1_28_0/201703171025_confidential.sql index 77c2806e91..12d9caa9b9 100644 --- a/modules/cache_builder/db/version_1_28_0/201703171025_confidential.sql +++ b/modules/cache_builder/db/version_1_28_0/201703171025_confidential.sql @@ -1,10 +1,2 @@ --- #slow script# - ALTER TABLE cache_occurrences_functional - ADD COLUMN confidential BOOLEAN DEFAULT FALSE; - -UPDATE cache_occurrences_functional co - SET confidential=true -FROM occurrences o -WHERE o.id=co.id -AND o.confidential=true; \ No newline at end of file + ADD COLUMN confidential BOOLEAN DEFAULT FALSE; \ No newline at end of file diff --git a/modules/cache_builder/db/version_1_28_0/201703171026_confidential_data.sql b/modules/cache_builder/db/version_1_28_0/201703171026_confidential_data.sql new file mode 100644 index 0000000000..8554d69a76 --- /dev/null +++ b/modules/cache_builder/db/version_1_28_0/201703171026_confidential_data.sql @@ -0,0 +1,7 @@ +-- #slow script# + +UPDATE cache_occurrences_functional co + SET confidential=true +FROM occurrences o +WHERE o.id=co.id +AND o.confidential=true; \ No newline at end of file From 2ffb732cac5fa75b752a748ded4f568e6f6181b1 Mon Sep 17 00:00:00 2001 From: John van Breda Date: Wed, 5 Feb 2020 14:17:08 +0000 Subject: [PATCH 09/26] Separate data from schema changes Prevents script running order issues. 
--- ...4071219_cache_taxon_searchterms_external_key.sql | 13 ++----------- ...20_cache_taxon_searchterms_external_key_data.sql | 7 +++++++ ...504211044_cache_occurrences_record_substatus.sql | 1 - .../201512131953_occurrences_family_filter.sql | 8 +------- .../201512131954_occurrences_family_filter_data.sql | 6 ++++++ 5 files changed, 16 insertions(+), 19 deletions(-) create mode 100644 modules/cache_builder/db/version_0_1_2/201504071220_cache_taxon_searchterms_external_key_data.sql create mode 100644 modules/cache_builder/db/version_0_1_2/201512131954_occurrences_family_filter_data.sql diff --git a/modules/cache_builder/db/version_0_1_2/201504071219_cache_taxon_searchterms_external_key.sql b/modules/cache_builder/db/version_0_1_2/201504071219_cache_taxon_searchterms_external_key.sql index 229878649f..e7168e3494 100644 --- a/modules/cache_builder/db/version_0_1_2/201504071219_cache_taxon_searchterms_external_key.sql +++ b/modules/cache_builder/db/version_0_1_2/201504071219_cache_taxon_searchterms_external_key.sql @@ -1,13 +1,4 @@ --- #slow script# --- Took 27 minutes on testwarehouse. 
- -ALTER TABLE cache_taxon_searchterms +ALTER TABLE cache_taxon_searchterms ADD COLUMN external_key character varying; COMMENT ON COLUMN cache_taxon_searchterms.external_key - IS 'External identifier for the taxon.'; - -UPDATE cache_taxon_searchterms cts -SET external_key = cttl.external_key -FROM cache_taxa_taxon_lists cttl -WHERE cttl.id=cts.taxa_taxon_list_id -AND cttl.external_key IS NOT NULL; \ No newline at end of file + IS 'External identifier for the taxon.'; \ No newline at end of file diff --git a/modules/cache_builder/db/version_0_1_2/201504071220_cache_taxon_searchterms_external_key_data.sql b/modules/cache_builder/db/version_0_1_2/201504071220_cache_taxon_searchterms_external_key_data.sql new file mode 100644 index 0000000000..c20633e330 --- /dev/null +++ b/modules/cache_builder/db/version_0_1_2/201504071220_cache_taxon_searchterms_external_key_data.sql @@ -0,0 +1,7 @@ +-- #slow script# + +UPDATE cache_taxon_searchterms cts +SET external_key = cttl.external_key +FROM cache_taxa_taxon_lists cttl +WHERE cttl.id=cts.taxa_taxon_list_id +AND cttl.external_key IS NOT NULL; \ No newline at end of file diff --git a/modules/cache_builder/db/version_0_1_2/201504211044_cache_occurrences_record_substatus.sql b/modules/cache_builder/db/version_0_1_2/201504211044_cache_occurrences_record_substatus.sql index baffa21cef..749511976e 100644 --- a/modules/cache_builder/db/version_0_1_2/201504211044_cache_occurrences_record_substatus.sql +++ b/modules/cache_builder/db/version_0_1_2/201504211044_cache_occurrences_record_substatus.sql @@ -1,4 +1,3 @@ --- #slow script# -- move into a cache_builder script ALTER TABLE cache_occurrences ADD COLUMN record_substatus smallint, diff --git a/modules/cache_builder/db/version_0_1_2/201512131953_occurrences_family_filter.sql b/modules/cache_builder/db/version_0_1_2/201512131953_occurrences_family_filter.sql index a0d784e6f4..d789335754 100644 --- a/modules/cache_builder/db/version_0_1_2/201512131953_occurrences_family_filter.sql +++ 
b/modules/cache_builder/db/version_0_1_2/201512131953_occurrences_family_filter.sql @@ -1,9 +1,3 @@ --- #slow script# -- Adds family_taxa_taxon_list_id to cache_occurrences since it makes a big improvement to filtering ALTER TABLE cache_occurrences - ADD family_taxa_taxon_list_id integer NULL; - -UPDATE cache_occurrences o -SET family_taxa_taxon_list_id=cttl.family_taxa_taxon_list_id -FROM cache_taxa_taxon_lists cttl -WHERE cttl.id=o.taxa_taxon_list_id; \ No newline at end of file + ADD family_taxa_taxon_list_id integer NULL; \ No newline at end of file diff --git a/modules/cache_builder/db/version_0_1_2/201512131954_occurrences_family_filter_data.sql b/modules/cache_builder/db/version_0_1_2/201512131954_occurrences_family_filter_data.sql new file mode 100644 index 0000000000..e33ccc389c --- /dev/null +++ b/modules/cache_builder/db/version_0_1_2/201512131954_occurrences_family_filter_data.sql @@ -0,0 +1,6 @@ +-- #slow script# + +UPDATE cache_occurrences o +SET family_taxa_taxon_list_id=cttl.family_taxa_taxon_list_id +FROM cache_taxa_taxon_lists cttl +WHERE cttl.id=o.taxa_taxon_list_id; \ No newline at end of file From da9d21d1ec713d91ff2a1cd0ade7f69a8a9de29c Mon Sep 17 00:00:00 2001 From: John van Breda Date: Wed, 5 Feb 2020 16:12:56 +0000 Subject: [PATCH 10/26] Shouldn't be slow script Otherwise messes up script run order. --- .../db/version_1_0_0/201512300956_cache_occurrences_view.sql | 1 - 1 file changed, 1 deletion(-) diff --git a/modules/cache_builder/db/version_1_0_0/201512300956_cache_occurrences_view.sql b/modules/cache_builder/db/version_1_0_0/201512300956_cache_occurrences_view.sql index 01f32b4b6a..13f122fc86 100644 --- a/modules/cache_builder/db/version_1_0_0/201512300956_cache_occurrences_view.sql +++ b/modules/cache_builder/db/version_1_0_0/201512300956_cache_occurrences_view.sql @@ -1,4 +1,3 @@ --- #slow script# DROP TABLE cache_occurrences; -- Create a view to ease the migration path to the new cache occurrences structure. 
From c2e02bf132c649b1edc3edd1f3d19194f7c6e55f Mon Sep 17 00:00:00 2001 From: John van Breda Date: Thu, 13 Feb 2020 14:16:16 +0000 Subject: [PATCH 11/26] Duplicate removed --- application/models/sample.php | 8 -------- 1 file changed, 8 deletions(-) diff --git a/application/models/sample.php b/application/models/sample.php index b8da8fa52a..d8c61739bb 100644 --- a/application/models/sample.php +++ b/application/models/sample.php @@ -93,14 +93,6 @@ class Sample_Model extends ORM_Tree { ['fieldName' => 'sample:external_key'], ], ], - [ - 'description' => 'Sample External Key', - 'fields' => [ - ['fieldName' => 'survey_id', 'notInMappings' => TRUE], - ['fieldName' => 'sample:sample_method_id'], - ['fieldName' => 'sample:external_key'], - ], - ], [ 'description' => 'Grid Ref and Date', 'fields' => [ From c6f0fd3c2801b0f74bd4a84b18a10935485ae142 Mon Sep 17 00:00:00 2001 From: John van Breda Date: Thu, 20 Feb 2020 13:03:33 +0000 Subject: [PATCH 12/26] Adds error info in DWCa download failures. 
--- modules/indicia_svc_data/controllers/data_service_base.php | 3 +++ 1 file changed, 3 insertions(+) diff --git a/modules/indicia_svc_data/controllers/data_service_base.php b/modules/indicia_svc_data/controllers/data_service_base.php index a2c6377a2f..b2817f6a7c 100644 --- a/modules/indicia_svc_data/controllers/data_service_base.php +++ b/modules/indicia_svc_data/controllers/data_service_base.php @@ -72,6 +72,9 @@ protected function handle_request() { if ($mode !== 'dwca') { throw $e; } + elseif (!$this->failedRequestDetail) { + $this->failedRequestDetail = $e->getMessage(); + } } switch ($mode) { case 'json': From a0f28f3910e04f867cca5c1352f64afc68cc0a4a Mon Sep 17 00:00:00 2001 From: John van Breda Date: Mon, 24 Feb 2020 12:25:06 +0000 Subject: [PATCH 13/26] Index to improve indexing of sensitive data to ES --- .../db/version_3_2_0/20200224_es_sensitivity_index.sql | 3 +++ 1 file changed, 3 insertions(+) create mode 100644 modules/indicia_setup/db/version_3_2_0/20200224_es_sensitivity_index.sql diff --git a/modules/indicia_setup/db/version_3_2_0/20200224_es_sensitivity_index.sql b/modules/indicia_setup/db/version_3_2_0/20200224_es_sensitivity_index.sql new file mode 100644 index 0000000000..6b51ffc79f --- /dev/null +++ b/modules/indicia_setup/db/version_3_2_0/20200224_es_sensitivity_index.sql @@ -0,0 +1,3 @@ +-- Improves performance when extracting sensitive data to move to Elasticsearch. 
+CREATE INDEX IF NOT EXISTS ix_cache_occ_functional_id_tracking_sens +ON cache_occurrences_functional(id, tracking) WHERE sensitive=true; \ No newline at end of file From 1e1b7fe5814547948ee87dc60cec2b9c6e11f297 Mon Sep 17 00:00:00 2001 From: John van Breda Date: Tue, 3 Mar 2020 15:22:45 +0000 Subject: [PATCH 14/26] Removed stray semi-colon in SQL --- modules/cache_builder/config/cache_builder.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/cache_builder/config/cache_builder.php b/modules/cache_builder/config/cache_builder.php index d1def318d5..6774bffa43 100644 --- a/modules/cache_builder/config/cache_builder.php +++ b/modules/cache_builder/config/cache_builder.php @@ -347,7 +347,7 @@ FROM cache_taxa_taxon_lists cttl -- Ensure only changed taxon concepts are updated JOIN descendants nu ON nu.id=cttl.preferred_taxa_taxon_list_id - JOIN master_list_paths mlp ON mlp.external_key=cttl.external_key; + JOIN master_list_paths mlp ON mlp.external_key=cttl.external_key WHERE cttl.id=u.taxa_taxon_list_id AND (COALESCE(u.family_taxa_taxon_list_id, 0)<>COALESCE(cttl.family_taxa_taxon_list_id, 0) OR COALESCE(u.taxon_path, ARRAY[]::integer[])<>COALESCE(mlp.path, ARRAY[]::integer[]));", From 9eb357da19146df591f3bf3a16e844060c927815 Mon Sep 17 00:00:00 2001 From: John van Breda Date: Tue, 3 Mar 2020 16:24:47 +0000 Subject: [PATCH 15/26] Include required parameters info in REST API report response. 
--- modules/rest_api/controllers/services/rest.php | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/modules/rest_api/controllers/services/rest.php b/modules/rest_api/controllers/services/rest.php index c31a9e9d4d..b42d03ea7f 100644 --- a/modules/rest_api/controllers/services/rest.php +++ b/modules/rest_api/controllers/services/rest.php @@ -2003,7 +2003,8 @@ private function getReportOutput(array $segments) { } elseif (isset($report['content']['parameterRequest'])) { // @todo: handle param requests - $this->apiResponse->fail('Bad request (parameters missing)', 400, "Missing parameters"); + $this->apiResponse->fail('Bad request (parameters missing)', 400, + "Missing parameters: " . implode(', ', array_keys($report['content']['parameterRequest']))); } } finally { From f1c4e9ad45498eeadb406b44486025d6c923e76a Mon Sep 17 00:00:00 2001 From: John van Breda Date: Tue, 10 Mar 2020 16:49:57 +0000 Subject: [PATCH 16/26] Reports required for associations data in Elasticsearch. --- .../list_for_elastic.xml | 42 +++++++++++++++++++ .../list_for_elastic_all.xml | 42 +++++++++++++++++++ 2 files changed, 84 insertions(+) create mode 100644 modules/occurrence_associations/reports/library/occurrence_associations/list_for_elastic.xml create mode 100644 modules/occurrence_associations/reports/library/occurrence_associations/list_for_elastic_all.xml diff --git a/modules/occurrence_associations/reports/library/occurrence_associations/list_for_elastic.xml b/modules/occurrence_associations/reports/library/occurrence_associations/list_for_elastic.xml new file mode 100644 index 0000000000..224a25ef4e --- /dev/null +++ b/modules/occurrence_associations/reports/library/occurrence_associations/list_for_elastic.xml @@ -0,0 +1,42 @@ + + + SELECT #columns# + FROM cache_occurrences_functional o + JOIN occurrence_associations oa ON oa.from_occurrence_id=o.id AND oa.deleted=false + JOIN cache_termlists_terms ctt ON ctt.id=oa.association_type_id + JOIN cache_occurrences_functional 
o2 ON o2.id=oa.to_occurrence_id + JOIN cache_taxa_taxon_lists cttl ON cttl.id=o.taxa_taxon_list_id + #agreements_join# + #filters# + #group_bys# + UNION + SELECT #columns# + FROM cache_occurrences_functional o + JOIN occurrence_associations oa ON oa.to_occurrence_id=o.id AND oa.deleted=false + JOIN cache_termlists_terms ctt ON ctt.id=oa.association_type_id + JOIN cache_occurrences_functional o2 ON o2.id=oa.from_occurrence_id + JOIN cache_taxa_taxon_lists cttl ON cttl.id=o.taxa_taxon_list_id + #agreements_join# + #filters# + #group_bys# + + + + oa.id > #last_id# + + + oa.updated_on >= '#tracking_date_from#' + + + + + + + + + + \ No newline at end of file diff --git a/modules/occurrence_associations/reports/library/occurrence_associations/list_for_elastic_all.xml b/modules/occurrence_associations/reports/library/occurrence_associations/list_for_elastic_all.xml new file mode 100644 index 0000000000..19d27316b4 --- /dev/null +++ b/modules/occurrence_associations/reports/library/occurrence_associations/list_for_elastic_all.xml @@ -0,0 +1,42 @@ + + + SELECT #columns# + FROM cache_occurrences_functional o + JOIN occurrence_associations oa ON oa.from_occurrence_id=o.id AND oa.deleted=false + JOIN cache_termlists_terms ctt ON ctt.id=oa.association_type_id + JOIN cache_occurrences_functional o2 ON o2.id=oa.to_occurrence_id + JOIN cache_taxa_taxon_lists cttl ON cttl.id=o.taxa_taxon_list_id + #filters# + #group_bys# + UNION + SELECT #columns# + FROM cache_occurrences_functional o + JOIN occurrence_associations oa ON oa.to_occurrence_id=o.id AND oa.deleted=false + JOIN cache_termlists_terms ctt ON ctt.id=oa.association_type_id + JOIN cache_occurrences_functional o2 ON o2.id=oa.from_occurrence_id + JOIN cache_taxa_taxon_lists cttl ON cttl.id=o.taxa_taxon_list_id + #filters# + #group_bys# + + + + oa.id > #last_id# + + + oa.updated_on >= '#tracking_date_from#' + + + + + + + + + + \ No newline at end of file From 570a0c026b38978390b00d1de8b5cfc829c326a8 Mon Sep 17 00:00:00 2001 
From: John van Breda Date: Wed, 11 Mar 2020 09:24:39 +0000 Subject: [PATCH 17/26] Ensure report does not index too early Associations must only be indexed after the main occurrence records. --- .../occurrence_associations/list_for_elastic.xml | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/modules/occurrence_associations/reports/library/occurrence_associations/list_for_elastic.xml b/modules/occurrence_associations/reports/library/occurrence_associations/list_for_elastic.xml index 224a25ef4e..3853c2a75a 100644 --- a/modules/occurrence_associations/reports/library/occurrence_associations/list_for_elastic.xml +++ b/modules/occurrence_associations/reports/library/occurrence_associations/list_for_elastic.xml @@ -5,22 +5,28 @@ SELECT #columns# - FROM cache_occurrences_functional o + FROM (SELECT value::json AS var FROM variables WHERE name='rest-autofeed-#occurrences_autofeed_project#') v, + cache_occurrences_functional o JOIN occurrence_associations oa ON oa.from_occurrence_id=o.id AND oa.deleted=false JOIN cache_termlists_terms ctt ON ctt.id=oa.association_type_id JOIN cache_occurrences_functional o2 ON o2.id=oa.to_occurrence_id JOIN cache_taxa_taxon_lists cttl ON cttl.id=o.taxa_taxon_list_id #agreements_join# + WHERE o.tracking <= (var#>>'{0,last_tracking_id}')::integer + AND var#>>'{0,mode}'='updates' #filters# #group_bys# UNION SELECT #columns# - FROM cache_occurrences_functional o + FROM (SELECT value::json AS var FROM variables WHERE name='rest-autofeed-#occurrences_autofeed_project#') v, + cache_occurrences_functional o JOIN occurrence_associations oa ON oa.to_occurrence_id=o.id AND oa.deleted=false JOIN cache_termlists_terms ctt ON ctt.id=oa.association_type_id JOIN cache_occurrences_functional o2 ON o2.id=oa.from_occurrence_id JOIN cache_taxa_taxon_lists cttl ON cttl.id=o.taxa_taxon_list_id #agreements_join# + WHERE o.tracking <= (var#>>'{0,last_tracking_id}')::integer + AND var#>>'{0,mode}'='updates' #filters# #group_bys# @@ 
-31,6 +37,9 @@ oa.updated_on >= '#tracking_date_from#' + From 94e6b1d434dd84a79f126d20bb120b4e4175fc6a Mon Sep 17 00:00:00 2001 From: John van Breda Date: Wed, 11 Mar 2020 09:33:58 +0000 Subject: [PATCH 18/26] All occurrence assocs report also update to not index too early --- .../list_for_elastic_all.xml | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/modules/occurrence_associations/reports/library/occurrence_associations/list_for_elastic_all.xml b/modules/occurrence_associations/reports/library/occurrence_associations/list_for_elastic_all.xml index 19d27316b4..cb2f503593 100644 --- a/modules/occurrence_associations/reports/library/occurrence_associations/list_for_elastic_all.xml +++ b/modules/occurrence_associations/reports/library/occurrence_associations/list_for_elastic_all.xml @@ -7,20 +7,26 @@ SELECT #columns# - FROM cache_occurrences_functional o + FROM (SELECT value::json AS var FROM variables WHERE name='rest-autofeed-#occurrences_autofeed_project#') v, + cache_occurrences_functional o JOIN occurrence_associations oa ON oa.from_occurrence_id=o.id AND oa.deleted=false JOIN cache_termlists_terms ctt ON ctt.id=oa.association_type_id JOIN cache_occurrences_functional o2 ON o2.id=oa.to_occurrence_id JOIN cache_taxa_taxon_lists cttl ON cttl.id=o.taxa_taxon_list_id + WHERE o.tracking <= (var#>>'{0,last_tracking_id}')::integer + AND var#>>'{0,mode}'='updates' #filters# #group_bys# UNION SELECT #columns# - FROM cache_occurrences_functional o + FROM (SELECT value::json AS var FROM variables WHERE name='rest-autofeed-#occurrences_autofeed_project#') v, + cache_occurrences_functional o JOIN occurrence_associations oa ON oa.to_occurrence_id=o.id AND oa.deleted=false JOIN cache_termlists_terms ctt ON ctt.id=oa.association_type_id JOIN cache_occurrences_functional o2 ON o2.id=oa.from_occurrence_id JOIN cache_taxa_taxon_lists cttl ON cttl.id=o.taxa_taxon_list_id + WHERE o.tracking <= (var#>>'{0,last_tracking_id}')::integer + AND 
var#>>'{0,mode}'='updates' #filters# #group_bys# @@ -31,6 +37,9 @@ oa.updated_on >= '#tracking_date_from#' + From f61a3c8bc431e755ac96da69c49f7f20a145ee46 Mon Sep 17 00:00:00 2001 From: John van Breda Date: Thu, 12 Mar 2020 12:45:36 +0000 Subject: [PATCH 19/26] Index helps in ES indexing Since changes are tracked on updated_on. --- .../db/version_3_2_0/202003111230_ix_updated_on.sql | 2 ++ 1 file changed, 2 insertions(+) create mode 100644 modules/occurrence_associations/db/version_3_2_0/202003111230_ix_updated_on.sql diff --git a/modules/occurrence_associations/db/version_3_2_0/202003111230_ix_updated_on.sql b/modules/occurrence_associations/db/version_3_2_0/202003111230_ix_updated_on.sql new file mode 100644 index 0000000000..e221c730b0 --- /dev/null +++ b/modules/occurrence_associations/db/version_3_2_0/202003111230_ix_updated_on.sql @@ -0,0 +1,2 @@ +CREATE INDEX IF NOT EXISTS ix_occurrence_associations_updated_on + ON occurrence_associations(updated_on); \ No newline at end of file From 16f4cee332440a463c17e71e8d405c068deee5fb Mon Sep 17 00:00:00 2001 From: John van Breda Date: Thu, 12 Mar 2020 12:46:10 +0000 Subject: [PATCH 20/26] Revision of ES associations indexing reports Performance and fixes some change tracking issues. 
--- .../list_for_elastic.xml | 95 ++++++++++++------- .../list_for_elastic_all.xml | 92 +++++++++++------- 2 files changed, 121 insertions(+), 66 deletions(-) diff --git a/modules/occurrence_associations/reports/library/occurrence_associations/list_for_elastic.xml b/modules/occurrence_associations/reports/library/occurrence_associations/list_for_elastic.xml index 3853c2a75a..f3bc292df0 100644 --- a/modules/occurrence_associations/reports/library/occurrence_associations/list_for_elastic.xml +++ b/modules/occurrence_associations/reports/library/occurrence_associations/list_for_elastic.xml @@ -4,48 +4,75 @@ > - SELECT #columns# - FROM (SELECT value::json AS var FROM variables WHERE name='rest-autofeed-#occurrences_autofeed_project#') v, - cache_occurrences_functional o - JOIN occurrence_associations oa ON oa.from_occurrence_id=o.id AND oa.deleted=false - JOIN cache_termlists_terms ctt ON ctt.id=oa.association_type_id - JOIN cache_occurrences_functional o2 ON o2.id=oa.to_occurrence_id - JOIN cache_taxa_taxon_lists cttl ON cttl.id=o.taxa_taxon_list_id - #agreements_join# - WHERE o.tracking <= (var#>>'{0,last_tracking_id}')::integer - AND var#>>'{0,mode}'='updates' - #filters# - #group_bys# - UNION - SELECT #columns# - FROM (SELECT value::json AS var FROM variables WHERE name='rest-autofeed-#occurrences_autofeed_project#') v, - cache_occurrences_functional o - JOIN occurrence_associations oa ON oa.to_occurrence_id=o.id AND oa.deleted=false - JOIN cache_termlists_terms ctt ON ctt.id=oa.association_type_id - JOIN cache_occurrences_functional o2 ON o2.id=oa.from_occurrence_id - JOIN cache_taxa_taxon_lists cttl ON cttl.id=o.taxa_taxon_list_id - #agreements_join# - WHERE o.tracking <= (var#>>'{0,last_tracking_id}')::integer - AND var#>>'{0,mode}'='updates' - #filters# - #group_bys# +DROP TABLE IF EXISTS oa_chunk_1; +DROP TABLE IF EXISTS oa_chunk_2; + +SELECT oa.id, oa.updated_on, oa.from_occurrence_id, oa.to_occurrence_id, ctt.term, cttl.preferred_taxon, 
cttl.default_common_name +INTO TEMPORARY oa_chunk_1 + FROM occurrence_associations oa + JOIN cache_termlists_terms ctt ON ctt.id=oa.association_type_id + JOIN cache_occurrences_functional o ON o.id=oa.to_occurrence_id + JOIN cache_taxa_taxon_lists cttl ON cttl.id=o.taxa_taxon_list_id + WHERE oa.deleted=false + AND oa.updated_on < ( + SELECT max(updated_on) FROM cache_occurrences_functional + WHERE tracking<(SELECT (value::json#>>'{0,last_tracking_id}')::integer FROM variables WHERE name='rest-autofeed-#occurrences_autofeed_proj_id#') + ) + AND oa.updated_on >= '#tracking_date_from#' + ORDER BY oa.updated_on + LIMIT 10000/2; + +SELECT oa.id, oa.updated_on, oa.from_occurrence_id, oa.to_occurrence_id, ctt.term, cttl.preferred_taxon, cttl.default_common_name +INTO TEMPORARY oa_chunk_2 + FROM occurrence_associations oa + JOIN cache_termlists_terms ctt ON ctt.id=oa.association_type_id + JOIN cache_occurrences_functional o ON o.id=oa.from_occurrence_id + JOIN cache_taxa_taxon_lists cttl ON cttl.id=o.taxa_taxon_list_id + WHERE oa.deleted=false + AND oa.updated_on < ( + SELECT max(updated_on) FROM cache_occurrences_functional + WHERE tracking<(SELECT (value::json#>>'{0,last_tracking_id}')::integer FROM variables WHERE name='rest-autofeed-#occurrences_autofeed_proj_id#') + ) + AND oa.updated_on >= '#tracking_date_from#' + ORDER BY oa.updated_on + LIMIT 10000/2; + +SELECT oa.id as "id", +oa.updated_on as "tracking_date", +string_agg(oa.to_occurrence_id::text || '~' || oa.term || '~' || oa.preferred_taxon || '~' || COALESCE(oa.default_common_name, ''), '@@') as "associations_data" + FROM oa_chunk_1 oa + JOIN cache_occurrences_functional o ON o.id=oa.from_occurrence_id + #agreements_join# + WHERE 1=1 + #filters# + GROUP BY oa.id, oa.updated_on, oa.updated_on +UNION +SELECT oa.id as "id", +oa.updated_on as "tracking_date", +string_agg(oa.from_occurrence_id::text || '~<<' || oa.term || '<<~' || oa.preferred_taxon || '~' || COALESCE(oa.default_common_name, ''), '@@') as
"associations_data" + FROM oa_chunk_2 oa + JOIN cache_occurrences_functional o ON o.id=oa.to_occurrence_id + #agreements_join# + WHERE 1=1 + #filters# + GROUP BY oa.id, oa.updated_on, oa.updated_on + + tracking_date ASC + oa.id > #last_id# - - oa.updated_on >= '#tracking_date_from#' - - + + - - - - - + + + \ No newline at end of file diff --git a/modules/occurrence_associations/reports/library/occurrence_associations/list_for_elastic_all.xml b/modules/occurrence_associations/reports/library/occurrence_associations/list_for_elastic_all.xml index cb2f503593..a8b248abd8 100644 --- a/modules/occurrence_associations/reports/library/occurrence_associations/list_for_elastic_all.xml +++ b/modules/occurrence_associations/reports/library/occurrence_associations/list_for_elastic_all.xml @@ -6,46 +6,74 @@ > - SELECT #columns# - FROM (SELECT value::json AS var FROM variables WHERE name='rest-autofeed-#occurrences_autofeed_project#') v, - cache_occurrences_functional o - JOIN occurrence_associations oa ON oa.from_occurrence_id=o.id AND oa.deleted=false - JOIN cache_termlists_terms ctt ON ctt.id=oa.association_type_id - JOIN cache_occurrences_functional o2 ON o2.id=oa.to_occurrence_id - JOIN cache_taxa_taxon_lists cttl ON cttl.id=o.taxa_taxon_list_id - WHERE o.tracking <= (var#>>'{0,last_tracking_id}')::integer - AND var#>>'{0,mode}'='updates' - #filters# - #group_bys# - UNION - SELECT #columns# - FROM (SELECT value::json AS var FROM variables WHERE name='rest-autofeed-#occurrences_autofeed_project#') v, - cache_occurrences_functional o - JOIN occurrence_associations oa ON oa.to_occurrence_id=o.id AND oa.deleted=false - JOIN cache_termlists_terms ctt ON ctt.id=oa.association_type_id - JOIN cache_occurrences_functional o2 ON o2.id=oa.from_occurrence_id - JOIN cache_taxa_taxon_lists cttl ON cttl.id=o.taxa_taxon_list_id - WHERE o.tracking <= (var#>>'{0,last_tracking_id}')::integer - AND var#>>'{0,mode}'='updates' - #filters# - #group_bys# +DROP TABLE IF EXISTS oa_chunk_1; +DROP 
TABLE IF EXISTS oa_chunk_2; + +SELECT oa.id, oa.updated_on, oa.from_occurrence_id, oa.to_occurrence_id, ctt.term, cttl.preferred_taxon, cttl.default_common_name +INTO TEMPORARY oa_chunk_1 + FROM occurrence_associations oa + JOIN cache_termlists_terms ctt ON ctt.id=oa.association_type_id + JOIN cache_occurrences_functional o ON o.id=oa.to_occurrence_id + JOIN cache_taxa_taxon_lists cttl ON cttl.id=o.taxa_taxon_list_id + WHERE oa.deleted=false + AND oa.updated_on < ( + SELECT max(updated_on) FROM cache_occurrences_functional + WHERE tracking<(SELECT (value::json#>>'{0,last_tracking_id}')::integer FROM variables WHERE name='rest-autofeed-#occurrences_autofeed_proj_id#') + ) + AND oa.updated_on >= '#tracking_date_from#' + ORDER BY oa.updated_on + LIMIT 10000/2; + +SELECT oa.id, oa.updated_on, oa.from_occurrence_id, oa.to_occurrence_id, ctt.term, cttl.preferred_taxon, cttl.default_common_name +INTO TEMPORARY oa_chunk_2 + FROM occurrence_associations oa + JOIN cache_termlists_terms ctt ON ctt.id=oa.association_type_id + JOIN cache_occurrences_functional o ON o.id=oa.from_occurrence_id + JOIN cache_taxa_taxon_lists cttl ON cttl.id=o.taxa_taxon_list_id + WHERE oa.deleted=false + AND oa.updated_on < ( + SELECT max(updated_on) FROM cache_occurrences_functional + WHERE tracking<(SELECT (value::json#>>'{0,last_tracking_id}')::integer FROM variables WHERE name='rest-autofeed-#occurrences_autofeed_proj_id#') + ) + AND oa.updated_on >= '#tracking_date_from#' + ORDER BY oa.updated_on + LIMIT 10000/2; + +SELECT oa.id as "id", +oa.updated_on as "tracking_date", +oa.updated_on as "tracking", +string_agg(oa.to_occurrence_id::text || '~' || oa.term || '~' || oa.preferred_taxon || '~' || COALESCE(oa.default_common_name, ''), '@@') as "associations_data" + FROM oa_chunk_1 oa + JOIN cache_occurrences_functional o ON o.id=oa.from_occurrence_id + WHERE 1=1 + #filters# + GROUP BY oa.id, oa.updated_on, oa.updated_on +UNION +SELECT oa.id as "id", +oa.updated_on as "tracking_date",
+string_agg(oa.from_occurrence_id::text || '~<<' || oa.term || '<<~' || oa.preferred_taxon || '~' || COALESCE(oa.default_common_name, ''), '@@') as "associations_data" + FROM oa_chunk_2 oa + JOIN cache_occurrences_functional o ON o.id=oa.to_occurrence_id + WHERE 1=1 + #filters# + GROUP BY oa.id, oa.updated_on, oa.updated_on + + tracking_date ASC + oa.id > #last_id# - - oa.updated_on >= '#tracking_date_from#' - - + + - - - - - + + + \ No newline at end of file From b413e4fbc16c29b44d277a919b99a4cedcf299fa Mon Sep 17 00:00:00 2001 From: John van Breda Date: Thu, 12 Mar 2020 14:04:41 +0000 Subject: [PATCH 21/26] Not needed for sort any more. --- .../library/occurrence_associations/list_for_elastic_all.xml | 1 - 1 file changed, 1 deletion(-) diff --git a/modules/occurrence_associations/reports/library/occurrence_associations/list_for_elastic_all.xml b/modules/occurrence_associations/reports/library/occurrence_associations/list_for_elastic_all.xml index a8b248abd8..b6e620c539 100644 --- a/modules/occurrence_associations/reports/library/occurrence_associations/list_for_elastic_all.xml +++ b/modules/occurrence_associations/reports/library/occurrence_associations/list_for_elastic_all.xml @@ -41,7 +41,6 @@ INTO TEMPORARY oa_chunk_2 SELECT oa.id as "id", oa.updated_on as "tracking_date", -oa.updated_on as "tracking", string_agg(oa.to_occurrence_id::text || '~' || oa.term || '~' || oa.preferred_taxon || '~' || COALESCE(oa.default_common_name, ''), '@@') as "associations_data" FROM oa_chunk_1 oa JOIN cache_occurrences_functional o ON o.id=oa.from_occurrence_id From f54e22eb3fc9008240d05496146dffc4882d8eb5 Mon Sep 17 00:00:00 2001 From: John van Breda Date: Wed, 18 Mar 2020 20:29:47 +0000 Subject: [PATCH 22/26] Additional report for locations Used for intersection tests by [misc_extensions.query_locations_on_map_click] control. 
--- .../library/locations/locations_list_3.xml | 46 +++++++++++++++++++ 1 file changed, 46 insertions(+) create mode 100644 reports/library/locations/locations_list_3.xml diff --git a/reports/library/locations/locations_list_3.xml b/reports/library/locations/locations_list_3.xml new file mode 100644 index 0000000000..714754e4ed --- /dev/null +++ b/reports/library/locations/locations_list_3.xml @@ -0,0 +1,46 @@ + + + SELECT #columns# + FROM locations l + LEFT JOIN locations_websites lw on lw.location_id=l.id AND lw.deleted=false + LEFT JOIN cache_termlists_terms ttype ON ttype.id=l.location_type_id + #agreements_join# + #joins# + WHERE #sharing_filter# + AND l.deleted=false + AND (l.public=true or lw.website_id in (#website_ids#)) + #order_by# + + + l.name ASC + + + + l.location_type_id in (#location_type_ids#) + + + + st_intersects(l.boundary_geom, st_geomfromtext('#intersects#', 900913)) + and not st_touches(l.boundary_geom, st_geomfromtext('#intersects#', 900913)) + + + (l.code is null or l.code not like '%+%') + + + + + + + + + + + + \ No newline at end of file From f5a5d2e9d89003df3dd06366dc2a3422acd6e6cb Mon Sep 17 00:00:00 2001 From: John van Breda Date: Mon, 30 Mar 2020 12:58:23 +0100 Subject: [PATCH 23/26] Renamed folder version as wasn't deployed in 3.2 --- .../202001210925_sample_delete.sql | 0 .../20200224_es_sensitivity_index.sql | 0 2 files changed, 0 insertions(+), 0 deletions(-) rename modules/indicia_setup/db/{version_3_2_0 => version_3_3_0}/202001210925_sample_delete.sql (100%) rename modules/indicia_setup/db/{version_3_2_0 => version_3_3_0}/20200224_es_sensitivity_index.sql (100%) diff --git a/modules/indicia_setup/db/version_3_2_0/202001210925_sample_delete.sql b/modules/indicia_setup/db/version_3_3_0/202001210925_sample_delete.sql similarity index 100% rename from modules/indicia_setup/db/version_3_2_0/202001210925_sample_delete.sql rename to modules/indicia_setup/db/version_3_3_0/202001210925_sample_delete.sql diff --git 
a/modules/indicia_setup/db/version_3_2_0/20200224_es_sensitivity_index.sql b/modules/indicia_setup/db/version_3_3_0/20200224_es_sensitivity_index.sql similarity index 100% rename from modules/indicia_setup/db/version_3_2_0/20200224_es_sensitivity_index.sql rename to modules/indicia_setup/db/version_3_3_0/20200224_es_sensitivity_index.sql From 22e4f1b351950d91cc05e3f1e2ca8e925cc2c822 Mon Sep 17 00:00:00 2001 From: John van Breda Date: Tue, 7 Apr 2020 20:17:10 +0100 Subject: [PATCH 24/26] Adds new remote download fields for updates to Indicia2Recorder --- reports/library/occurrences/remote_download.xml | 2 ++ ...emote_download_by_input_date_using_spatial_index_builder.xml | 2 ++ 2 files changed, 4 insertions(+) diff --git a/reports/library/occurrences/remote_download.xml b/reports/library/occurrences/remote_download.xml index cf2882ac76..2f16b7217f 100644 --- a/reports/library/occurrences/remote_download.xml +++ b/reports/library/occurrences/remote_download.xml @@ -123,6 +123,8 @@ + + diff --git a/reports/library/occurrences/remote_download_by_input_date_using_spatial_index_builder.xml b/reports/library/occurrences/remote_download_by_input_date_using_spatial_index_builder.xml index a0aa7a2609..8bf718c578 100644 --- a/reports/library/occurrences/remote_download_by_input_date_using_spatial_index_builder.xml +++ b/reports/library/occurrences/remote_download_by_input_date_using_spatial_index_builder.xml @@ -123,6 +123,8 @@ + + From d3a5d2a57a1cfcce8e4f29fb3729799a6dfda374 Mon Sep 17 00:00:00 2001 From: John van Breda Date: Tue, 7 Apr 2020 20:58:38 +0100 Subject: [PATCH 25/26] Switched creator ID to person ID As person ID used to generate unique IDs in Indicia2Recorder --- reports/library/occurrences/remote_download.xml | 2 +- ...emote_download_by_input_date_using_spatial_index_builder.xml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/reports/library/occurrences/remote_download.xml b/reports/library/occurrences/remote_download.xml index 
2f16b7217f..207c6335b9 100644 --- a/reports/library/occurrences/remote_download.xml +++ b/reports/library/occurrences/remote_download.xml @@ -123,7 +123,7 @@ - + diff --git a/reports/library/occurrences/remote_download_by_input_date_using_spatial_index_builder.xml b/reports/library/occurrences/remote_download_by_input_date_using_spatial_index_builder.xml index 8bf718c578..d37861eede 100644 --- a/reports/library/occurrences/remote_download_by_input_date_using_spatial_index_builder.xml +++ b/reports/library/occurrences/remote_download_by_input_date_using_spatial_index_builder.xml @@ -123,7 +123,7 @@ - + From 5df997b49e7187fc8530dce726b497a134364d09 Mon Sep 17 00:00:00 2001 From: John van Breda Date: Thu, 16 Apr 2020 10:59:57 +0100 Subject: [PATCH 26/26] Version bump --- application/config/version.php | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/application/config/version.php b/application/config/version.php index 29960f2908..fc27a7932e 100644 --- a/application/config/version.php +++ b/application/config/version.php @@ -29,14 +29,14 @@ * * @var string */ -$config['version'] = '3.2.3'; +$config['version'] = '3.3.0'; /** * Version release date. * * @var string */ -$config['release_date'] = '2020-04-06'; +$config['release_date'] = '2020-04-16'; /** * Link to the code repository downloads page.