From 12a48338b90402b68af960be2ba26835623e4e2b Mon Sep 17 00:00:00 2001 From: John van Breda Date: Tue, 12 Feb 2019 19:35:33 +0000 Subject: [PATCH 01/10] Version bump --- application/config/version.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/application/config/version.php b/application/config/version.php index c66fe62021..a03ce4d0f7 100644 --- a/application/config/version.php +++ b/application/config/version.php @@ -29,7 +29,7 @@ * * @var string */ -$config['version'] = '2.6.1'; +$config['version'] = '2.7.0'; /** * Version release date. From d72c06640304f4461ad590594cf367a89fd98e15 Mon Sep 17 00:00:00 2001 From: John van Breda Date: Tue, 12 Feb 2019 19:36:19 +0000 Subject: [PATCH 02/10] Single update for map squares processing Updates on cache tables are expensive, so doing this in one update statement improves performance. --- application/helpers/postgreSQL.php | 100 ++++++++++++++++++----------- application/libraries/MY_ORM.php | 8 +-- 2 files changed, 64 insertions(+), 44 deletions(-) diff --git a/application/helpers/postgreSQL.php b/application/helpers/postgreSQL.php index 5bab227923..d4819850cb 100644 --- a/application/helpers/postgreSQL.php +++ b/application/helpers/postgreSQL.php @@ -177,57 +177,81 @@ public static function insertMapSquaresForSamples($ids, $size, $db = NULL) { * submitted directly (i.e. not as part of a sample), to make sure that any * changed occurrences are linked to their map square entries properly. */ - public static function insertMapSquaresForOccurrences($ids, $size, $db = NULL) { - self::insertMapSquares($ids, 'o', $size, $db); + public static function insertMapSquaresForOccurrences($ids, $db) { + self::insertMapSquares($ids, 'o', $db); } /** * Generic shared code for the insertMapSquaresFor... methods. */ - private static function insertMapSquares($ids, $alias, $size, $db = NULL) { + private static function insertMapSquares($ids, $alias, $db) { if (count($ids) > 0) { static $srid; if (!isset($srid)) { $srid = kohana::config('sref_notations.internal_srid'); } - if (!$db) - $db = new Database(); $idlist = implode(',', $ids); - // Seems much faster to break this into small queries than one big left join. - $smpInfo = $db->query( - "SELECT DISTINCT s.id, o.website_id, s.survey_id, st_astext(coalesce(s.geom, l.centroid_geom)) as geom, o.confidential, - GREATEST(round(sqrt(st_area(st_transform(s.geom, sref_system_to_srid(entered_sref_system)))))::integer, o.sensitivity_precision, s.privacy_precision, $size) as size, - coalesce(s.entered_sref_system, l.centroid_sref_system) as entered_sref_system, - round(st_x(st_centroid(reduce_precision( - coalesce(s.geom, l.centroid_geom), o.confidential, - GREATEST(round(sqrt(st_area(st_transform(s.geom, sref_system_to_srid(entered_sref_system)))))::integer, o.sensitivity_precision, s.privacy_precision, $size), - s.entered_sref_system) - ))) as x, - round(st_y(st_centroid(reduce_precision( - coalesce(s.geom, l.centroid_geom), o.confidential, - GREATEST(round(sqrt(st_area(st_transform(s.geom, sref_system_to_srid(entered_sref_system)))))::integer, o.sensitivity_precision, s.privacy_precision, $size), s.entered_sref_system) - ))) as y - FROM samples s - JOIN occurrences o ON o.sample_id=s.id - LEFT JOIN locations l on l.id=s.location_id AND l.deleted=false - WHERE $alias.id IN ($idlist)")->result_array(TRUE); - $km = $size / 1000; - foreach ($smpInfo as $s) { - $existing = $db->query("SELECT id FROM map_squares WHERE x={$s->x} AND y={$s->y} AND size={$s->size}") - ->result_array(FALSE); - if (count($existing)===0) { - $qry = $db->query("INSERT INTO map_squares (geom, x, y, size) - VALUES (reduce_precision(st_geomfromtext('{$s->geom}', $srid), '{$s->confidential}', {$s->size}, '{$s->entered_sref_system}'), {$s->x}, {$s->y}, {$s->size})"); - $msqId = $qry->insert_id(); + $sizes = [1000, 2000, 10000]; + $fieldsForEachSquare = ''; + foreach ($sizes as $idx => $size) { + $fieldsForEachSquare .= <<query($query)->result_array(TRUE); + foreach ($smpInfo as $s) { + $updateFieldSQL = []; + $updateFilterSQL = []; + foreach ($sizes as $size) { + $km = $size / 1000; + $squareDetail = [ + 'x' => $s->{"x$size"}, + 'y' => $s->{"y$size"}, + 'size' => $s->{"size$size"}, + ]; + $existing = $db + ->query("SELECT id FROM map_squares WHERE x=$squareDetail[x] AND y=$squareDetail[y] AND size=$squareDetail[size]") + ->result_array(FALSE); + if (count($existing) === 0) { + $qry = $db->query("INSERT INTO map_squares (geom, x, y, size) + VALUES (reduce_precision(st_geomfromtext('{$s->geom}', $srid), '{$s->confidential}', $squareDetail[size], '{$s->entered_sref_system}'), $squareDetail[x], $squareDetail[y], $squareDetail[size])"); + $msqId = $qry->insert_id(); + } + else { + $msqId = $existing[0]['id']; + } + $updateFieldSQL[] = "map_sq_{$km}km_id=$msqId"; + $updateFilterSQL[] = "map_sq_{$km}km_id IS NULL OR map_sq_{$km}km_id<>$msqId"; } - $db->query("UPDATE cache_occurrences_functional SET map_sq_{$km}km_id=$msqId " . - "WHERE website_id={$s->website_id} AND survey_id={$s->survey_id} AND sample_id={$s->id} " . - "AND (map_sq_{$km}km_id IS NULL OR map_sq_{$km}km_id<>$msqId)"); - $db->query("UPDATE cache_samples_functional SET map_sq_{$km}km_id=$msqId " . - "WHERE id={$s->id} AND (map_sq_{$km}km_id IS NULL OR map_sq_{$km}km_id<>$msqId)"); + $db->query("UPDATE cache_occurrences_functional SET " . implode(', ', $updateFieldSQL) . + "WHERE sample_id={$s->id} " . + 'AND (' . implode(' OR ', $updateFilterSQL) . ')'); + $db->query("UPDATE cache_samples_functional SET " . implode(', ', $updateFieldSQL) . + "WHERE id={$s->id} " . + 'AND (' . implode(' OR ', $updateFilterSQL) . ')'); } } } diff --git a/application/libraries/MY_ORM.php b/application/libraries/MY_ORM.php index 75b331bba0..3cba558af3 100644 --- a/application/libraries/MY_ORM.php +++ b/application/libraries/MY_ORM.php @@ -751,16 +751,12 @@ private function postProcess() { } if (!empty($samples)) { // @todo Map squares could be added to work queue. - postgreSQL::insertMapSquaresForSamples($samples, 1000, $this->db); - postgreSQL::insertMapSquaresForSamples($samples, 2000, $this->db); - postgreSQL::insertMapSquaresForSamples($samples, 10000, $this->db); + postgreSQL::insertMapSquaresForSamples($samples, $this->db); } elseif (!empty($occurrences)) { // No need to do occurrence map square update if inserting a sample, as // the above code does the occurrences in bulk. - postgreSQL::insertMapSquaresForOccurrences($occurrences, 1000, $this->db); - postgreSQL::insertMapSquaresForOccurrences($occurrences, 2000, $this->db); - postgreSQL::insertMapSquaresForOccurrences($occurrences, 10000, $this->db); + postgreSQL::insertMapSquaresForOccurrences($occurrences, $this->db); } } if (!empty(self::$changedRecords['insert']['occurrence_association']) || From 64459a7f64368cfe742b877a557cbd336dd3a73c Mon Sep 17 00:00:00 2001 From: John van Breda Date: Tue, 12 Feb 2019 19:37:05 +0000 Subject: [PATCH 03/10] Adds fields for tracking cache table updates Used for tracking feeds to other systems (e.g. Elasticsearch). Fields auto-update using triggers. --- .../201902121402_tracking_fields.sql | 37 +++++++++++++++++++ 1 file changed, 37 insertions(+) create mode 100644 modules/cache_builder/db/version_2_7_0/201902121402_tracking_fields.sql diff --git a/modules/cache_builder/db/version_2_7_0/201902121402_tracking_fields.sql b/modules/cache_builder/db/version_2_7_0/201902121402_tracking_fields.sql new file mode 100644 index 0000000000..30f1d061bb --- /dev/null +++ b/modules/cache_builder/db/version_2_7_0/201902121402_tracking_fields.sql @@ -0,0 +1,37 @@ +ALTER TABLE cache_occurrences_functional +ADD COLUMN tracking bigint; + +ALTER TABLE cache_samples_functional +ADD COLUMN tracking bigint; + +CREATE INDEX ix_cache_occurrences_functional_tracking ON cache_occurrences_functional(tracking); +CREATE INDEX ix_cache_samples_functional_tracking ON cache_samples_functional(tracking); + +COMMENT ON COLUMN cache_occurrences_functional.tracking IS 'Autogenerated tracking number for updates. A report can grab all new or updated records by filtering on this value.'; +COMMENT ON COLUMN cache_samples_functional.tracking IS 'Autogenerated tracking number for updates. A report can grab all new or updated records by filtering on this value.'; + +CREATE SEQUENCE cache_tracking_seq + INCREMENT 1 + MINVALUE 1 + MAXVALUE 9223372036854775807 + START 1 + CACHE 1; + +CREATE OR REPLACE FUNCTION cache_functional_changed() RETURNS TRIGGER + LANGUAGE plpgsql + AS $$ +BEGIN + NEW.tracking = nextval('cache_tracking_seq'::regclass); + RETURN NEW; +END; +$$; + +CREATE TRIGGER trigger_cache_occurrences_functional_changed + BEFORE INSERT OR UPDATE ON cache_occurrences_functional + FOR EACH ROW + EXECUTE PROCEDURE cache_functional_changed(); + +CREATE TRIGGER trigger_cache_samples_functional_changed + BEFORE INSERT OR UPDATE ON cache_samples_functional + FOR EACH ROW + EXECUTE PROCEDURE cache_functional_changed(); From 43e93d1283a5005e5f58c16d4c9a8863ec5d1110 Mon Sep 17 00:00:00 2001 From: John van Breda Date: Tue, 12 Feb 2019 19:38:01 +0000 Subject: [PATCH 04/10] Limit updates to cache tables MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Don’t do double hit for media, as can be done in one. --- .../cache_builder/config/cache_builder.php | 44 ++++++------------- 1 file changed, 14 insertions(+), 30 deletions(-) diff --git a/modules/cache_builder/config/cache_builder.php b/modules/cache_builder/config/cache_builder.php index f713e1a36b..9409c8720d 100644 --- a/modules/cache_builder/config/cache_builder.php +++ b/modules/cache_builder/config/cache_builder.php @@ -765,7 +765,8 @@ when sc2.id is null and s.updated_on<=sc1.created_on then 'Q' else 'A' end, - parent_sample_id=s.parent_id + parent_sample_id=s.parent_id, + media_count=(SELECT COUNT(sm.*) FROM sample_media sm WHERE sm.sample_id=s.id AND sm.deleted=false) FROM samples s #join_needs_update# LEFT JOIN samples sp ON sp.id=s.parent_id AND sp.deleted=false @@ -779,22 +780,14 @@ WHERE s.id=s_update.id "; -$config['samples']['update']['functional_media'] = " -UPDATE cache_samples_functional u -SET media_count=(SELECT COUNT(sm.*) -FROM sample_media sm WHERE sm.sample_id=u.id AND sm.deleted=false) -FROM samples s -#join_needs_update# -WHERE s.id=u.id -"; - $config['samples']['update']['functional_sensitive'] = " -UPDATE cache_samples_functional +UPDATE cache_samples_functional u SET location_id=null, location_name=null FROM samples s #join_needs_update# JOIN occurrences o ON o.sample_id=s.id AND o.deleted=false AND o.sensitivity_precision IS NOT NULL -WHERE s.id=cache_samples_functional.id +WHERE s.id=u.id +AND (u.location_id IS NOT NULL OR u.location_name IS NOT NULL) "; $config['samples']['update']['nonfunctional'] = " @@ -938,7 +931,7 @@ INSERT INTO cache_samples_functional( id, website_id, survey_id, input_form, location_id, location_name, public_geom, date_start, date_end, date_type, created_on, updated_on, verified_on, created_by_id, - group_id, record_status, query, parent_sample_id) + group_id, record_status, query, parent_sample_id, media_count) SELECT distinct on (s.id) s.id, su.website_id, s.survey_id, COALESCE(sp.input_form, s.input_form), s.location_id, CASE WHEN s.privacy_precision IS NOT NULL THEN NULL ELSE COALESCE(l.name, s.location_name, lp.name, sp.location_name) END, reduce_precision(coalesce(s.geom, l.centroid_geom), false, s.privacy_precision, @@ -950,7 +943,8 @@ when sc2.id is null and s.updated_on<=sc1.created_on then 'Q' else 'A' end, - s.parent_id + s.parent_id, + (SELECT COUNT(sm.*) FROM sample_media sm WHERE sm.sample_id=s.id AND sm.deleted=false) FROM samples s #join_needs_update# LEFT JOIN cache_samples_functional cs on cs.id=s.id @@ -966,8 +960,6 @@ AND cs.id IS NULL "; -$config['samples']['insert']['functional_media'] = $config['samples']['update']['functional_media']; - $config['samples']['insert']['functional_sensitive'] = " UPDATE cache_samples_functional SET location_id=null, location_name=null @@ -1395,7 +1387,8 @@ external_key=o.external_key, taxon_path=ctp.path, parent_sample_id=s.parent_id, - verification_checks_enabled=w.verification_checks_enabled + verification_checks_enabled=w.verification_checks_enabled, + media_count=(SELECT COUNT(om.*) FROM occurrence_media om WHERE om.occurrence_id=o.id AND om.deleted=false) FROM occurrences o #join_needs_update# left join cache_occurrences_functional co on co.id=o.id @@ -1422,15 +1415,6 @@ WHERE cache_occurrences_functional.id=o.id "; -$config['occurrences']['update']['functional_media'] = " -UPDATE cache_occurrences_functional u -SET media_count=(SELECT COUNT(om.*) -FROM occurrence_media om WHERE om.occurrence_id=o.id AND om.deleted=false) -FROM occurrences o -#join_needs_update# -WHERE o.id=u.id -"; - $config['occurrences']['update']['functional_sensitive'] = " UPDATE cache_samples_functional cs SET location_id=null, location_name=null @@ -1623,7 +1607,8 @@ taxon_group_id, taxon_rank_sort_order, record_status, record_substatus, certainty, query, sensitive, release_status, marine_flag, data_cleaner_result, training, zero_abundance, licence_id, import_guid, confidential, external_key, - taxon_path, blocked_sharing_tasks, parent_sample_id, verification_checks_enabled) + taxon_path, blocked_sharing_tasks, parent_sample_id, verification_checks_enabled, + media_count) SELECT distinct on (o.id) o.id, o.sample_id, o.website_id, s.survey_id, COALESCE(sp.input_form, s.input_form), s.location_id, case when o.confidential=true or o.sensitivity_precision is not null or s.privacy_precision is not null then null else coalesce(l.name, s.location_name, lp.name, sp.location_name) end, @@ -1663,7 +1648,8 @@ ], NULL) END, s.parent_id, - w.verification_checks_enabled + w.verification_checks_enabled, + (SELECT COUNT(om.*) FROM occurrence_media om WHERE om.occurrence_id=o.id AND om.deleted=false) FROM occurrences o #join_needs_update# LEFT JOIN cache_occurrences_functional co on co.id=o.id @@ -1691,8 +1677,6 @@ AND co.id IS NULL "; -$config['occurrences']['insert']['functional_media'] = $config['occurrences']['update']['functional_media']; - $config['occurrences']['insert']['functional_sensitive'] = " UPDATE cache_samples_functional cs SET location_id=null, location_name=null From 37cc104b84ef41009681c0b34c853a8423985985 Mon Sep 17 00:00:00 2001 From: John van Breda Date: Tue, 12 Feb 2019 19:38:41 +0000 Subject: [PATCH 05/10] Don't change updated_on for spatial indexing changes .. otherwise things like notifications get fired. Tracking fields will be used instead. --- .../helpers/task_spatial_index_builder_location.php | 6 ++---- .../helpers/task_spatial_index_builder_location_delete.php | 4 ++-- .../helpers/task_spatial_index_builder_occurrence.php | 2 +- .../helpers/task_spatial_index_builder_sample.php | 4 ++-- 4 files changed, 7 insertions(+), 9 deletions(-) diff --git a/modules/spatial_index_builder/helpers/task_spatial_index_builder_location.php b/modules/spatial_index_builder/helpers/task_spatial_index_builder_location.php index d37115b6cd..7ec8e1ffd3 100644 --- a/modules/spatial_index_builder/helpers/task_spatial_index_builder_location.php +++ b/modules/spatial_index_builder/helpers/task_spatial_index_builder_location.php @@ -106,8 +106,7 @@ public static function process($db, $taskType, $procId) { SET location_ids=CASE WHEN u.location_ids IS NULL THEN clh.location_ids ELSE ARRAY(select distinct unnest(array_cat(clh.location_ids, u.location_ids))) - END, - updated_on=now() + END FROM changed_location_hits clh WHERE u.id=clh.sample_id AND NOT u.location_ids @> clh.location_ids; @@ -123,8 +122,7 @@ public static function process($db, $taskType, $procId) { SET location_ids=CASE WHEN u.location_ids IS NULL THEN clh.location_ids ELSE ARRAY(select distinct unnest(array_cat(clh.location_ids, u.location_ids))) - END, - updated_on=now() + END FROM changed_location_hits clh WHERE u.sample_id=clh.sample_id; AND NOT u.location_ids @> clh.location_ids diff --git a/modules/spatial_index_builder/helpers/task_spatial_index_builder_location_delete.php b/modules/spatial_index_builder/helpers/task_spatial_index_builder_location_delete.php index 149da87226..a5b179bd5d 100644 --- a/modules/spatial_index_builder/helpers/task_spatial_index_builder_location_delete.php +++ b/modules/spatial_index_builder/helpers/task_spatial_index_builder_location_delete.php @@ -51,12 +51,12 @@ public static function process($db, $taskType, $procId) { SELECT record_id INTO temporary loclist FROM work_queue WHERE claimed_by='$procId' AND entity='location'; UPDATE cache_occurrences_functional u -SET location_ids=array_remove(u.location_ids, l.record_id), updated_on=now() +SET location_ids=array_remove(u.location_ids, l.record_id) FROM loclist l WHERE u.location_ids @> ARRAY[l.record_id]; UPDATE cache_samples_functional u -SET location_ids=array_remove(u.location_ids, l.record_id), updated_on=now() +SET location_ids=array_remove(u.location_ids, l.record_id) FROM loclist l WHERE u.location_ids @> ARRAY[l.record_id]; SQL; diff --git a/modules/spatial_index_builder/helpers/task_spatial_index_builder_occurrence.php b/modules/spatial_index_builder/helpers/task_spatial_index_builder_occurrence.php index b74cda6fda..3b082b5709 100644 --- a/modules/spatial_index_builder/helpers/task_spatial_index_builder_occurrence.php +++ b/modules/spatial_index_builder/helpers/task_spatial_index_builder_occurrence.php @@ -59,7 +59,7 @@ public static function process($db, $taskType, $procId) { AND task='task_spatial_index_builder_occurrence'; UPDATE cache_occurrences_functional o -SET location_ids = s.location_ids, updated_on=now() +SET location_ids = s.location_ids FROM occlist ol, cache_samples_functional s WHERE s.id=o.sample_id AND o.id=ol.record_id diff --git a/modules/spatial_index_builder/helpers/task_spatial_index_builder_sample.php b/modules/spatial_index_builder/helpers/task_spatial_index_builder_sample.php index 2d769d25e5..47eac4af5f 100644 --- a/modules/spatial_index_builder/helpers/task_spatial_index_builder_sample.php +++ b/modules/spatial_index_builder/helpers/task_spatial_index_builder_sample.php @@ -70,7 +70,7 @@ public static function process($db, $taskType, $procId) { -- Samples - for updated samples, copy over the changes if there are any UPDATE cache_samples_functional u - SET location_ids=cs.location_ids, updated_on=now() + SET location_ids=cs.location_ids FROM changed_samples cs WHERE cs.sample_id=u.id AND ( @@ -79,7 +79,7 @@ public static function process($db, $taskType, $procId) { ); UPDATE cache_occurrences_functional o -SET location_ids = s.location_ids, updated_on=now() +SET location_ids = s.location_ids FROM cache_samples_functional s JOIN changed_samples cs on cs.sample_id=s.id WHERE o.sample_id=s.id From 1897eabe6db70dc0a7a9a7143a0ac8c86d06c5bc Mon Sep 17 00:00:00 2001 From: John van Breda Date: Tue, 12 Feb 2019 19:39:14 +0000 Subject: [PATCH 06/10] Include tracking in Elasticsearch output Will be used for autofeed. --- reports/library/occurrences/list_for_elastic.xml | 1 + reports/library/occurrences/list_for_elastic_all.xml | 1 + reports/library/occurrences/list_for_elastic_sensitive.xml | 1 + reports/library/occurrences/list_for_elastic_sensitive_all.xml | 1 + 4 files changed, 4 insertions(+) diff --git a/reports/library/occurrences/list_for_elastic.xml b/reports/library/occurrences/list_for_elastic.xml index 2c32dcef2d..4ae6aaa35b 100644 --- a/reports/library/occurrences/list_for_elastic.xml +++ b/reports/library/occurrences/list_for_elastic.xml @@ -142,5 +142,6 @@ + \ No newline at end of file diff --git a/reports/library/occurrences/list_for_elastic_all.xml b/reports/library/occurrences/list_for_elastic_all.xml index 9d76eb3a4d..ec4a1a0b28 100644 --- a/reports/library/occurrences/list_for_elastic_all.xml +++ b/reports/library/occurrences/list_for_elastic_all.xml @@ -141,5 +141,6 @@ + \ No newline at end of file diff --git a/reports/library/occurrences/list_for_elastic_sensitive.xml b/reports/library/occurrences/list_for_elastic_sensitive.xml index 497981f3c8..3e6be7db71 100644 --- a/reports/library/occurrences/list_for_elastic_sensitive.xml +++ b/reports/library/occurrences/list_for_elastic_sensitive.xml @@ -145,5 +145,6 @@ + \ No newline at end of file diff --git a/reports/library/occurrences/list_for_elastic_sensitive_all.xml b/reports/library/occurrences/list_for_elastic_sensitive_all.xml index 54de87c1d6..1c14083340 100644 --- a/reports/library/occurrences/list_for_elastic_sensitive_all.xml +++ b/reports/library/occurrences/list_for_elastic_sensitive_all.xml @@ -144,5 +144,6 @@ + \ No newline at end of file From 58e4b46d2006172a811db9511f65a6971e1f922e Mon Sep 17 00:00:00 2001 From: John van Breda Date: Tue, 12 Feb 2019 19:42:51 +0000 Subject: [PATCH 07/10] Standard parameters for filtering tracking IDs. --- .../report_standard_params_occurrences.php | 30 +++++++++++++++++++ .../report_standard_params_samples.php | 30 +++++++++++++++++++ 2 files changed, 60 insertions(+) diff --git a/application/helpers/report_standard_params_occurrences.php b/application/helpers/report_standard_params_occurrences.php index 435560ffc3..f73dbbe3fe 100644 --- a/application/helpers/report_standard_params_occurrences.php +++ b/application/helpers/report_standard_params_occurrences.php @@ -400,6 +400,36 @@ public static function getParameters() { ], ], ], + 'tracking_from' => [ + 'datatype' => 'integer', + 'display' => 'First squential update ID to include', + 'description' => 'All record inserts and updates are given a sequential tracking ID. Filter by this to limit ' . + 'the range of records returned to a contiguous batch of updates. Tracking is updated when the record is ' . + 'affected in any way, not just when it is edited. E.g. an update to spatial indexing will update the tracking.', + 'wheres' => [ + [ + 'value' => '', + 'operator' => '', + 'sql' => + "o.tracking >= #tracking_from#", + ], + ], + ], + 'tracking_to' => [ + 'datatype' => 'integer', + 'display' => 'Last squential update ID to include', + 'description' => 'All record inserts and updates are given a sequential tracking ID. Filter by this to limit ' . + 'the range of records returned to a contiguous batch of updates. Tracking is updated when the record is ' . + 'affected in any way, not just when it is edited. E.g. an update to spatial indexing will update the tracking.', + 'wheres' => [ + [ + 'value' => '', + 'operator' => '', + 'sql' => + "o.tracking <= #tracking_from#", + ], + ], + ], 'quality' => [ 'datatype' => 'lookup', 'display' => 'Quality', diff --git a/application/helpers/report_standard_params_samples.php b/application/helpers/report_standard_params_samples.php index 02ff8bfbc9..6f04d9994e 100644 --- a/application/helpers/report_standard_params_samples.php +++ b/application/helpers/report_standard_params_samples.php @@ -228,6 +228,36 @@ public static function getParameters() { array('value'=>'', 'operator'=>'', 'sql'=>"s.verified_on>now()-'#verified_date_age#'::interval") ) ), + 'tracking_from' => [ + 'datatype' => 'integer', + 'display' => 'First squential update ID to include', + 'description' => 'All record inserts and updates are given a sequential tracking ID. Filter by this to limit ' . + 'the range of records returned to a contiguous batch of updates. Tracking is updated when the record is ' . + 'affected in any way, not just when it is edited. E.g. an update to spatial indexing will update the tracking.', + 'wheres' => [ + [ + 'value' => '', + 'operator' => '', + 'sql' => + "s.tracking >= #tracking_from#", + ], + ], + ], + 'tracking_to' => [ + 'datatype' => 'integer', + 'display' => 'Last squential update ID to include', + 'description' => 'All record inserts and updates are given a sequential tracking ID. Filter by this to limit ' . + 'the range of records returned to a contiguous batch of updates. Tracking is updated when the record is ' . + 'affected in any way, not just when it is edited. E.g. an update to spatial indexing will update the tracking.', + 'wheres' => [ + [ + 'value' => '', + 'operator' => '', + 'sql' => + "s.tracking <= #tracking_from#", + ], + ], + ], 'quality' => array('datatype'=>'lookup', 'display'=>'Quality', 'description'=>'Minimum quality of records to include', 'lookup_values'=>'V:Accepted records only,P:Not reviewed,!D:Exclude queried or not accepted records,' . From 9c1c4bc2c283fa71bf04be0e9be25d3a91bfa2a4 Mon Sep 17 00:00:00 2001 From: John van Breda Date: Tue, 12 Feb 2019 19:46:13 +0000 Subject: [PATCH 08/10] Avoid unnecessary updates --- application/models/occurrence_comment.php | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/application/models/occurrence_comment.php b/application/models/occurrence_comment.php index 895138f0ff..3100f31b89 100644 --- a/application/models/occurrence_comment.php +++ b/application/models/occurrence_comment.php @@ -66,17 +66,16 @@ public function caption() /** * Implement an instant update of the cache occurrences queried field, so the verification UI * can report on in as changes are made. + * * @param $isInsert * @return bool */ public function postSubmit($isInsert) { if ($isInsert && $this->auto_generated!=='t' and $this->query==='t') { - $this->db->query("update cache_occurrences_functional set query='Q' where id={$this->occurrence_id}"); + $this->db->query("update cache_occurrences_functional set query='Q' where id={$this->occurrence_id} and query<>'Q'"); } - // answers don't need to be instant, just queries - return true; + // Answers don't need to be instant, just queries. + return TRUE; } - - } From 1f8d9f33afe8248e282018bf036d652cfa91f0bb Mon Sep 17 00:00:00 2001 From: John van Breda Date: Wed, 13 Feb 2019 09:56:30 +0000 Subject: [PATCH 09/10] Update autofeed mode to use tracking column Better than updated_on column as: * We can detect changes independently of updated_on (since changing updated_on unnecessarily fires notifications etc). * We never get a huge batch of records with the same tracking value, which previously would cause the autofeed to get stuck. --- .../rest_api/controllers/services/rest.php | 18 ++++++++++++++---- .../rest_api/libraries/RestApiResponse.php | 19 +++++++++++-------- 2 files changed, 25 insertions(+), 12 deletions(-) diff --git a/modules/rest_api/controllers/services/rest.php b/modules/rest_api/controllers/services/rest.php index 620c317757..ed8ffc51c4 100644 --- a/modules/rest_api/controllers/services/rest.php +++ b/modules/rest_api/controllers/services/rest.php @@ -1644,12 +1644,15 @@ private function loadReport($report, array $params) { // Find our state data for this feed. $afSettings = (array) variable::get("rest-autofeed-$_GET[proj_id]", ['mode' => 'notStarted'], FALSE); if ($afSettings['mode'] === 'notStarted') { - // First use of this autofeed, so we need to store the timepoint to + // First use of this autofeed, so we need to store the tracking point to // ensure we capture all changes after the initial sweep up of records // is done. Switch state to initial loading. + $lastTrackingInfo = $this->db + ->query('SELECT max(tracking) as max_tracking FROM cache_occurrences_functional') + ->current(); $afSettings = [ 'mode' => 'initialLoad', - 'last_date' => date('c'), + 'last_tracking_id' => $lastTrackingInfo->max_tracking, 'last_id' => 0, ]; variable::set("rest-autofeed-$_GET[proj_id]", $afSettings); @@ -1661,8 +1664,15 @@ private function loadReport($report, array $params) { } elseif ($afSettings['mode'] === 'updates') { // Doing updates of changes only as initial load done. - $params['last_date'] = $afSettings['last_date']; - $params['orderby'] = 'updated_on'; + if (isset($afSettings['last_date'])) { + // Last_date only used pre version 2.7, but may as well be polite. + $params['last_date'] = $afSettings['last_date']; + } + else { + // Start at one record after the last one we retrieved. + $params['tracking_from'] = $afSettings['last_tracking_id'] + 1; + } + $params['orderby'] = 'tracking'; } } if (!empty($this->resourceOptions['cached'])) { diff --git a/modules/rest_api/libraries/RestApiResponse.php b/modules/rest_api/libraries/RestApiResponse.php index 89001c6586..1e46e80b5c 100644 --- a/modules/rest_api/libraries/RestApiResponse.php +++ b/modules/rest_api/libraries/RestApiResponse.php @@ -725,10 +725,10 @@ private function succeedJson($data, $options, $autofeed) { echo ','; } elseif ($autofeed) { - // Capture the ID and update datestamp of the last row in the report, - // so we can autofeed the next batch. + // Capture the ID and update tracking ID of the last row in the + // report, so we can autofeed the next batch. $lastId = $row['id']; - $lastUpdate = $row['updated_on']; + $lastTrackingId = isset($row['tracking']) ? $row['tracking'] : 0; } } if ($autofeed) { @@ -744,18 +744,21 @@ private function succeedJson($data, $options, $autofeed) { $afSettings['mode'] = 'updates'; unset($afSettings['last_id']); } - elseif ($afSettings['mode'] === 'updates') { - // Whilst in updates only mode, we want to start the next batch at - // the same timestamp as the last batch finished so we get no gaps. - $afSettings['last_date'] = $lastUpdate; + elseif ($afSettings['mode'] === 'updates' && isset($lastTrackingId)) { + // Whilst in updates only mode, we want to start the next batch after + // the same tracking ID as the last batch finished so we get no gaps. + $afSettings['last_tracking_id'] = $lastTrackingId; } // Do not set the tracking variable if we have exceeded a time limit - // specified in the request. + // specified in the request. Otherwise a failure to process the batch + // on the client results in a batch being skipped. if (!isset($this->startTime) || !isset($_REQUEST['max_time']) || microtime(TRUE) - $this->startTime < $_REQUEST['max_time']) { variable::set("rest-autofeed-$_GET[proj_id]", $afSettings); } else { + // In this instance, we don't update the variable, so the next batch + // will be the same as this one. kohana::log('error', "Max time exceeded: $this->startTime - " . microtime(TRUE) . " is greater than $_REQUEST[max_time]"); } From 8b048ea78865d95f9e10ad03ab79dec7e762e930 Mon Sep 17 00:00:00 2001 From: John van Breda Date: Wed, 13 Feb 2019 10:07:20 +0000 Subject: [PATCH 10/10] Updated CHANGELOG --- CHANGELOG.md | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 5c33442d76..d6565967da 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,25 @@ +# Version 2.7.0 +*2019-02-13 + +* Saving a record slightly faster, because ap square updates are done in a single update + statement rather than one per square size. +* Saving records also slightly faster due to reduced number of update statements that are + run on the cache tables. +* New tracking field to store an autogenerated sequential system unique update ID in + cache_occurrences_functional and cache_samples_functional. This makes it easier to + track any changes to the cache tables, e.g. for feeding changes through to other + systems such as Elasticsearch. It also makes change tracking of system generated + changes (such as spatial indexing) seperate to user instigated record changes so that + notifications are not unnecessarily generated for the former. +* New standard reporting parameters `tracking_from` and `tracking_to` for filtering on + change tracking IDs. +* REST API uses tracking data rather than updated_on field when using autofeed mode to + autogenerate a feed of updates. +* Spatial indexing no longer changes cache table updated_on fields therefore does not + fire notifications. +* Fixes a problem in the update kit with a missing class file + (https://github.com/Indicia-Team/warehouse/issues/315) + # Version 2.6.0 *2019-02-07*