From b84d307fa23a87f1c26cf92d149e121a31ba8224 Mon Sep 17 00:00:00 2001 From: Andrew van Breda Date: Tue, 8 Oct 2019 14:45:09 +0100 Subject: [PATCH 1/6] Plant Portal updater updated to match all the changes from the "normal" Indicia importer since the Plant Portal one was created (this is necessary as the plant portal one relies on the Indicia one to work, so breaks otherwise). Logging also removed as won't be needed going forward and I don't want to fill up logs. --- .../services/plant_portal_import.php | 1388 +++++++++++++---- 1 file changed, 1071 insertions(+), 317 deletions(-) diff --git a/modules/indicia_svc_plant_portal_import/controllers/services/plant_portal_import.php b/modules/indicia_svc_plant_portal_import/controllers/services/plant_portal_import.php index 1f877edfd2..740811596b 100644 --- a/modules/indicia_svc_plant_portal_import/controllers/services/plant_portal_import.php +++ b/modules/indicia_svc_plant_portal_import/controllers/services/plant_portal_import.php @@ -14,28 +14,25 @@ * You should have received a copy of the GNU General Public License * along with this program. If not, see http://www.gnu.org/licenses/gpl.html. * - * @package Services - * @subpackage Import - * @author Indicia Team - * @license http://www.gnu.org/licenses/gpl.html GPL - * @link http://code.google.com/p/indicia/ + * @author Indicia Team + * @license http://www.gnu.org/licenses/gpl.html GPL + * @link http://code.google.com/p/indicia/ */ - + defined('SYSPATH') or die('No direct script access.'); /** - * Controller class for plant portal import web services - * - * @package Services - * @subpackage Data + * Controller class for import web services. */ class Plant_Portal_Import_Controller extends Service_Base_Controller { - private $submissionStruct; /** - * @var array Parent model field details from the previous row. Allows us to efficiently use the same sample for - * multiple occurrences etc. + * Parent model field details from the previous row. + * + * Allows us to efficiently use the same sample for multiple occurrences etc. + * + * @var array */ private $previousCsvSupermodel; @@ -56,7 +53,7 @@ public function get_plant_portal_import_settings($model) { echo json_encode($model->fixed_values_form($options)); } } - + /** * Controller function that returns the list of importable fields for a model. * Accepts optional $_GET parameters for the website_id and survey_id, which limit the available @@ -86,7 +83,7 @@ public function get_plant_portal_import_fields($model) { $use_associations = (empty($_GET['use_associations']) ? false : ($_GET['use_associations'] == "true" ? true : false)); echo json_encode($model->getSubmittableFields(TRUE, $identifiers, $attrTypeFilter, $use_associations)); } - + /** * Controller function that returns the list of required fields for a model. * Accepts optional $_GET parameters for the website_id and survey_id, which limit the available @@ -106,306 +103,730 @@ public function get_plant_portal_required_fields($model) { } echo json_encode($fields); } - + /** - * Handle uploaded files in the $_FILES array by moving them to the upload folder. The current time is prefixed to the - * name to make it unique. The uploaded file should be in a field called media_upload. + * List field combinations that can be used to locate existing records. + * + * Controller function that returns the list of combinations of fields that + * can be used to determine if a record already exists. Echoes JSON listing + * the fields that are required. 
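+   *
+   * As a sketch of the possible response shape (hypothetical combination
+   * definition, assuming a model with a sample supermodel that declares
+   * importDuplicateCheckCombinations):
+   *   {"sample":[{"description":"Date and spatial reference",
+   *   "fields":[{"fieldName":"sample:date"},
+   *   {"fieldName":"sample:entered_sref"}]}]}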
+ * + * @param string $modelName + * Singular name of the model entity to check. */ - public function upload_csv() - { - try - { + public function get_existing_record_options($modelName) { + $this->authenticate('read'); + $model = ORM::factory($modelName); + $submissionStruct = $model->get_submission_structure(); + $combinations = array(); + if (isset($submissionStruct['superModels'])) { + foreach ($submissionStruct['superModels'] as $superModelName => $details) { + $superModel = ORM::factory($superModelName); + if (isset($superModel->importDuplicateCheckCombinations)) { + $combinations[$superModelName] = $superModel->importDuplicateCheckCombinations; + } + } + } + if (isset($model->importDuplicateCheckCombinations)) { + $combinations[$modelName] = $model->importDuplicateCheckCombinations; + } + echo json_encode($combinations); + } + + /** + * Handle the upload of a CSV file. + * + * Handle uploaded files in the $_FILES array by moving them to the upload + * folder. The current time is prefixed to the name to make it unique. The + * uploaded file should be in a field called media_upload. + */ + public function upload_csv() { + try { // Ensure we have write permissions. $this->authenticate(); - // We will be using a POST array to send data, and presumably a FILES array for the - // media. - // Upload size + // We will be using a POST array to send data, and presumably a FILES + // array for the media. + // Upload size. $ups = Kohana::config('indicia.maxUploadSize'); $_FILES = Validation::factory($_FILES)->add_rules( 'media_upload', 'upload::valid', 'upload::required', 'upload::type[csv]', "upload::size[$ups]" ); - if (count($_FILES)===0) { + if (count($_FILES) === 0) { echo "No file was uploaded."; } - elseif ($_FILES->validate()) - { - if (array_key_exists('name_is_guid', $_POST) && $_POST['name_is_guid']=='true') + elseif ($_FILES->validate()) { + if (array_key_exists('name_is_guid', $_POST) && $_POST['name_is_guid'] == 'true') { $finalName = strtolower($_FILES['media_upload']['name']); - else - $finalName = time().strtolower($_FILES['media_upload']['name']); + } + else { + $finalName = time() . strtolower($_FILES['media_upload']['name']); + } $fTmp = upload::save('media_upload', $finalName); - $this->response=basename($fTmp); + $this->response = basename($fTmp); $this->send_response(); - kohana::log('debug', 'Successfully uploaded file to '. basename($fTmp)); + kohana::log('debug', 'Successfully uploaded file to ' . basename($fTmp)); } - else - { - kohana::log('error', 'Validation errors uploading file '. $_FILES['media_upload']['name']); - kohana::log('error', print_r($_FILES->errors('form_error_messages'), true)); + else { + kohana::log('error', 'Validation errors uploading file ' . $_FILES['media_upload']['name']); + kohana::log('error', print_r($_FILES->errors('form_error_messages'), TRUE)); foreach ($_FILES as $file) { if (!empty($file['error'])) { kohana::log('error', 'PHP reports file upload error: ' . $this->codeToMessage($file['error'])); } } - Throw new ValidationError('Validation error', 2004, $_FILES->errors('form_error_messages')); + throw new ValidationError('Validation error', 2004, $_FILES->errors('form_error_messages')); } } - catch (Exception $e) - { + catch (Exception $e) { $this->handle_error($e); } } - + /** - * Caches various metadata to do with the upload, including the upload mappings and the error count. This action - * is called by the JavaScript code responsible for a chunked upload, before the upload actually starts. + * Store the upload process metadata. 
+ * + * Caches various metadata to do with the upload, including the upload + * mappings and the error count. This action is called by the JavaScript + * code responsible for a chunked upload, before the upload actually starts. */ public function cache_upload_metadata() { $this->authenticate(); $metadata = array_merge($_POST); - if (isset($metadata['mappings'])) - $metadata['mappings']=json_decode($metadata['mappings'], true); - if (isset($metadata['settings'])) - $metadata['settings']=json_decode($metadata['settings'], true); - // the metadata can also hold auth tokens and user_id, though they do not need decoding. - self::internal_cache_upload_metadata($metadata); + if (isset($metadata['mappings'])) { + $metadata['mappings'] = json_decode($metadata['mappings'], TRUE); + } + if (isset($metadata['settings'])) { + $metadata['settings'] = json_decode($metadata['settings'], TRUE); + } + if (isset($metadata['existingDataLookups'])) { + $metadata['existingDataLookups'] = json_decode($metadata['existingDataLookups'], TRUE); + } + if (isset($metadata['importMergeFields'])) { + $metadata['importMergeFields'] = json_decode($metadata['importMergeFields'], TRUE); + } + if (isset($metadata['synonymProcessing'])) { + $metadata['synonymProcessing'] = json_decode($metadata['synonymProcessing'], TRUE); + } + + // The metadata can also hold auth tokens and user_id, though they do not + // need decoding. + self::internalCacheUploadMetadata($metadata); echo "OK"; } - + private function codeToMessage($code) { switch ($code) { case UPLOAD_ERR_INI_SIZE: $message = "The uploaded file exceeds the upload_max_filesize directive in php.ini"; break; + case UPLOAD_ERR_FORM_SIZE: $message = "The uploaded file exceeds the MAX_FILE_SIZE directive that was specified in the HTML form"; break; + case UPLOAD_ERR_PARTIAL: $message = "The uploaded file was only partially uploaded"; break; + case UPLOAD_ERR_NO_FILE: $message = "No file was uploaded"; break; + case UPLOAD_ERR_NO_TMP_DIR: $message = "Missing a temporary folder"; break; + case UPLOAD_ERR_CANT_WRITE: $message = "Failed to write file to disk"; break; + case UPLOAD_ERR_EXTENSION: $message = "File upload stopped by extension"; break; + default: $message = "Unknown upload error"; break; } - return $message; - } - + return $message; + } + /** - * Saves a set of metadata for an upload to a file, so it can persist across requests. + * Saves a set of metadata for an upload to a file. + * + * Allows the metadata to persist across requests. */ - private function internal_cache_upload_metadata($metadata) { - $previous = self::_get_metadata($_GET['uploaded_csv']); + private function internalCacheUploadMetadata($metadata) { + $previous = self::getMetadata($_GET['uploaded_csv']); $metadata = array_merge($previous, $metadata); - $this->auto_render=false; - $mappingFile = str_replace('.csv','-metadata.txt',$_GET['uploaded_csv']); + $this->auto_render = FALSE; + $mappingFile = str_replace('.csv', '-metadata.txt', $_GET['uploaded_csv']); $mappingHandle = fopen(DOCROOT . "upload/$mappingFile", "w"); fwrite($mappingHandle, json_encode($metadata)); fclose($mappingHandle); } - - /* - * Determines if the provided module has been activated in the indicia configuration. + + /** + * Saves a set of meanings for an upload to a file, so it can persist across requests. + * This is the mapping between the synonym identifier column and the indicia meaning id. 
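+   *
+   * For example (hypothetical values): once a main record with synonym
+   * identifier "X123" has created taxon meaning 4567, the cached file
+   * contains {"X123":4567}, so later synonym rows in the same upload can
+   * attach to that meaning.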
*/ - private function _check_module_active($module) - { - $config=kohana::config_load('core'); - foreach ($config['modules'] as $path) { - if(strlen($path) >= strlen($module) && - substr_compare($path, $module , strlen($path)-strlen($module), strlen($module), true) === 0) - return true; - } - return false; + private function cacheStoredMeanings($meanings) { + $previous = self::retrieveCachedStoredMeanings(); + $metadata = array_merge($previous, $meanings); + $meaningsFile = str_replace('.csv', '-meanings.txt', $_GET['uploaded_csv']); + $meaningsHandle = fopen(DOCROOT . "upload/$meaningsFile", "w"); + fwrite($meaningsHandle, json_encode($meanings)); + fclose($meaningsHandle); } - + /** - * Controller action that performs the import of data in an uploaded CSV file. - * Allows $_GET parameters to specify the filepos, offset and limit when uploading just a chunk at a time. - * This method is called to perform the entire upload when JavaScript is not enabled, or can - * be called to perform part of an AJAX csv upload where only a part of the data is imported - * on each call. + * Internal function that retrieves the meanings for a CSV upload. + */ + private function retrieveCachedStoredMeanings() { + $meaningsFile = DOCROOT . "upload/" . str_replace('.csv', '-meanings.txt', $_GET['uploaded_csv']); + if (file_exists($meaningsFile)) { + $meaningsHandle = fopen($meaningsFile, "r"); + $meanings = fgets($meaningsHandle); + fclose($meaningsHandle); + return json_decode($meanings, TRUE); + } + else { + // No previous file, so create default new metadata. + return array(); + } + } + + /** + * Determines if the provided module has been activated in the configuration. + * + * @param string $module + * Name of the module. + * + * @return bool + * TRUE if the module is active. + */ + private function checkModuleActive($module) { + $config = kohana::config_load('core'); + foreach ($config['modules'] as $path) { + if (strlen($path) >= strlen($module) && + substr_compare($path, $module, strlen($path) - strlen($module), strlen($module), TRUE) === 0 + ) { + return TRUE; + } + } + return FALSE; + } + + /** + * Controller action that performs the import of data in an uploaded file. + * + * Allows $_GET parameters to specify the filepos, offset and limit when + * uploading just a chunk at a time. This method is called to perform the + * entire upload when JavaScript is not enabled, or can be called to perform + * part of an AJAX csv upload where only a part of the data is imported on + * each call. * Requires a $_GET parameter for uploaded_csv - the uploaded file name. */ public function upload() { + $allowCommitToDB = (isset($_GET['allow_commit_to_db']) ? $_GET['allow_commit_to_db'] : true); $csvTempFile = DOCROOT . "upload/" . $_GET['uploaded_csv']; - $metadata = $this->_get_metadata($_GET['uploaded_csv']); + $metadata = $this->getMetadata($_GET['uploaded_csv']); if (!empty($metadata['user_id'])) { global $remoteUserId; $remoteUserId = $metadata['user_id']; } - // Check if details of the last supermodel (e.g. sample for an occurrence) are in the cache from a previous iteration of - // this bulk operation - $cache= Cache::instance(); + // Check if details of the last supermodel (e.g. sample for an occurrence) + // are in the cache from a previous iteration of this bulk operation. 
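+    // As an illustration (hypothetical IDs), the cached value has the shape:
+    //   array(
+    //     'details' => array('sample' => 'sample:date|2019-10-08|...'),
+    //     'id' => array('sample' => 123),
+    //   )
+    // i.e. a hash of the supermodel's field values plus the record id that
+    // they resolved to.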
+ $cache = Cache::instance(); $this->getPreviousRowSupermodel($cache); - // enable caching of things like language lookups - ORM::$cacheFkLookups = true; - // make sure the file still exists - if (file_exists($csvTempFile)) - { - // Following helps for files from Macs - ini_set('auto_detect_line_endings',1); - // create the file pointer, plus one for errors - $handle = fopen ($csvTempFile, "r"); + // Enable caching of things like language lookups. + ORM::$cacheFkLookups = TRUE; + // Make sure the file still exists. + if (file_exists($csvTempFile)) { + $tm = microtime(TRUE); + // Following helps for files from Macs. + ini_set('auto_detect_line_endings', 1); + $model = ORM::Factory($_GET['model']); + $supportsImportGuid = in_array('import_guid', array_keys($model->as_array())); + // Create the file pointer, plus one for errors. + $handle = fopen($csvTempFile, "r"); $this->checkIfUtf8($metadata, $handle); - $errorHandle = $this->_get_error_file_handle($csvTempFile, $handle); - $count=0; - $limit = (isset($_GET['limit']) ? $_GET['limit'] : false); + $existingProblemColIdx = FALSE; + $existingErrorRowNoColIdx = FALSE; + $existingImportGuidColIdx = FALSE; + $errorHandle = $this->getErrorFileHandle($csvTempFile, $handle, $supportsImportGuid, + $existingProblemColIdx, $existingErrorRowNoColIdx, $existingImportGuidColIdx); + $count = 0; + $limit = (isset($_GET['limit']) ? $_GET['limit'] : FALSE); $filepos = (isset($_GET['filepos']) ? $_GET['filepos'] : 0); $offset = (isset($_GET['offset']) ? $_GET['offset'] : 0); - if ($filepos==0) { - // first row, so skip the header + if ($filepos == 0) { + // First row, so skip the header. fseek($handle, 0); - fgetcsv($handle, 1000, ","); - // also clear the lookup cache + fgetcsv($handle, 10000, ","); + // Also clear the lookup cache. $cache->delete_tag('lookup'); - } else - // skip rows to allow for the last file position + } + else { + // Skip rows to allow for the last file position. fseek($handle, $filepos); - $model = ORM::Factory($_GET['model']); + } $this->submissionStruct = $model->get_submission_structure(); - // special date processing. - $index = 0; - $dayColumn = false; - $monthColumn = false; - $yearColumn = false; - foreach ($metadata['mappings'] as $col=>$attr) { - // skip cols to do with remembered mappings - if ($col!=='RememberAll' && substr($col, -9)!=='_Remember') { - switch($attr) { - case 'sample:date:day': $dayColumn = $index; - case 'sample:date:month': $monthColumn = $index; - case 'sample:date:year': $yearColumn = $index; - } - $index++; - } + + // Check if the conditions for special field processing are met - all the + // columns are in the mapping. + $specialFieldProcessing = array(); + if (isset($model->specialImportFieldProcessingDefn)) { + foreach ($model->specialImportFieldProcessingDefn as $column => $defn) { + $columns = array(); + $index = 0; + foreach ($metadata['mappings'] as $col => $attr) { + if ($col !== 'RememberAll' && substr($col, -9) !== '_Remember' && $col != 'AllowLookup') { + if (in_array($attr, $defn['columns'])) { + $columns[$attr] = TRUE; + } + } + $index++; + } + // The genus, specific name and qualifier are all merge fields. + // However the qualifier is not mandatory, so if a qualifier is not specified, we effectively tell the system it has been specified + // so that the system doesn't ask for it. Ideally this code should be generalised going forward. 
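+          // E.g. (hypothetical mapping) a file with columns mapped to
+          // taxon:taxon:genus and taxon:taxon:specific but no qualifier
+          // column is treated as though taxon:taxon:qualifier had been
+          // mapped as well.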
+ if ((in_array('taxon:taxon:genus',$metadata['mappings']) || in_array('taxon:taxon:specific',$metadata['mappings'])) && !in_array('taxon:taxon:qualifier',$metadata['mappings'])) { + $columns['taxon:taxon:qualifier'] = TRUE; + } + if (count($defn['columns']) === count(array_keys($columns))) { + $specialFieldProcessing[$column] = TRUE; + } + } + } + $specialMergeProcessing = array(); + if (isset($metadata['importMergeFields']) && is_string($metadata['importMergeFields'])) { + $metadata['importMergeFields'] = json_decode($metadata['importMergeFields'], TRUE); + } + if (isset($metadata['synonymProcessing']) && is_string($metadata['synonymProcessing'])) { + $metadata['synonymProcessing'] = json_decode($metadata['synonymProcessing'], TRUE); } - $processDate = $dayColumn !== false && $monthColumn !== false && $yearColumn !== false; // initially has to have all 3 fields: TODO vaguer dates? - while (($data = fgetcsv($handle, 1000, ",")) !== FALSE && ($limit===false || $count<$limit)) { - if(!array_filter($data)) - // skip empty rows + if (isset($metadata['importMergeFields'])) { + // Only do the special merge processing if all the required fields are + // there, and if there are no required then if one of the optional ones + // are there. + foreach ($metadata['importMergeFields'] as $modelSpec) { + if (!isset($modelSpec['model']) || ($modelSpec['model'] = $_GET['model'])) { + foreach ($modelSpec['fields'] as $fieldSpec) { + $foundAllRequired = TRUE; + $foundOne = FALSE; + foreach ($fieldSpec['virtualFields'] as $subFieldSpec) { + if (in_array($fieldSpec['fieldName'] . ':' . $subFieldSpec['fieldNameSuffix'], $metadata['mappings'])) { + $foundOne = TRUE; + } + elseif (isset($subFieldSpec['required']) && $subFieldSpec['required']) { + $foundAllRequired = FALSE; + } + } + if ($foundOne && $foundAllRequired) { + $specialMergeProcessing[] = $fieldSpec; + } + } + } + } + } + $storedMeanings = self::retrieveCachedStoredMeanings(); + while (($data = fgetcsv($handle, 10000, ",")) !== FALSE && ($limit === FALSE || $count < $limit)) { + if (!array_filter($data)) { + // Skip empty rows. continue; + } + // Can't just clear the model, as clear does not do a full reset - + // leaves related entries: important for location joinsTo websites. + $model = ORM::Factory($_GET['model']); $count++; $index = 0; $saveArray = $model->getDefaults(); - // Note, the mappings will always be in the same order as the columns of the CSV file - foreach ($metadata['mappings'] as $col=>$attr) { - // skip cols to do with remembered mappings - if ($col!=='RememberAll' && substr($col, -9)!=='_Remember') { + // Note, the mappings will always be in the same order as the columns + // of the CSV file. + foreach ($metadata['mappings'] as $col => $attr) { + // Skip cols to do with remembered mappings. + if ($col !== 'RememberAll' && substr($col, -9) !== '_Remember' && $col != 'AllowLookup') { if (isset($data[$index])) { - // '' is a value fixed in import_helper::model_field_options - if ($attr != '' && $data[$index]!=='') { - // Add the data to the record save array. Utf8 encode if file does not have UTF8 BOM. + // '' is a value fixed in + // import_helper::model_field_options. + if ($attr != '' && $data[$index] !== '') { + // Add the data to the record save array. Utf8 encode if file + // does not have UTF8 BOM. $saveArray[$attr] = $metadata['isUtf8'] ? $data[$index] : utf8_encode($data[$index]); } - } else { - // This is one of our static fields at the end + } + else { + // This is one of our static fields at the end. 
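+            // I.e. a fixed value supplied alongside the mappings rather than
+            // read from the CSV row, so the mapped value is stored directly.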
            $saveArray[$col] = $attr;
          }
          $index++;
        }
      }
-
-        if((!isset($saveArray['sample:date']) || $saveArray['sample:date']=='') && $processDate) {
-          $saveArray['sample:date'] = $data[$yearColumn].'-'.sprintf('%02d', $data[$monthColumn]).'-'.sprintf('%02d', $data[$dayColumn]); // initially has to have all 3 fields: TODO vaguer dates?
-          unset($saveArray['sample:date:day']);
-          unset($saveArray['sample:date:month']);
-          unset($saveArray['sample:date:year']);
+        // The genus, specific name and qualifier are all merge fields.
+        // However, the qualifier is not mandatory, so if no qualifier is
+        // specified we effectively tell the system one has been specified, so
+        // that it doesn't ask for it. Ideally this code should be generalised
+        // going forward.
+        if ((array_key_exists('taxon:taxon:genus',$saveArray) || array_key_exists('taxon:taxon:specific',$saveArray)) && !array_key_exists('taxon:taxon:qualifier',$saveArray)) {
+          $saveArray['taxon:taxon:qualifier'] = '';
         }
-        // copy across the fixed values, including the website id, into the data to save.
+        foreach (array_keys($specialFieldProcessing) as $col) {
+          if (!isset($saveArray[$col]) || $saveArray[$col] == '') {
+            $saveArray[$col] = vsprintf(
+              $model->specialImportFieldProcessingDefn[$col]['template'],
+              array_map(function ($column) use ($saveArray) {
+                return $saveArray[$column];
+              },
+              $model->specialImportFieldProcessingDefn[$col]['columns'])
+            );
+            foreach ($model->specialImportFieldProcessingDefn[$col]['columns'] as $column) {
+              unset($saveArray[$column]);
+            }
+          }
+        }
+        foreach ($specialMergeProcessing as $fieldSpec) {
+          $merge = array();
+          foreach ($fieldSpec['virtualFields'] as $subFieldSpec) {
+            $col = $fieldSpec['fieldName'] . ':' . $subFieldSpec['fieldNameSuffix'];
+            if (isset($saveArray[$col])) {
+              if ($saveArray[$col] !== '') {
+                $merge[] = (isset($subFieldSpec['dataPrefix']) ? $subFieldSpec['dataPrefix'] : '') .
+                  $saveArray[$col] .
+                  (isset($subFieldSpec['dataSuffix']) ? $subFieldSpec['dataSuffix'] : '');
+              }
+              unset($saveArray[$col]);
+            }
+          }
+          if (count($merge) > 0) {
+            $saveArray[$fieldSpec['fieldName']] = implode(str_replace('<br/>', "\r\n", (isset($fieldSpec['joiningString']) ? $fieldSpec['joiningString'] : '')), $merge);
+          }
+        }
+        // Copy across the fixed values, including the website id, into the
+        // data to save.
        if ($metadata['settings']) {
          $saveArray = array_merge($metadata['settings'], $saveArray);
        }
        if (!empty($saveArray['website_id'])) {
-          // automatically join to the website if relevant
+          // Automatically join to the website if relevant.
          if (isset($this->submissionStruct['joinsTo']) && in_array('websites', $this->submissionStruct['joinsTo'])) {
-            $saveArray['joinsTo:website:'.$saveArray['website_id']]=1;
-          }
-        }
-        // Check if in an association situation
-        $associationExists = false;
-        if (self::_check_module_active($this->submissionStruct['model'].'_associations')) {
-          // assume model has attributes.
- $attrDetails = $model->get_attr_details(); - $associatedSuffix = '_2'; - $associatedRecordSubmissionStructure = $this->submissionStruct; - $originalRecordPrefix = $this->submissionStruct['model']; - $originalAttributePrefix = $attrDetails['attrs_field_prefix']; - $originalMediaPrefix = $originalRecordPrefix.'_media'; - $associatedRecordPrefix = $originalRecordPrefix.$associatedSuffix; - $associatedAttributePrefix = $originalAttributePrefix.$associatedSuffix; - $associatedMediaPrefix = $originalMediaPrefix.$associatedSuffix; - $associationRecordPrefix = $originalRecordPrefix.'_association'; - // find out if association or associated records exist - foreach ($saveArray as $assocField=>$assocValue) { - $associationExists = $associationExists || - substr($assocField, 0, strlen($associationRecordPrefix)) == $associationRecordPrefix || - substr($assocField, 0, strlen($associatedRecordPrefix)) == $associatedRecordPrefix; - } - } - - // If posting a supermodel, are the details of the supermodel the same as for the previous CSV row? If so, we can link to that - // record rather than create a new supermodel record. - $updatedPreviousCsvSupermodelDetails=$this->checkForSameSupermodel($saveArray, $model, $associationExists); + $saveArray['joinsTo:website:' . $saveArray['website_id']] = 1; + } + } + if ($supportsImportGuid) { + if ($existingImportGuidColIdx === FALSE) { + // Save the import guid in a field so the results of each + // individual upload can be grouped together. Relies on the model + // being imported into having a text field called import_guid + // otherwise it's just ignored. + $saveArray['import_guid'] = $metadata['guid']; + } + else { + // This is a reimport of error records which want to link back to + // the original import. So use the original GUID as supplied in the + // data rather than the uploaded file name. + $saveArray['import_guid'] = $data[$existingImportGuidColIdx]; + } + } + // Check if in an association situation. + $associationExists = FALSE; + $originalRecordPrefix = $this->submissionStruct['model']; + $originalAttributePrefix = (isset($model->attrs_field_prefix) ? $model->attrs_field_prefix : ''); + $originalMediaPrefix = $originalRecordPrefix . '_media'; + if (self::checkModuleActive($this->submissionStruct['model'] . '_associations')) { + // Assume model has attributes. + $associatedSuffix = '_2'; + $associatedRecordSubmissionStructure = $this->submissionStruct; + $associatedRecordPrefix = $originalRecordPrefix . $associatedSuffix; + $associatedAttributePrefix = $originalAttributePrefix . $associatedSuffix; + $associatedMediaPrefix = $originalMediaPrefix . $associatedSuffix; + $associationRecordPrefix = $originalRecordPrefix . '_association'; + // Find out if association or associated records exist - do this if a + // species lookup value is filled in. + // This restricts to occurrence/taxa associations. + foreach ($saveArray as $assocField => $assocValue) { + $associationExists = $associationExists || (!empty($assocValue) && + preg_match("/^$associatedRecordPrefix:fk_taxa_taxon_list/", $assocField)); + } + } // Clear the model, so nothing else from the previous row carries over. 
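+        // Note a fresh model instance was already created at the top of the
+        // loop, since clear() alone leaves related entries behind (e.g.
+        // location joinsTo websites).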
      $model->clear();
-        // Save the record with automatically generated spatial reference from the Vice County/Country where needed
-        $saveArray=self::auto_generate_grid_references($saveArray);
-        kohana::log('debug', 'Plant Portal - Warehouse is going to save this submission '.print_r($saveArray,true));
-        $model->set_submission_data($saveArray, true);
-        $associationExists = false;
-        if (($id = $model->submit()) == null) {
-          // Record has errors - now embedded in model, so dump them into the error file
-          $errors = array();
-          foreach($model->getAllErrors() as $field=>$msg) {
-            $fldTitle = array_search($field, $metadata['mappings']);
-            $fldTitle = $fldTitle ? $fldTitle : $field;
-            $errors[] = "$fldTitle: $msg";
-          }
-          $errors = implode("\n", array_unique($errors));
-          $data[] = $errors;
-          $data[] = $count + $offset + 1; // 1 for header
-          fputcsv($errorHandle, $data);
-          kohana::log('debug', 'Failed to import CSV row: '.$errors);
-          $metadata['errorCount'] = $metadata['errorCount'] + 1;
-        } else {
-          // now the record has successfully posted, we need to store the details of any new supermodels and their Ids,
-          // in case they are duplicated in the next csv row.
-          $this->previousCsvSupermodel['details'] = array_merge($this->previousCsvSupermodel['details'], $updatedPreviousCsvSupermodelDetails);
-          $this->captureSupermodelIds($model, $associationExists);
-        }
-        // get file position here otherwise the fgetcsv in the while loop will move it one record too far.
+        // If main model ID is set in imported data, then we MUST be importing
+        // into an existing record. Import file can't create arbitrary new IDs.
+        $mustExist = FALSE;
+        if (!empty($saveArray["$_GET[model]:id"])) {
+          // Test that if an import contains an ID for the main model, that it
+          // is used for a lookup of existing records.
+          $ok = FALSE;
+          if (!empty($metadata['mappings']['lookupSelect' . $_GET['model']])) {
+            $lookupFields = json_decode($metadata['mappings']['lookupSelect' . $_GET['model']]);
+            foreach ($lookupFields as $lookupField) {
+              if ($lookupField->fieldName === "$_GET[model]:id") {
+                $ok = TRUE;
+              }
+            }
+          }
+          if (!$ok) {
+            $this->logError(
+              $data, 'ID specified in import row but not being used to look up an existing record.',
+              $existingProblemColIdx, $existingErrorRowNoColIdx,
+              $errorHandle, $count + $offset + 1,
+              $supportsImportGuid && $existingImportGuidColIdx === FALSE ? $metadata['guid'] : '',
+              $metadata
+            );
+            // Get file position here otherwise the fgetcsv in the while loop
+            // will move it one record too far.
+            $filepos = ftell($handle);
+            continue;
+          }
+          $mustExist = TRUE;
+        }
+        // If a possible previous record, attempt to find the relevant IDs.
+        if (isset($metadata['mappings']['lookupSelect' . $_GET['model']]) && $metadata['mappings']['lookupSelect' . $_GET['model']] !== '') {
+          try {
+            self::mergeExistingRecordIds($_GET['model'], $originalRecordPrefix, $originalAttributePrefix, '', $metadata,
+              $mustExist, $model, $saveArray);
+          }
+          catch (Exception $e) {
+            $this->logError(
+              $data, $e->getMessage(),
+              $existingProblemColIdx, $existingErrorRowNoColIdx,
+              $errorHandle, $count + $offset + 1,
+              $supportsImportGuid && $existingImportGuidColIdx === FALSE ? $metadata['guid'] : '',
+              $metadata
+            );
+            // Get file position here otherwise the fgetcsv in the while loop
+            // will move it one record too far.
+            $filepos = ftell($handle);
+            continue;
+          }
+        }
+        // Check if we can use an existing data relationship to work out if this
+        // is a new or old record. 
If posting a supermodel, are the details of + // the supermodel the same as for the previous CSV row? If so, we can + // link to that record rather than create a new supermodel record. + // If not, then potentially lookup existing record in the database. + $updatedPreviousCsvSupermodelDetails = $this->checkForSameSupermodel($saveArray, $model, $associationExists, $metadata); + if ($associationExists && isset($metadata['mappings']['lookupSelect' . $associatedRecordPrefix]) && $metadata['mappings']['lookupSelect' . $associatedRecordPrefix] !== '') { + $assocModel = ORM::Factory($_GET['model']); + self::mergeExistingRecordIds($_GET['model'], $associatedRecordPrefix, $associatedAttributePrefix, $associatedSuffix, + $metadata, FALSE, $assocModel, $saveArray); + if (isset($saveArray[$originalRecordPrefix . ':id']) && isset($saveArray[$associatedRecordPrefix . ':id'])) { + $assocModel = ORM::Factory($associationRecordPrefix) + ->where([ + 'from_' . $_GET['model'] . '_id' => $saveArray[$originalRecordPrefix . ':id'], + 'to_' . $_GET['model'] . '_id' => $saveArray[$associatedRecordPrefix . ':id'], + 'association_type_id' => $saveArray[$associationRecordPrefix . ':association_type_id'], + 'deleted' => 'f', + ])->find(); + if ($assocModel->loaded === TRUE) { + $saveArray[$associationRecordPrefix . ':id'] = $assocModel->id; + } + } + } + + // Save the record. + $model->set_submission_data($saveArray, TRUE); + /* + At this point, if model has associations (i.e. a module is active + called _associations) we flip the submission so the + model becomes the subModel. This way we can bolt any second + associated record in, into the submodel array. + */ + // GvB TODO alter automatic mappings to set up secondary occurrences + // correctly. + if ($associationExists && isset($model->submission['superModels']) && + is_array($model->submission['superModels']) && + count($model->submission['superModels']) === 1 + ) { + // We are assuming only one superModel, which must exist at this + // point. Use key 'record1' into the subModel array so association + // record knows which is which. + // We are using the previously wrapped superModel. + unset($associatedRecordSubmissionStructure['superModels']); + // Flip then bolt in as second submodel to the supermodel using key + // 'record2'. + $submissionData = $model->submission; + $superModelSubmission = $submissionData['superModels'][0]['model']; + $superModelFK = $submissionData['superModels'][0]['fkId']; + $superModel = ORM::Factory($superModelSubmission['id']); + $superModel->clear(); + unset($submissionData['superModels']); + // Try to wrap second record of original model. + // As the submission builder needs a 1-1 match between field prefix + // and model name, we need to generate an altered saveArray. + $associatedArray = array(); + foreach ($saveArray as $fieldname => $value) { + $parts = explode(':', $fieldname); + // Filter out original model feilds, any of its attributes and + // media records. 
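+            // E.g. occurrence_2:comment is renamed to occurrence:comment and
+            // (hypothetical attribute ID) occAttr_2:123 to occAttr:123, so
+            // the submission builder sees a standard single-model save array.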
+ if ($parts[0] != $originalRecordPrefix && + $parts[0] != $originalAttributePrefix && + $parts[0] != $originalMediaPrefix + ) { + if ($parts[0] == $associatedRecordPrefix) { + $parts[0] = $originalRecordPrefix; + } + else { + if ($parts[0] == $associatedAttributePrefix) { + $parts[0] = $originalAttributePrefix; + } + else { + if ($parts[0] == $associatedMediaPrefix) { + $parts[0] = $originalMediaPrefix; + } + } + } + $associatedArray[implode(':', $parts)] = $value; + } + } + $associatedSubmission = submission_builder::build_submission($associatedArray, $associatedRecordSubmissionStructure); + // Map fk_* fields to the looked up id. + $associatedSubmission = $model->getFkFields($associatedSubmission, $associatedArray); + // Wrap the association and bolt in as a submodel of original model, + // using '||record2||' pointer. + $association = ORM::Factory($associationRecordPrefix); + $association->set_submission_data($saveArray, TRUE); + if (!isset($association->submission['fields']['to_' . $associatedRecordSubmissionStructure['model'] . '_id'])) { + $association->submission['fields']['to_' . $associatedRecordSubmissionStructure['model'] . '_id'] = array('value' => '||record2||'); + } + $submissionData['subModels'] = array( + array( + 'fkId' => 'from_' . $associatedRecordSubmissionStructure['model'] . '_id', + 'model' => $association->submission, + ), + ); + $superModelSubmission['subModels'] = + array( + 'record1' => array('fkId' => $superModelFK, 'model' => $submissionData), + 'record2' => array('fkId' => $superModelFK, 'model' => $associatedSubmission), + ); + $superModel->submission = $superModelSubmission; + $modelToSubmit = $superModel; + } + else { + $associationExists = FALSE; + $modelToSubmit = $model; + } + $mainOrSynonym = FALSE; + $error = FALSE; + if (isset($metadata['synonymProcessing']) && isset($metadata['synonymProcessing']['separateSynonyms']) && + $metadata['synonymProcessing']['separateSynonyms'] && isset($saveArray['synonym:tracker'])) { + $mainOrSynonym = "maybe"; + $modelToSubmit->process_synonyms = FALSE; + if (isset($metadata['synonymProcessing']['synonymValues'])) { + foreach ($metadata['synonymProcessing']['synonymValues'] as $synonymValue) { + if ($saveArray['synonym:tracker'] === $synonymValue) { + $mainOrSynonym = "synonym"; + } + } + } + if (isset($metadata['synonymProcessing']['mainValues'])) { + foreach ($metadata['synonymProcessing']['mainValues'] as $mainValue) { + if ($saveArray['synonym:tracker'] === $mainValue) { + $mainOrSynonym = "main"; + } + } + } + if (!isset($saveArray['synonym:identifier']) || $saveArray['synonym:identifier'] === '') { + $error = "Could not identify field to group synonyms with."; + } + if ($mainOrSynonym === "maybe") { + $error = "Could not identify whether record is main record or synonym : " . $saveArray['synonym:tracker']; + } + } + if (!$error && $mainOrSynonym === "synonym") { + $modelToSubmit->submission['fields']['preferred']['value'] = 'f'; + if (array_key_exists($saveArray['synonym:identifier'], $storedMeanings)) { + // Meaning is held on supermodel. + foreach ($modelToSubmit->submission['superModels'] as $idx => $superModel) { + if ($superModel['model']['id'] == 'taxon_meaning' && !isset($superModel['model']['fields']['id'])) { + $modelToSubmit->submission['superModels'][$idx]['model']['fields']['id'] = array( + 'value' => $storedMeanings[$saveArray['synonym:identifier']], + ); + } + } + } + else { + $error = "Synonym appears in file before equivalent main record : " . 
$saveArray['synonym:identifier']; + } + } + if (!$error && $mainOrSynonym === "main") { + if (array_key_exists($saveArray['synonym:identifier'], $storedMeanings)) { + $error = "Main record appears more than once : " . $saveArray['synonym:identifier']; + } + } + if (!$error) { + if (($id = $modelToSubmit->submit()) == NULL) { + // Record has errors - now embedded in model, so dump them into the + // error file. + $errors = array(); + foreach ($modelToSubmit->getAllErrors() as $field => $msg) { + $fldTitle = array_search($field, $metadata['mappings']); + $fldTitle = $fldTitle ? $fldTitle : $field; + $errors[] = "$fldTitle: $msg"; + } + $errors = implode("\n", array_unique($errors)); + $this->logError( + $data, $errors, + $existingProblemColIdx, $existingErrorRowNoColIdx, + $errorHandle, $count + $offset + 1, + $supportsImportGuid && $existingImportGuidColIdx === FALSE ? $metadata['guid'] : '', + $metadata + ); + } + else { + // Now the record has successfully posted, we need to store the + // details of any new supermodels and their Ids, in case they are + // duplicated in the next csv row. + $this->previousCsvSupermodel['details'] = array_merge($this->previousCsvSupermodel['details'], $updatedPreviousCsvSupermodelDetails); + $this->captureSupermodelIds($modelToSubmit, $associationExists); + if ($mainOrSynonym === "main") { + // In case of a main record, store the meaning id. + $storedMeanings[$saveArray['synonym:identifier']] = $this->previousCsvSupermodel['id']['taxon_meaning']; + } + } + } + else { + $error = "Could not identify whether record is main record or synonym : " . $saveArray['synonym:tracker']; + $this->logError( + $data, $error, + $existingProblemColIdx, $existingErrorRowNoColIdx, + $errorHandle, $count + $offset + 1, + $supportsImportGuid && $existingImportGuidColIdx === FALSE ? $metadata['guid'] : '', + $metadata + ); + } + // Get file position here otherwise the fgetcsv in the while loop will + // move it one record too far. $filepos = ftell($handle); } - // Get percentage progress + // Get percentage progress. $progress = $filepos * 100 / filesize($csvTempFile); $r = "{\"uploaded\":$count,\"progress\":$progress,\"filepos\":$filepos}"; - // allow for a JSONP cross-site request + // Allow for a JSONP cross-site request. if (array_key_exists('callback', $_GET)) { - $r = $_GET['callback']."(".$r.")"; + $r = $_GET['callback'] . "(" . $r . ")"; } echo $r; fclose($handle); fclose($errorHandle); - self::internal_cache_upload_metadata($metadata); - - // An AJAX upload request will just receive the number of records uploaded and progress - $this->auto_render=false; - $cache->set(basename($csvTempFile).'previousSupermodel', $this->previousCsvSupermodel); + self::internalCacheUploadMetadata($metadata); + self::cacheStoredMeanings($storedMeanings); + + // An AJAX upload request will just receive the number of records + // uploaded and progress. + $this->auto_render = FALSE; + if (!empty($allowCommitToDB)&&$allowCommitToDB==true) { + $cache->set(basename($csvTempFile) . 'previousSupermodel', $this->previousCsvSupermodel); + } + if (class_exists('request_logging')) { + request_logging::log('i', 'import', NULL, 'upload', + empty($saveArray['website_id']) ? NULL : $saveArray['website_id'], + security::getUserId(), $tm); + } } } - + /* * Create new plots with data passed in from the website */ @@ -415,9 +836,6 @@ public function create_new_plots() { $plotNames = (isset($_GET['plotNames']) ? $_GET['plotNames'] : false); $plotSrefs = (isset($_GET['plotSrefs']) ? 
$_GET['plotSrefs'] : false); $plotSrefSystems = (isset($_GET['plotSrefSystems']) ? $_GET['plotSrefSystems'] : false); - kohana::log('debug', 'Plant Portal - Warehouse has received these new plot names to process '.print_r($plotNames,true)); - kohana::log('debug', 'Plant Portal - Warehouse has received these new plot spatial references to process '.print_r($plotSrefs,true)); - kohana::log('debug', 'Plant Portal - Warehouse has received these new plot spatial reference systems to process '.print_r($plotSrefSystems,true)); $plotLocationType = (isset($_GET['plotLocationType']) ? $_GET['plotLocationType'] : false); $websiteId = (isset($_GET['websiteId']) ? $_GET['websiteId'] : false); $userId = (isset($_GET['userId']) ? $_GET['userId'] : false); @@ -492,7 +910,6 @@ public function create_new_plots() { public function create_new_groups() { $db = new Database(); $groupNames = (isset($_GET['names']) ? $_GET['names'] : false); - kohana::log('debug', 'Plant Portal - Warehouse has received these new plot groups names to process '.print_r($groupNames,true)); //Groups names set in batches, these are comma separated so explode them to deal with them $explodedGroupNames = explode(',',$groupNames); $userId = (isset($_GET['userId']) ? $_GET['userId'] : false); @@ -698,180 +1115,517 @@ private static function auto_generate_grid_references($saveArray) { } return $saveArray; } - + /** - * Display the end result of an upload. Either displayed at the end of a non-AJAX upload, or redirected - * to directly by the AJAX code that is performing a chunked upload when the upload completes. - * Requires a get parameter for the uploaded_csv filename. - * @return string JSON containing the problems cound and error file name. + * Adds an error to the error log file. + */ + private function logError( + $data, + $error, + $existingProblemColIdx, + $existingErrorRowNoColIdx, + $errorHandle, + $total, + $importGuidToAppend, + &$metadata) { + if ($existingProblemColIdx === FALSE) { + $data[] = $error; + } + else { + $data[$existingProblemColIdx] = $error; + } + if ($existingErrorRowNoColIdx === FALSE) { + // + 1 for header. + $data[] = $total; + } + else { + $data[$existingErrorRowNoColIdx] = $total; + } + if ($importGuidToAppend) { + $data[] = $importGuidToAppend; + } + fputcsv($errorHandle, $data); + kohana::log('debug', 'Failed to import CSV row: ' . $error); + $metadata['errorCount'] = $metadata['errorCount'] + 1; + } + + /** + * If there is an existing record to lookup, merge its IDs with the data row. + */ + private function mergeExistingRecordIds( + $modelName, + $fieldPrefix, + $attrPrefix, + $assocSuffix, + $metadata, + $mustExist, + &$model, + &$saveArray, + $setSupermodel = FALSE) { + $join = ""; + $table = inflector::plural($modelName); + $fields = json_decode($metadata['mappings']['lookupSelect' . $fieldPrefix]); + $fields = array_map( + function ($field) { + return $field->fieldName; + }, $fields); + $join = self::buildJoin($fieldPrefix,$fields,$table,$saveArray); + $wheres = $model->buildWhereFromSaveArray($saveArray, $fields, "(" . $table . ".deleted = 'f')", $in, $assocSuffix); + if ($wheres !== FALSE) { + $db = Database::instance(); + // Have to use a db as this may have a join. + $existing = $db->query("SELECT $table.id FROM $table $join WHERE " . $wheres)->result_array(FALSE); + if (count($existing) > 0) { + // If an previous record exists, we have to check for existing + // attributes. + // Note this only works properly on single value attributes. + $saveArray[$fieldPrefix . 
':id'] = $existing[0]['id'];
+        if (isset($model->attrs_field_prefix)) {
+          if ($setSupermodel) {
+            $this->previousCsvSupermodel['attributeIds'][$modelName] = array();
+          }
+          $attributes = ORM::Factory($modelName . '_attribute_value')
+            ->where(array($modelName . '_id' => $existing[0]['id'], 'deleted' => 'f'))->find_all();
+          foreach ($attributes as $attribute) {
+            if ($setSupermodel) {
+              $this->previousCsvSupermodel['attributeIds'][$modelName][$attribute->__get($modelName . '_attribute_id')] = $attribute->id;
+            }
+            if (isset($saveArray[$attrPrefix . ':' . $attribute->__get($modelName . '_attribute_id')])) {
+              $saveArray[$attrPrefix . ':' . $attribute->__get($modelName . '_attribute_id') . ':' . $attribute->id] =
+                $saveArray[$attrPrefix . ':' . $attribute->__get($modelName . '_attribute_id')];
+              unset($saveArray[$attrPrefix . ':' . $attribute->__get($modelName . '_attribute_id')]);
+            }
+            elseif (isset($saveArray[$attrPrefix . ':fk_' . $attribute->__get($modelName . '_attribute_id')])) {
+              $saveArray[$attrPrefix . ':fk_' . $attribute->__get($modelName . '_attribute_id') . ':' . $attribute->id] =
+                $saveArray[$attrPrefix . ':fk_' . $attribute->__get($modelName . '_attribute_id')];
+              unset($saveArray[$attrPrefix . ':fk_' . $attribute->__get($modelName . '_attribute_id')]);
+            }
+          }
+        }
+      }
+      elseif ($mustExist) {
+        throw new Exception('Importing an existing ID but the row does not already exist.');
+      }
+    }
+  }
+
+  /*
+   * Build a join so the system works correctly when importing taxa with
+   * "update existing records" selected. For example, when importing new taxa
+   * while the external key or search code is still selected for existing
+   * record lookup, the system would otherwise keep overwriting the previous
+   * record as each new row is imported (because the search code/external key
+   * was not checked, only one row would end up imported). Note this function
+   * might need improving/generalising for other models, although
+   * occurrence/sample imports did not seem to show the same issue.
+   */
+  public static function buildJoin($fieldPrefix,$fields,$table,$saveArray) {
+    $r = '';
+    if (!empty($saveArray['taxon:external_key']) && $table=='taxa_taxon_lists') {
+      $r = "join taxa t on t.id = ".$table.".taxon_id AND t.external_key='".$saveArray['taxon:external_key']."' AND t.deleted=false";
+    }
+    elseif (!empty($saveArray['taxon:search_code']) && $table=='taxa_taxon_lists') {
+      $r = "join taxa t on t.id = ".$table.".taxon_id AND t.search_code='".$saveArray['taxon:search_code']."' AND t.deleted=false";
+    }
+    return $r;
+  }
+
+  /**
+   * Display the end result of an upload.
+   *
+   * Either displayed at the end of a non-AJAX upload, or redirected to
+   * directly by the AJAX code that is performing a chunked upload when the
+   * upload completes. Requires a get parameter for the uploaded_csv filename.
+   *
+   * Echoes JSON containing the problems count and error file name.
    */
   public function get_upload_result() {
     $this->authenticate('read');
-    $metadataFile = str_replace('.csv','-metadata.txt', $_GET['uploaded_csv']);
-    $errorFile = str_replace('.csv','-errors.csv',$_GET['uploaded_csv']);
-    $metadata = $this->_get_metadata($_GET['uploaded_csv']);
-    echo json_encode(array('problems'=>$metadata['errorCount'], 'file' => url::base().'upload/'.basename($errorFile)));
-    // clean up the uploaded file and mapping file, but only remove the error file if no errors, otherwise we make it downloadable
-    if (file_exists(DOCROOT . "upload/" . 
$_GET['uploaded_csv'])) + $metadataFile = str_replace('.csv', '-metadata.txt', $_GET['uploaded_csv']); + $errorFile = str_replace('.csv', '-errors.csv', $_GET['uploaded_csv']); + $metadata = $this->getMetadata($_GET['uploaded_csv']); + echo json_encode(array( + 'problems' => $metadata['errorCount'], + 'file' => url::base() . 'upload/' . basename($errorFile), + )); + // Clean up the uploaded file and mapping file, but only remove the error + // file if no errors, otherwise we make it downloadable. + if (file_exists(DOCROOT . "upload/" . $_GET['uploaded_csv'])) { unlink(DOCROOT . "upload/" . $_GET['uploaded_csv']); - if (file_exists(DOCROOT . "upload/" . $metadataFile)) + } + if (file_exists(DOCROOT . "upload/" . $metadataFile)) { unlink(DOCROOT . "upload/" . $metadataFile); - if ($metadata['errorCount'] == 0 && file_exists(DOCROOT . "upload/" . $errorFile)) + } + if ($metadata['errorCount'] == 0 && file_exists(DOCROOT . "upload/" . $errorFile)) { unlink(DOCROOT . "upload/" . $errorFile); - // clean up cached lookups - $cache= Cache::instance(); + } + // Clean up cached lookups. + $cache = Cache::instance(); $cache->delete_tag('lookup'); } - + /** - * When looping through csv import data, if the import data includes a supermodel (e.g. the sample for an occurrence) - * then this method checks to see if the supermodel part of the submission is repeated. If so, then rather than create - * a new record for the supermodel, we just link this new record to the existing supermodel record. E.g. a spreadsheet - * containing several occurrences in a single sample can repeat the sample details but only one sample gets created. - * BUT, there are situations (like building an association based submission) where we need to keep the structure, in which - * case we just set the id, rather than remove all the supermodel entries. + * Checks for matching supermodels (e.g. samples) between rows. + * + * When looping through csv import data, if the import data includes a + * supermodel (e.g. the sample for an occurrence) then this method checks to + * see if the supermodel part of the submission is repeated. If so, then + * rather than create a new record for the supermodel, we just link this new + * record to the existing supermodel record. E.g. a spreadsheet containing + * several occurrences in a single sample can repeat the sample details but + * only one sample gets created. BUT, there are situations (like building an + * association based submission) where we need to keep the structure, in + * which case we just set the id, rather than remove all the supermodel + * entries. */ - private function checkForSameSupermodel(&$saveArray, $model, $linkOnly = false) { + private function checkForSameSupermodel(&$saveArray, $model, $linkOnly = FALSE, $metadata = array()) { $updatedPreviousCsvSupermodelDetails = array(); if (isset($this->submissionStruct['superModels'])) { - // loop through the supermodels - foreach($this->submissionStruct['superModels'] as $modelName=>$modelDetails) { - // meaning models do not get shared across rows - we always generate a new meaning ID. - if ($modelName=='taxon_meaning' || $modelName=='meaning') + // Loop through the supermodels. + foreach ($this->submissionStruct['superModels'] as $modelName => $modelDetails) { + // Meaning models do not get shared across rows - we always generate a + // new meaning ID. + if ($modelName == 'taxon_meaning' || $modelName == 'meaning') { continue; + } $sm = ORM::factory($modelName); - $smAttrsPrefix = isset($sm->attrs_field_prefix) ? 
$sm->attrs_field_prefix : null; - // look for data in that supermodel and build something we can use for comparison. We must capture both normal and custom attributes. - $hash=''; - foreach ($saveArray as $field=>$value) { - if (substr($field, 0, strlen($modelName)+1)=="$modelName:") - $hash.="$field|$value|"; - elseif ($smAttrsPrefix && substr($field, 0, strlen($smAttrsPrefix)+1)=="$smAttrsPrefix:") - $hash.="$field|$value|"; - } - // if we have previously stored a hash for this supermodel, check if they are the same. If so we can get the ID. - if (isset($this->previousCsvSupermodel['details'][$modelName]) && $this->previousCsvSupermodel['details'][$modelName]==$hash) { - // the details for this supermodel point to an existing record, so we need to re-use it. - if($linkOnly) { - // now link the existing supermodel record to the save array - $saveArray[$modelName.':id'] = $this->previousCsvSupermodel['id'][$modelName]; - } else { - // First, remove the data from the submission array so we don't re-submit it. - foreach ($saveArray as $field=>$value) { - if (substr($field, 0, strlen($modelName)+1)=="$modelName:") + $smAttrsPrefix = isset($sm->attrs_field_prefix) ? $sm->attrs_field_prefix : NULL; + // We are going to build a hash which uniquely identifies everything we + // know about the current row's supermodel, so we can detect if it + // changes between rows. + $hashArray = []; + // If updating an existing record, then the comparison with supermodel + // data must include the existing supermodel's key as the import data + // is only partial. + if (!empty($saveArray[$model->object_name . ':id'])) { + $existing = ORM::factory($model->object_name, $saveArray[$model->object_name . ':id']); + $hashArray[$modelDetails['fk']] = $existing->{$modelDetails['fk']}; + } + // Look for new import values related to this supermodel to include in + // our comparison. We must capture both normal and custom attributes. + foreach ($saveArray as $field => $value) { + if (substr($field, 0, strlen($modelName) + 1) === "$modelName:" + || $smAttrsPrefix && substr($field, 0, strlen($smAttrsPrefix) + 1) === "$smAttrsPrefix:") { + $hashArray[preg_replace("/^$modelName:/", '', $field)] = $value; + } + } + $hash = ''; + // Convert the hash data into a key string we can store and compare. + foreach ($hashArray as $field => $value) { + $hash .= "$field|$value|"; + } + // If we have previously stored a hash for this supermodel, check if + // they are the same. If so we can get the ID. + if (isset($this->previousCsvSupermodel['details'][$modelName]) && $this->previousCsvSupermodel['details'][$modelName] == $hash) { + // The details for this supermodel point to an existing record, so we + // need to re-use it. + if ($linkOnly) { + // Now link the existing supermodel record to the save array. + $saveArray[$modelName . ':id'] = $this->previousCsvSupermodel['id'][$modelName]; + if (isset($sm->attrs_field_prefix)) { + if (!isset($this->previousCsvSupermodel['attributeIds'][$modelName])) { + // Only fetch supermodel attribute data now as this is first + // time it is used. + $this->previousCsvSupermodel['attributeIds'][$modelName] = array(); + $smattrs = ORM::factory($modelName . '_attribute_value')->where(array('deleted' => 'f', $modelName . '_id' => $this->previousCsvSupermodel['id'][$modelName]))->find_all(); + foreach ($smattrs as $smattr) { + $this->previousCsvSupermodel['attributeIds'][$modelName][$smattr->__get($modelName . 
'_attribute_id')] = $smattr->id; + } + } + foreach ($this->previousCsvSupermodel['attributeIds'][$modelName] as $smattrId => $smattrValueId) { + if (isset($saveArray[$sm->attrs_field_prefix . ':' . $smattrId])) { + $saveArray[$sm->attrs_field_prefix . ':' . $smattrId . ':' . $smattrValueId] = $saveArray[$sm->attrs_field_prefix . ':' . $smattrId]; + unset($saveArray[$sm->attrs_field_prefix . ':' . $smattrId]); + } + elseif (isset($saveArray[$sm->attrs_field_prefix . ':fk_' . $smattrId])) { + $saveArray[$sm->attrs_field_prefix . ':fk_' . $smattrId . ':' . $smattrValueId] = $saveArray[$sm->attrs_field_prefix . ':fk_' . $smattrId]; + unset($saveArray[$sm->attrs_field_prefix . ':fk_' . $smattrId]); + } + } + } + } + else { + // First, remove the data from the submission array so we don't + // re-submit it. Although this leaves any attributes of the + // supermodel in the saveArray, they are ignored without the + // supermodel itself. + foreach ($saveArray as $field => $value) { + if (substr($field, 0, strlen($modelName) + 1) == "$modelName:") { unset($saveArray[$field]); + } + } + // Now link the existing supermodel record to the save array. + $saveArray[$model->object_name . ':' . $modelDetails['fk']] = $this->previousCsvSupermodel['id'][$modelName]; + } + } + else { + // This is a new supermodel (e.g. a new sample for the occurrences). + $updatedPreviousCsvSupermodelDetails[$modelName] = $hash; + unset($this->previousCsvSupermodel['attributeIds'][$modelName]); + // Check if there is lookup for existing data. + if (isset($metadata['mappings']) && isset($metadata['mappings']['lookupSelect' . $modelName]) && $metadata['mappings']['lookupSelect' . $modelName] !== '') { + $superModel = ORM::Factory($modelName); + self::mergeExistingRecordIds($modelName, $modelName, $sm->attrs_field_prefix, '', $metadata, FALSE, + $superModel, $saveArray, TRUE); + } + elseif ($modelName === 'term' && isset($metadata['mappings']) && + isset($metadata['mappings']['lookupSelect' . $model->object_name]) && + $metadata['mappings']['lookupSelect' . $model->object_name] !== '' && + isset($saveArray['term:term'])) { + // Special case for termlist_terms, and their term supermodel: have + // to look up using complex query to get the link between the + // termlist and the term no attributes. No website join. The term + // and termlist_id have to be provided at this point. + $db = Database::instance(); + // Have to use a db as this may have a join. + $existing = $db->query("SELECT tlt.term_id, tlt.meaning_id " . + "FROM indicia.termlists_terms tlt " . + "JOIN indicia.terms t ON t.id = tlt.term_id AND t.deleted = false " . + "JOIN indicia.termlists tl ON tl.id = tlt.termlist_id AND tl.deleted = false " . + "WHERE tlt.deleted = false " . + "AND t.term='" . $saveArray['term:term'] . "' " . + "AND t.language_id = " . $saveArray['term:language_id'] . + (isset($saveArray['termlists_term:fk_termlist']) ? + " AND tl.title = '" . $saveArray['termlists_term:fk_termlist'] . "'" : + " AND tlt.termlist_id = " . $saveArray['termlists_term:termlist_id']))->result_array(FALSE); + if (count($existing) > 0) { + // If an previous record exists, we have to check for existing + // attributes. + // Note this only works properly on single value attributes. + $saveArray[$modelName . ':id'] = $existing[0]['term_id']; + $saveArray['meaning:id'] = $existing[0]['meaning_id']; + // No attributes for terms. + } + } + elseif ($modelName === 'taxon' && isset($metadata['mappings']) && + isset($metadata['mappings']['lookupSelect' . 
$model->object_name]) && + $metadata['mappings']['lookupSelect' . $model->object_name] !== '' && + // Taxon info may not be provided if looking up existing record. + // In which case, skip the lookup. + !empty($saveArray['taxon:language_id']) && + (!empty($saveArray['taxon:taxon']) || !empty($saveArray['taxon:external_key']) || !empty($saveArray['taxon:search_code']))) { + // Same for taxa_taxon_lists, and their taxon supermodel: have to + // look up using complex query to get the link between the + // taxon_list and the taxon. + // This has attributes. No website join. The taxon and + // taxon_list_id have to be provided at this point. + $fields = json_decode($metadata['mappings']['lookupSelect' . $model->object_name]); + $fields = array_map( + function ($field) { + return $field->fieldName; + }, + $fields + ); + $db = Database::instance(); + // Have to use a db as this may have a join. + $query = "SELECT ttl.taxon_id, ttl.taxon_meaning_id " . + "FROM indicia.taxa_taxon_lists ttl " . + "JOIN indicia.taxa t ON t.id = ttl.taxon_id AND t.deleted = false " . + "JOIN indicia.taxon_lists tl ON tl.id = ttl.taxon_list_id AND tl.deleted = false " . + "WHERE ttl.deleted = false " . + "AND t.language_id = " . $saveArray['taxon:language_id'] . + (isset($saveArray['taxa_taxon_list:fk_taxon_list']) ? + " AND tl.title = '" . $saveArray['taxa_taxon_list:fk_taxon_list'] . "'" : + " AND ttl.taxon_list_id = " . $saveArray['taxa_taxon_list:taxon_list_id']); + if (in_array('taxon:taxon', $fields) && isset($saveArray['taxon:taxon'])) { + $query .= "AND t.taxon='" . $saveArray['taxon:taxon'] . "' "; + $existing = $db->query($query)->result_array(FALSE); + } + elseif (in_array('taxon:external_key', $fields) && isset($saveArray['taxon:external_key'])) { + $query .= "AND t.external_key ='" . $saveArray['taxon:external_key'] . "' "; + $existing = $db->query($query)->result_array(FALSE); + } + elseif (in_array('taxon:search_code', $fields) && isset($saveArray['taxon:search_code'])) { + $query .= "AND t.search_code ='" . $saveArray['taxon:search_code'] . "' "; + $existing = $db->query($query)->result_array(FALSE); + } + else { + $existing = array(); + } + if (count($existing) > 0) { + // If an previous record exists, we have to check for existing + // attributes. + // Note this only works properly on single value attributes. + $saveArray[$modelName . ':id'] = $existing[0]['taxon_id']; + $saveArray['taxon_meaning:id'] = $existing[0]['taxon_meaning_id']; + // TODO attributes. } - // now link the existing supermodel record to the save array - $saveArray[$model->object_name.':'.$modelDetails['fk']] = $this->previousCsvSupermodel['id'][$modelName]; } - } else { - // this is a new supermodel (e.g. a new sample for the occurrences). So just save the details in case it is repeated - $updatedPreviousCsvSupermodelDetails[$modelName]=$hash; } } } return $updatedPreviousCsvSupermodelDetails; } - + /** - * When saving a model with supermodels, we don't want to duplicate the supermodel record if all the details are the same across 2 - * spreadsheet rows. So this method captures the ID of the supermodels that we have just posted, in case their details are replicated - * in the next record. - * Handles case where the submission has been flipped (associations), and supermodel has been made the main model. - */ - private function captureSupermodelIds($model, $flipped=false) { - if ($flipped) { - // supermodel is now main model - just look for the ID field... 
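      // Illustrative example (assumed data, not from the patch): if two
      // consecutive CSV rows contain identical sample values, both rows
      // produce the same details hash, so the second row re-uses the sample
      // ID cached in $this->previousCsvSupermodel['id']['sample'] rather
      // than creating a duplicate sample record.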
- $array = $model->as_array(); - $subStruct = $model->get_submission_structure(); - $this->previousCsvSupermodel['id'][$subStruct['model']] = $model->id; - } else if (isset($this->submissionStruct['superModels'])) { + * Find IDs associated with a supermodel (e.g. sample). + * + * When saving a model with supermodels, we don't want to duplicate the + * supermodel record if all the details are the same across 2 spreadsheet + * rows. So this method captures the ID of the supermodels that we have just + * posted, in case their details are replicated in the next record. Handles + * case where the submission has been flipped (associations), and supermodel + * has been made the main model. + */ + private function captureSupermodelIds($model, $flipped = FALSE) { + if ($flipped) { + // Supermodel is now main model - just look for the ID field... $array = $model->as_array(); - // loop through the supermodels - foreach($this->submissionStruct['superModels'] as $modelName=>$modelDetails) { - $id = $modelName . '_id'; - // Expect that the fk field is called fkTable_id (e.g. if the super model is called sample, then - // the field should be sample_id). If it is not, then we revert to using ORM to find the ID, which - // incurs a database hit. - $this->previousCsvSupermodel['id'][$modelName]= - isset($array[$id]) ? $array[$id] : $model->$modelName->id; + $subStruct = $model->get_submission_structure(); + $this->previousCsvSupermodel['id'][$subStruct['model']] = $model->id; + } + else { + if (isset($this->submissionStruct['superModels'])) { + $array = $model->as_array(); + // Loop through the supermodels. + foreach ($this->submissionStruct['superModels'] as $modelName => $modelDetails) { + $id = $modelName . '_id'; + // Expect that the fk field is called fkTable_id (e.g. if the super + // model is called sample, then the field should be sample_id). If it + // is not, then we revert to using ORM to find the ID, which incurs a + // database hit. For this reason as well we don't get any attribute + // values now, but rather the first time they need to be used. + $this->previousCsvSupermodel['id'][$modelName] = + isset($array[$id]) ? $array[$id] : $model->$modelName->id; + } } } } - + + private function createGuid() { + return sprintf('%04X%04X-%04X-%04X-%04X-%04X%04X%04X', + mt_rand(0, 65535), mt_rand(0, 65535), mt_rand(0, 65535), mt_rand(16384, 20479), + mt_rand(32768, 49151), mt_rand(0, 65535), mt_rand(0, 65535), mt_rand(0, 65535) + ); + } + /** - * Internal function that retrieves the metadata for a CSV upload. For AJAX requests, this comes - * from a cached file. For normal requests, the mappings should be in the $_POST data. + * Internal function that retrieves the metadata for a CSV upload. + * + * For AJAX requests, this comes from a cached file. For normal requests, the + * mappings should be in the $_POST data. */ - private function _get_metadata($csvTempFile) { - $metadataFile = DOCROOT . "upload/" . str_replace('.csv','-metadata.txt', $csvTempFile); - if (file_exists($metadataFile)) { + private function getMetadata($csvTempFile) { + $metadataFile = DOCROOT . "upload/" . 
str_replace('.csv', '-metadata.txt', $csvTempFile); + if (file_exists($metadataFile)) { $metadataHandle = fopen($metadataFile, "r"); $metadata = fgets($metadataHandle); fclose($metadataHandle); - return json_decode($metadata, true); - } else { - // no previous file, so create default new metadata - return array('mappings'=>array(), 'settings'=>array(), 'errorCount'=>0); + return json_decode($metadata, TRUE); + } + else { + // No previous file, so create default new metadata. + return [ + 'mappings' => [], + 'settings' => [], + 'errorCount' => 0, + 'guid' => $this->createGuid(), + ]; } } - + /** - * During a csv upload, this method is called to retrieve a resource handle to a file that can - * contain errors during the upload. The file is created if required, and the headers from the - * uploaded csv file (referred to by handle) are copied into the first row of the new error file - * along with a header for the problem description and row number. - * @param string $csvTempFile File name of the imported CSV file. - * @param resource $handle File handle - * @return resource The error file's handle. + * Retrieve the file handle of the error file. + * + * During a csv upload, this method is called to retrieve a resource handle + * to a file that can contain errors during the upload. The file is created + * if required, and the headers from the uploaded csv file (referred to by + * handle) are copied into the first row of the new error file along with a + * header for the problem description and row number. + * + * @param string $csvTempFile + * File name of the imported CSV file. + * @param resource $handle + * File handle. + * @param bool $supportsImportGuid + * True if the model supports tracking imports by GUID, therefore the error + * file needs to link the error row to its original GUID. + * @param int $existingProblemColIdx + * Returns the column index that the current row's error message is in. + * @param int $existingProblemRowNoColIdx + * Returns the column index that the current row's error source row number + * is in. + * @param int $existingImportGuidColIdx + * Returns the column index that the current row's import GUID is in. + * + * @return resource + * The error file's handle. */ - private function _get_error_file_handle($csvTempFile, $handle) { - // move the file to the beginning, so we can load the first row of headers. + private function getErrorFileHandle($csvTempFile, + $handle, + $supportsImportGuid, + &$existingProblemColIdx, + &$existingProblemRowNoColIdx, + &$existingImportGuidColIdx) { + // Move the file to the beginning, so we can load the first row of headers. fseek($handle, 0); - $errorFile = str_replace('.csv','-errors.csv',$csvTempFile); + $errorFile = str_replace('.csv', '-errors.csv', $csvTempFile); $needHeaders = !file_exists($errorFile); $errorHandle = fopen($errorFile, "a"); - // skip the header row, but add it to the errors file with additional field for row number. + // Skip the header row, but add it to the errors file with additional field + // for row number unless already present. 
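+    // Illustration (assumed upload headers): a file beginning
+    // "species,date,grid ref" gets an errors file header row of
+    // "species,date,grid ref,Problem,Row no.", plus "Import ID" when
+    // $supportsImportGuid is TRUE. When a previously generated errors file
+    // is re-imported, these columns already exist, so the array_search()
+    // calls below locate and reuse them instead of appending duplicates.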
$headers = fgetcsv($handle, 1000, ","); + $existingImportGuidColIdx = FALSE; if ($needHeaders) { - $headers[] = 'Problem'; - $headers[] = 'Row no.'; + $existingProblemColIdx = array_search('Problem', $headers); + if ($existingProblemColIdx === FALSE) { + $headers[] = 'Problem'; + } + $existingProblemRowNoColIdx = array_search('Row no.', $headers); + if ($existingProblemRowNoColIdx === FALSE) { + $headers[] = 'Row no.'; + } + if ($supportsImportGuid) { + $existingImportGuidColIdx = array_search('Import ID', $headers); + if ($existingImportGuidColIdx === FALSE) { + // If not re-importing errors, store the file ID as an import guid in + // the errors, to link errors to their original import. + $headers[] = 'Import ID'; + } + } fputcsv($errorHandle, $headers); } return $errorHandle; } /** - * Runs at the start of each batch of rows. Checks if the previous imported row defined a supermodel. If so, we'll load - * it from the Kohana cache. This allows us to determine if the new row can link to the same supermodel or not. An example - * would be linking several occurrences to the same sample. - * @param $cache + * Retrieves the supermodel (e.g. sample) associated with the last row. + * + * Runs at the start of each batch of rows. Checks if the previous imported + * row defined a supermodel. If so, we'll load it from the Kohana cache. This + * allows us to determine if the new row can link to the same supermodel or + * not. An example would be linking several occurrences to the same sample. + * + * @param object $cache + * Cache object. */ - private function getPreviousRowSupermodel($cache) - { + private function getPreviousRowSupermodel($cache) { $this->previousCsvSupermodel = $cache->get(basename($_GET['uploaded_csv']) . 'previousSupermodel'); if (!$this->previousCsvSupermodel) { $this->previousCsvSupermodel = array( - 'id' => array(), - 'details' => array() + 'id' => [], + 'details' => [], + 'attributeIds' => [], ); } } /** - * Checks if there is a byte order marker at the beginning of the file (BOM). If so, sets this information in the $metadata. - * Rewinds the file to the beginning. - * @param $metadata - * @param $handle - * @return mixed + * Checks if there is a byte order marker at the beginning of the file (BOM). + * + * If so, sets this information in the $metadata. Rewinds the file to the + * beginning. + * + * @param array $metadata + * Import metadata information. + * @param resource $handle + * File handle. */ - private function checkIfUtf8(&$metadata, $handle) - { + private function checkIfUtf8(array &$metadata, $handle) { if (!isset($metadata['isUtf8'])) { fseek($handle, 0); - $BOMCheck = fread($handle, 3); - // Flag if this file has a UTF8 BOM at the start - $metadata['isUtf8'] = $BOMCheck === chr(0xEF) . chr(0xBB) . chr(0xBF); + $bomCheck = fread($handle, 3); + // Flag if this file has a UTF8 BOM at the start. + $metadata['isUtf8'] = $bomCheck === chr(0xEF) . chr(0xBB) . chr(0xBF); } } -} \ No newline at end of file + +} From 2f19aacd728dfffa5b33ff961a692e8d66bd8651 Mon Sep 17 00:00:00 2001 From: Andrew van Breda Date: Sat, 12 Oct 2019 20:06:03 +0100 Subject: [PATCH 2/6] Fixed auto-verifier. Essentially removed the mode that relied on occDelta which seemed to have stopped working (possibly due to changes in Indicia). Add further control options to the cache_occurrences mode so that the number of records and age of records the system processes can be controlled. 
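For example, a warehouse that wants each scheduled task run to work through a
bounded, recent backlog might use settings along these lines (the option names
are those introduced by this patch; the values are illustrative only):

    $config['auto_accept_occurrences_with_null_id_difficulty'] = 'true';
    // -1 (or less) is unlimited, 0 processes nothing.
    $config['max_num_records_to_process_at_once'] = 5000;
    // Skip records created before this date (DD/MM/YYYY).
    $config['oldest_record_created_date_to_process'] = '01/01/2015';
    // 1 (or less) is effectively unlimited.
    $config['oldest_occurrence_id_to_process'] = 1;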
--- application/config/version.php | 4 +- .../config/auto_verify.php.example.php | 10 +-- modules/auto_verify/plugins/auto_verify.php | 63 ++++++++++++------- 3 files changed, 48 insertions(+), 29 deletions(-) diff --git a/application/config/version.php b/application/config/version.php index d37fdc2fb9..2609a52e36 100644 --- a/application/config/version.php +++ b/application/config/version.php @@ -29,14 +29,14 @@ * * @var string */ -$config['version'] = '2.35.1'; +$config['version'] = '2.35.2'; /** * Version release date. * * @var string */ -$config['release_date'] = '2019-10-03'; +$config['release_date'] = '2019-10-12'; /** * Link to the code repository downloads page. diff --git a/modules/auto_verify/config/auto_verify.php.example.php b/modules/auto_verify/config/auto_verify.php.example.php index cfef14d2c8..0d0265dd0d 100644 --- a/modules/auto_verify/config/auto_verify.php.example.php +++ b/modules/auto_verify/config/auto_verify.php.example.php @@ -20,7 +20,9 @@ * @link http://code.google.com/p/indicia/ */ -$config['auto_accept_occurrences_with_null_id_difficulty']='false'; -//Do we process any data from cache_occurrences, or just look in occDelta for newly changed data. -//You would probably only need this mode as a one-off run. -$config['process_old_data']='false'; \ No newline at end of file +$config['auto_accept_occurrences_with_null_id_difficulty']='true'; +// Note that -1 (or less) is unlimited, 0 processes nothing +$config['max_num_records_to_process_at_once']=0; +$config['oldest_record_created_date_to_process']='01/01/2000'; +// 1 (or less) is effectively unlimited +$config['oldest_occurrence_id_to_process']=1; \ No newline at end of file diff --git a/modules/auto_verify/plugins/auto_verify.php b/modules/auto_verify/plugins/auto_verify.php index dc762932d7..22bb991401 100644 --- a/modules/auto_verify/plugins/auto_verify.php +++ b/modules/auto_verify/plugins/auto_verify.php @@ -32,35 +32,59 @@ * Database object. * * @todo Config for $autoVerifyNullIdDiff should be a boolean, not a string - * @todo Config for $processOldData should be a boolean, not a string */ function auto_verify_scheduled_task($last_run_date, $db) { $autoVerifyNullIdDiff = kohana::config('auto_verify.auto_accept_occurrences_with_null_id_difficulty', FALSE, FALSE); - $processOldData = kohana::config('auto_verify.process_old_data', FALSE, FALSE); + + $oldestRecordCreatedDateToProcess = kohana::config('auto_verify.oldest_record_created_date_to_process', FALSE, FALSE); + $oldestOccurrenceIdToProcess = kohana::config('auto_verify.oldest_occurrence_id_to_process', FALSE, FALSE); + $maxRecordsNumber = kohana::config('auto_verify.max_num_records_to_process_at_once', FALSE, FALSE); + if (empty($autoVerifyNullIdDiff)) { echo "Unable to automatically verify occurrences when the auto_accept_occurrences_with_null_id_difficulty entry is empty.
"; kohana::log('error', 'Unable to automatically verify occurrences when the auto_accept_occurrences_with_null_id_difficulty configuration entry is empty.'); return FALSE; } - // Do we need to consider old data (probably as a one-off run) or just newly - // changed data. - $subQuery = " - SELECT delta.id"; - if (!empty($processOldData)&&$processOldData === 'true') { - $subQuery .= " - FROM cache_occurrences_functional delta"; + + if (empty($oldestRecordCreatedDateToProcess)) { + echo "Unable to automatically verify occurrences when the oldest_record_created_date_to_process entry is empty.
"; + kohana::log('error', 'Unable to automatically verify occurrences when the oldest_record_created_date_to_process configuration entry is empty.'); + return FALSE; } - else { - $subQuery .= " - FROM occdelta delta"; + + if (empty($oldestOccurrenceIdToProcess)) { + echo "Unable to automatically verify occurrences when the oldest_occurrence_id_to_process entry is empty.
"; + kohana::log('error', 'Unable to automatically verify occurrences when the oldest_occurrence_id_to_process configuration entry is empty.'); + return FALSE; + } + + if (empty($maxRecordsNumber)) { + echo "Unable to automatically verify occurrences when the max_num_records_to_process_at_once entry is empty.
"; + kohana::log('error', 'Unable to automatically verify occurrences when the max_num_records_to_process_at_once configuration entry is empty.'); + return FALSE; } - $subQuery .= " + + $subQuery = " + SELECT distinct delta.id + FROM cache_occurrences_functional delta JOIN surveys s on s.id = delta.survey_id AND s.auto_accept=true AND s.deleted=false LEFT JOIN cache_taxon_searchterms cts on cts.taxon_meaning_id = delta.taxon_meaning_id WHERE delta.data_cleaner_result=true AND delta.record_status='C' AND delta.record_substatus IS NULL + AND delta.created_on >= TO_TIMESTAMP('$oldestRecordCreatedDateToProcess', 'DD/MM/YYYY') AND (($autoVerifyNullIdDiff=false AND cts.identification_difficulty IS NOT NULL AND cts.identification_difficulty<=s.auto_accept_max_difficulty) OR ($autoVerifyNullIdDiff=true AND (cts.identification_difficulty IS NULL OR cts.identification_difficulty<=s.auto_accept_max_difficulty)))"; + + if (isset($oldestOccurrenceIdToProcess) && $oldestOccurrenceIdToProcess > -1) { + $subQuery .= " + AND delta.id >= $oldestOccurrenceIdToProcess"; + } + + if (isset($maxRecordsNumber) && $maxRecordsNumber > -1) { + $subQuery .= " + order by delta.id desc limit $maxRecordsNumber"; + } + $verificationTime = gmdate("Y\/m\/d H:i:s"); //Need to update cache_occurrences_*, as these tables have already been built at this point. $query = " @@ -77,7 +101,9 @@ function auto_verify_scheduled_task($last_run_date, $db) { release_status='R', verified_by_id=1, verified_on='$verificationTime', - record_decision_source='M' + record_decision_source='M', + updated_on = now(), + updated_by_id = 1 WHERE id in ($subQuery); @@ -110,12 +136,3 @@ function auto_verify_scheduled_task($last_run_date, $db) { else echo 'No occurrence records have been auto-verified.
'; } - -/* - * Tell the system that we need the occdelta table to find out which occurrences have been created/changed recently. - */ -function auto_verify_metadata() { - return array( - 'requires_occurrences_delta'=>TRUE - ); -} \ No newline at end of file From f14db96b1852d874ed97390efc49a1fcb548f68e Mon Sep 17 00:00:00 2001 From: Richard Burkmar Date: Wed, 16 Oct 2019 17:44:16 +0100 Subject: [PATCH 3/6] Added Warehouse features for Survey auto-verify filtering So far haven't actually done anything with the auto filtering but just provided the means users of the Warehouse to specify taxa for a survey against which occurrences will be checked to see if they qualify for auto-verification. --- application/config/version.php | 4 +-- application/controllers/survey.php | 32 ++++++++++++++++++- application/models/survey.php | 16 ++++++++++ application/views/survey/survey_edit.php | 30 +++++++++++++++++ client_helpers | 2 +- .../201910071330_new_survey_fields.sql | 24 ++++++++++++++ 6 files changed, 104 insertions(+), 4 deletions(-) create mode 100644 modules/auto_verify/db/version_2_36_0/201910071330_new_survey_fields.sql diff --git a/application/config/version.php b/application/config/version.php index 2609a52e36..ae233dc848 100644 --- a/application/config/version.php +++ b/application/config/version.php @@ -29,14 +29,14 @@ * * @var string */ -$config['version'] = '2.35.2'; +$config['version'] = '2.36.0'; /** * Version release date. * * @var string */ -$config['release_date'] = '2019-10-12'; +$config['release_date'] = '2019-10-14'; /** * Link to the code repository downloads page. diff --git a/application/controllers/survey.php b/application/controllers/survey.php index 5a5e01da16..7bebc72075 100644 --- a/application/controllers/survey.php +++ b/application/controllers/survey.php @@ -65,9 +65,39 @@ protected function prepareOtherViewData(array $values) { $arr = array(); foreach ($websites->where('deleted','false')->orderby('title','asc')->find_all() as $website) $arr[$website->id] = $website->title; - return array( + + $otherData = array( 'websites' => $arr ); + + $otherData['taxon_restrictions'] = []; + $masterListId = warehouse::getMasterTaxonListId(); + if ($masterListId) { + + $tm_ids = $this->db + ->select('s.auto_accept_taxa_filters') + ->from('surveys AS s') + ->where([ + 's.id' => $values['survey:id'], + ]) + ->get()->result_array(FALSE); + $valsCSV=trim($tm_ids[0]['auto_accept_taxa_filters'], "{}"); + + if (!empty($valsCSV)) { + foreach (explode(",", $valsCSV) as $tm_id){ + $ttl_id = $this->db + ->select('cttl.id as taxa_taxon_list_id') + ->from('cache_taxa_taxon_lists AS cttl') + ->where([ + 'cttl.taxon_meaning_id' => $tm_id, + 'cttl.preferred' => true + ]) + ->get()->result_array(FALSE); + array_push($otherData['taxon_restrictions'], $ttl_id[0]); + } + } + } + return $otherData; } /** diff --git a/application/models/survey.php b/application/models/survey.php index 28c5fb106b..05f86d7057 100644 --- a/application/models/survey.php +++ b/application/models/survey.php @@ -54,9 +54,25 @@ public function validate(Validation $array, $save = FALSE) { 'owner_id', 'auto_accept', 'auto_accept_max_difficulty', + 'auto_accept_taxa_filters', 'core_validation_rules', ); return parent::validate($array, $save); } + protected function preSubmit() { + if (!empty($_POST['has-taxon-restriction-data'])) { + $ttlIds = []; + foreach ($_POST as $key => $value) { + if (substr($key, -8) === ':present' && $value !== '0') { + $taxonMeaningId = $this->db + ->query('SELECT taxon_meaning_id FROM 
cache_taxa_taxon_lists WHERE id=' . $value) + ->current(); + $ttlIds[] = intVal($taxonMeaningId->taxon_meaning_id); + } + } + $this->submission['fields']['auto_accept_taxa_filters']=array('value' => $ttlIds); + } + return parent::presubmit(); + } } \ No newline at end of file diff --git a/application/views/survey/survey_edit.php b/application/views/survey/survey_edit.php index 8fc0b5dad6..76beb024bc 100644 --- a/application/views/survey/survey_edit.php +++ b/application/views/survey/survey_edit.php @@ -138,6 +138,36 @@ 'helpText' => 'If Auto Accept is set, then this is the minimum identification difficulty that will be auto verified.', )); } + if (array_key_exists('survey:auto_accept_taxa_filters', $values)) { + $masterListId = warehouse::getMasterTaxonListId(); + echo << +

You can use the taxon selection control below to
+      select one or more higher level taxa to which recorded taxa must belong in order to
+      qualify for auto-verification. Leave the list empty for no filtering. You must also
+      check the Auto Accept box for these filters to take effect.
+ + + +HTML; + require_once 'client_helpers/prebuilt_forms/includes/language_utils.php'; + $speciesChecklistOptions = [ + 'lookupListId' => $masterListId, + 'rowInclusionCheck' => 'alwaysRemovable', + 'extraParams' => $readAuth, + 'survey_id' => $values['survey:id'], + 'language' => iform_lang_iso_639_2(kohana::config('indicia.default_lang')), + ]; + if (!empty($other_data['taxon_restrictions'])) { + $speciesChecklistOptions['listId'] = $masterListId; + $speciesChecklistOptions['preloadTaxa'] = []; + foreach ($other_data['taxon_restrictions'] as $restriction) { + $speciesChecklistOptions['preloadTaxa'][] = $restriction['taxa_taxon_list_id']; + } + } + echo data_entry_helper::species_checklist($speciesChecklistOptions); + echo '
'; + } ?> 0) : ?> diff --git a/client_helpers b/client_helpers index f6e2e10951..ad24321a59 160000 --- a/client_helpers +++ b/client_helpers @@ -1 +1 @@ -Subproject commit f6e2e109510ed70c7cbf80d2026fcb71d9d1a228 +Subproject commit ad24321a5910452969229fc150951f80411ab756 diff --git a/modules/auto_verify/db/version_2_36_0/201910071330_new_survey_fields.sql b/modules/auto_verify/db/version_2_36_0/201910071330_new_survey_fields.sql new file mode 100644 index 0000000000..4851a878de --- /dev/null +++ b/modules/auto_verify/db/version_2_36_0/201910071330_new_survey_fields.sql @@ -0,0 +1,24 @@ +CREATE OR REPLACE function f_add_new_survey_fields (OUT success bool) + LANGUAGE plpgsql AS +$func$ +BEGIN + +success := TRUE; + +BEGIN + ALTER TABLE surveys ADD COLUMN auto_accept_taxa_filters INT[]; + +EXCEPTION + WHEN duplicate_column THEN + RAISE NOTICE 'column exists.'; + success := FALSE; +END; + +END +$func$; + +SELECT f_add_new_survey_fields(); + +DROP FUNCTION f_add_new_survey_fields(); + +COMMENT ON COLUMN surveys.auto_accept_taxa_filters IS 'List of taxon meaning IDs to filter records qualifying for auto-verification'; From 8990a1179cdc82c3719996ec56cc774ce59120e2 Mon Sep 17 00:00:00 2001 From: Richard Burkmar Date: Thu, 17 Oct 2019 19:24:30 +0100 Subject: [PATCH 4/6] Reworking of Kohana database queries --- application/controllers/survey.php | 32 +++++++++++++++--------------- application/models/survey.php | 17 +++++++++++----- 2 files changed, 28 insertions(+), 21 deletions(-) diff --git a/application/controllers/survey.php b/application/controllers/survey.php index 7bebc72075..ef2805619e 100644 --- a/application/controllers/survey.php +++ b/application/controllers/survey.php @@ -74,27 +74,27 @@ protected function prepareOtherViewData(array $values) { $masterListId = warehouse::getMasterTaxonListId(); if ($masterListId) { - $tm_ids = $this->db + $tmIdVals = $this->db ->select('s.auto_accept_taxa_filters') ->from('surveys AS s') ->where([ 's.id' => $values['survey:id'], ]) - ->get()->result_array(FALSE); - $valsCSV=trim($tm_ids[0]['auto_accept_taxa_filters'], "{}"); - - if (!empty($valsCSV)) { - foreach (explode(",", $valsCSV) as $tm_id){ - $ttl_id = $this->db - ->select('cttl.id as taxa_taxon_list_id') - ->from('cache_taxa_taxon_lists AS cttl') - ->where([ - 'cttl.taxon_meaning_id' => $tm_id, - 'cttl.preferred' => true - ]) - ->get()->result_array(FALSE); - array_push($otherData['taxon_restrictions'], $ttl_id[0]); - } + ->get()->result(); + + $valsCSV=trim($tmIdVals[0]->auto_accept_taxa_filters, "{}"); + + $ttlIds = $this->db + ->select('id') + ->from('cache_taxa_taxon_lists as cttl') + ->in('taxon_meaning_id', explode(",", $valsCSV)) + ->where([ + 'cttl.preferred' => true + ]) + ->get()->result(); + + foreach ($ttlIds as $ttlId) { + array_push($otherData['taxon_restrictions'], array("taxa_taxon_list_id" => $ttlId->id)); } } return $otherData; diff --git a/application/models/survey.php b/application/models/survey.php index 05f86d7057..065318a707 100644 --- a/application/models/survey.php +++ b/application/models/survey.php @@ -63,15 +63,22 @@ public function validate(Validation $array, $save = FALSE) { protected function preSubmit() { if (!empty($_POST['has-taxon-restriction-data'])) { $ttlIds = []; + $tmIds = []; foreach ($_POST as $key => $value) { if (substr($key, -8) === ':present' && $value !== '0') { - $taxonMeaningId = $this->db - ->query('SELECT taxon_meaning_id FROM cache_taxa_taxon_lists WHERE id=' . 
$value)
-          ->current();
-        $ttlIds[] = intVal($taxonMeaningId->taxon_meaning_id);
+        $ttlIds[] = $value;
      }
    }
-    $this->submission['fields']['auto_accept_taxa_filters']=array('value' => $ttlIds);
+    $tmIdRecs = $this->db
+      ->select('id, taxon_meaning_id')
+      ->from('cache_taxa_taxon_lists')
+      ->in('id', $ttlIds)
+      ->get()->result();
+
+    foreach ($tmIdRecs as $tmIdRec) {
+      $tmIds[] = intVal($tmIdRec->taxon_meaning_id);
+    }
+    $this->submission['fields']['auto_accept_taxa_filters']=array('value' => $tmIds);
   }
   return parent::presubmit();
 }

From 17b604a332599c6cfa00779d3e311c8837810d0c Mon Sep 17 00:00:00 2001
From: Andrew van Breda
Date: Mon, 21 Oct 2019 12:30:30 +0100
Subject: [PATCH 5/6] Associated taxa now ordered by name.

I think this was previously intended but didn't work.

---
 .../taxon_associations/get_taxon_associations_as_string.xml | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/modules/taxon_associations/reports/library/taxon_associations/get_taxon_associations_as_string.xml b/modules/taxon_associations/reports/library/taxon_associations/get_taxon_associations_as_string.xml
index 8d96b2c3a3..85ccdc2e6a 100644
--- a/modules/taxon_associations/reports/library/taxon_associations/get_taxon_associations_as_string.xml
+++ b/modules/taxon_associations/reports/library/taxon_associations/get_taxon_associations_as_string.xml
@@ -8,12 +8,11 @@
   JOIN indicia.cache_taxa_taxon_lists cttl_hosts on cttl_hosts.taxon_meaning_id = ta.to_taxon_meaning_id and cttl_hosts.preferred=true
   WHERE cttl.id = #taxa_taxon_list_id# OR cttl.taxon_meaning_id = #taxon_meaning_id#
-  order by cttl.taxon asc
-
+
\ No newline at end of file

From 7fddaf0260152cb38f243727b4cebd65e07c6d40 Mon Sep 17 00:00:00 2001
From: Richard Burkmar
Date: Thu, 24 Oct 2019 15:00:09 +0100
Subject: [PATCH 6/6] Change to data cleaner phenology rule process

Also changed message in email to refer to notifications.

---
 .../plugins/data_cleaner_period_within_year.php | 6 +++---
 modules/notification_emails/plugins/notification_emails.php | 2 +-
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/modules/data_cleaner_period_within_year/plugins/data_cleaner_period_within_year.php b/modules/data_cleaner_period_within_year/plugins/data_cleaner_period_within_year.php
index f35775b81b..e323bb23e8 100644
--- a/modules/data_cleaner_period_within_year/plugins/data_cleaner_period_within_year.php
+++ b/modules/data_cleaner_period_within_year/plugins/data_cleaner_period_within_year.php
@@ -139,12 +139,12 @@ function data_cleaner_period_within_year_data_cleaner_rules() {
   and (vr.stages is null or vr.stages @> string_to_array(co.stage, ''))
 SQL;
   // The groupBy allows us to count the verified records at a similar time of
-  // year and only create messages if less than 2.
+  // year and only create messages if less than 6.
  $groupBy = <<Click here to update your subscription settings.<br/><br/>';

+      url::base() . '">Click here to control which notifications you receive.<br/><br/>';
    $cc = NULL;
    $swift = email::connect();
    // Use a transaction to allow us to prevent the email sending and marking of