workbench

#!/usr/bin/env python3

# Usage: ./workbench --config config.yml --check
# Usage: ./workbench --config config.yml

import os
import sys
import copy
import json
import csv
import logging
import datetime
import argparse
import collections
import subprocess
import requests_cache
from progress_bar import InitBar
from ruamel.yaml import YAML
from workbench_utils import *
import workbench_fields
from WorkbenchConfig import WorkbenchConfig


def create():
    """Create new nodes via POST, and add media if there are any.

    Takes no parameters and returns nothing. The function reads ``config`` and
    ``args`` and, only when ``config["progress_bar"]`` is True, ``pbar`` and
    ``num_csv_records``. None of these are assigned here, so they must be
    defined at module level before this function is called.

    For each row returned by get_csv_data(), the function:

    1. Optionally skips the row if verify_node_exists_by_key() reports a match.
    2. Builds the node's JSON structure from the required, base and custom
       CSV fields, resolving the row's parent from the in-session ``node_ids``
       dictionary and/or the CSV ID to node ID map.
    3. POSTs the node. If the response is not a 201, the error is logged and
       processing moves on to the next row.
    4. Writes rollback and ID map entries, runs any "node_post_create"
       scripts, creates the URL alias, and creates media for the "file" and
       "additional_files" columns (or children from a directory when
       ``paged_content_from_directories`` is True).

    Calls sys.exit() if a file named in the CSV cannot be found and
    ``config["allow_missing_files"]`` is False.
    """
    message = '"Create" task started using config file ' + args.config + "."
    print(message)
    logging.info(message)

    if config["ignore_existing_parent_ids"] is True:
        # If this is a secondary task, ensure that
        # query_csv_id_to_node_id_map_for_parents is True.
        secondary_tasks_json = os.environ.get("ISLANDORA_WORKBENCH_SECONDARY_TASKS")
        if secondary_tasks_json is not None:
            if os.path.abspath(args.config) in json.loads(secondary_tasks_json):
                config["query_csv_id_to_node_id_map_for_parents"] = True
    else:
        message = "'ignore_existing_parent_ids' is set to false, parent IDs from previous Workbench sessions will be used."
        print(message)
        logging.info(message)

    path_to_rollback_csv_file = get_rollback_csv_filepath(config)
    prep_rollback_csv(config, path_to_rollback_csv_file)
    logging.info("Writing rollback CSV to " + path_to_rollback_csv_file)

    prepare_csv_id_to_node_id_map(config)

    if config["csv_headers"] == "labels":
        # Remove any cached label-to-fieldname map for this content type.
        fieldname_map_cache_path = os.path.join(
            config["temp_dir"], f"node-{config['content_type']}-labels.fieldname_map"
        )
        if os.path.exists(fieldname_map_cache_path):
            os.remove(fieldname_map_cache_path)

    if config["log_term_creation"] is False:
        logging.info(
            "'log_term_creation' configuration setting is False. Creation of new taxonomy terms will not be logged."
        )

    if config["secondary_tasks"] is not None:
        if os.path.abspath(args.config) not in json.loads(
            os.environ["ISLANDORA_WORKBENCH_SECONDARY_TASKS"]
        ):
            prepare_csv_id_to_node_id_map(config)
            # Register the start time of the primary task so we can access it in secondary tasks.
            os.environ["ISLANDORA_WORKBENCH_PRIMARY_TASK_EXECUTION_START_TIME"] = (
                "{:%Y-%m-%d %H:%M:%S}".format(datetime.datetime.now())
            )

    # Maps CSV IDs of rows processed in this session to the IDs of the nodes
    # created from them.
    node_ids = dict()
    field_definitions = get_field_definitions(config, "node")
    csv_data = get_csv_data(config)
    csv_column_headers = csv_data.fieldnames

    if (
        "parent_id" in csv_column_headers
        and config["query_csv_id_to_node_id_map_for_parents"] is False
    ):
        message = "Only node IDs for parents created during this session will be used (not using the CSV ID to node ID map)."
        print(message)
        logging.warning(message)

    if (
        config["query_csv_id_to_node_id_map_for_parents"] is True
        and config["ignore_duplicate_parent_ids"] is True
    ):
        message = "Ignoring duplicate parent IDs in the CSV ID to node ID map; only the most recent entries will be used."
        print(message)
        logging.warning(message)

    if config["nodes_only"] is True:
        message = '"nodes_only" option in effect. No media will be created.'
        print(message)
        logging.info(message)

    # Optional node base fields, as (CSV column, node property, value key).
    base_fields = (
        ("uid", "uid", "target_id"),
        ("created", "created", "value"),
        ("langcode", "langcode", "value"),
        ("published", "status", "value"),
        ("promote", "promote", "value"),
    )

    # CSV columns that are not assembled into Drupal field structures.
    # 'parent_id' exists in the CSV to create parent/child relationships and
    # is not a Drupal field. 'langcode' is a core Drupal field, but is not
    # considered a "base field". The others are reserved CSV fields.
    reserved_csv_fields = {
        "parent_id",
        "langcode",
        "image_alt_text",
        "url_alias",
        "media_use_tid",
        "checksum",
        "directory",
    }

    # If new field types are added to workbench_fields.py, they need to be
    # registered here. Field types not listed (text, integer, boolean etc.)
    # are handled by workbench_fields.SimpleField.
    field_classes = {
        "entity_reference": workbench_fields.EntityReferenceField,
        "entity_reference_revisions": workbench_fields.EntityReferenceRevisionsField,
        "typed_relation": workbench_fields.TypedRelationField,
        "geolocation": workbench_fields.GeolocationField,
        "link": workbench_fields.LinkField,
        "authority_link": workbench_fields.AuthorityLinkField,
    }

    allowed_media_response_codes = (201, 204)

    row_count = 0
    for row in csv_data:
        if (
            "node_exists_verification_view_endpoint" in config
            and get_node_exists_verification_view_endpoint(config) is not False
        ):
            candidate_node_id = verify_node_exists_by_key(config, copy.copy(row))
            if candidate_node_id is not False:
                message = f"Item in row {row[config['id_field']]} appears to already be in Drupal ({config['host']}/node/{candidate_node_id}), skipping it."
                logging.warning(message)
                print(message)
                continue

        # Delete expired items from request_cache before processing a row.
        if config["enable_http_cache"] is True:
            requests_cache.delete(expired=True)

        # Create a copy of the current item's row to pass to create_media(),
        # since the row itself is modified below.
        row_for_media = copy.deepcopy(row)
        if config["paged_content_from_directories"] is True:
            # Create a copy of the current item's row to pass to the
            # create_children_from_directory function.
            row_as_parent = copy.deepcopy(row)

        id_field = row[config["id_field"]]
        unpopulated_member_of_log_message = None

        # Add required fields.
        row["title"] = truncate_csv_value(
            "title", row[config["id_field"]], field_definitions["title"], row["title"]
        )
        node = {
            "type": [{"target_id": config["content_type"], "target_type": "node_type"}],
            "title": [{"value": row["title"]}],
        }

        # Some optional node base fields.
        for csv_column, node_property, value_key in base_fields:
            if csv_column not in csv_column_headers:
                continue
            if len(row[csv_column]) > 0:
                node[node_property] = [{value_key: row[csv_column]}]
            # Reset it to empty so it doesn't throw a key error in the code
            # that assembles Drupal field structures below.
            row[csv_column] = ""

        # A non-numeric 'field_member_of' value is treated as a URL alias and
        # resolved to a node ID.
        if (
            "field_member_of" in row
            and len(row["field_member_of"]) > 0
            and value_is_numeric(row["field_member_of"]) is False
        ):
            field_member_of_value_for_message = copy.copy(row["field_member_of"])
            row["field_member_of"] = get_nid_from_url_alias(
                config, row["field_member_of"]
            )
            if row["field_member_of"] is False:
                # @TODO: Use this version of the message in --check.
                message = f'Node identified in "field_member_of" ({field_member_of_value_for_message}) in CSV row with ID "{id_field}" cannot be found or accessed.'
                print("Warning: " + message)
                logging.warning(message)

        # We want to collect the node IDs of items that are parents in-session
        # to accommodate users who have 'query_csv_id_to_node_id_map_for_parents: false'
        # in their config files. Note: this will not work with secondary tasks, if
        # you're using a secondary task you need to use the CSV ID to node ID map.
        # If a node with an ID that matches the current item's 'parent_id'
        # value has just been created, make the item a child of the node.
        if "parent_id" in row and row["parent_id"] in node_ids:
            row["field_member_of"] = node_ids[row["parent_id"]]

        # Since all nodes, both ones just created and also ones created in previous runs of
        # Workbench, may have entries in the node ID map database, we always query it.
        if (
            config["query_csv_id_to_node_id_map_for_parents"] is True
            and config["csv_id_to_node_id_map_path"] is not False
            and "parent_id" in row
            and row["parent_id"] is not None
        ):
            current_parent_node_id = ""
            if config["ignore_duplicate_parent_ids"] is True:
                query = "select node_id from csv_id_to_node_id_map where csv_id = ? order by timestamp desc limit 1"
            else:
                query = "select node_id from csv_id_to_node_id_map where csv_id = ?"
            parent_in_id_map_result = sqlite_manager(
                config,
                operation="select",
                query=query,
                values=(row["parent_id"],),
                db_file_path=config["csv_id_to_node_id_map_path"],
            )
            # Keep only the candidate parents for which ping_node() returns True.
            parent_node_ids_from_id_map = [
                parent_in_id_map_row["node_id"]
                for parent_in_id_map_row in parent_in_id_map_result
                if ping_node(config, parent_in_id_map_row["node_id"], warn=False)
                is True
            ]
            if len(parent_node_ids_from_id_map) == 1:
                row["field_member_of"] = parent_node_ids_from_id_map[0]
                current_parent_node_id = parent_node_ids_from_id_map[0]
            elif len(parent_node_ids_from_id_map) > 1:
                message = f'Review your Workbench log for problems with the "parent_id" value in row with ID "{id_field}" in your input CSV data.'
                # Convert to str before joining so that node IDs returned as
                # integers do not raise a TypeError.
                candidate_parent_node_ids = ", ".join(
                    map(str, parent_node_ids_from_id_map)
                )
                # CHILDNODEID is replaced with the new node's ID once the
                # node has been created.
                unpopulated_member_of_log_message = (
                    f"Row ID \"{id_field}\" has a \"parent_id\" value (\"{row['parent_id']}\") that corresponds to more than one node ID in the CSV ID to node ID "
                    + f"map (corresponding node IDs in the map are {candidate_parent_node_ids}). Workbench cannot reliably determine the child node's (CHILDNODEID) parent "
                    + 'node ID and will not populate node CHILDNODEID\'s "field_member_of".'
                )
                print("Warning: " + message)

        # Add custom (non-required) CSV fields.
        entity_fields = get_entity_fields(config, "node", config["content_type"])
        # Only add config['id_field'] to required_fields if it is not a node field.
        required_fields = ["file", "title"]
        if config["id_field"] not in entity_fields:
            required_fields.append(config["id_field"])
        custom_fields = list(set(csv_column_headers) - set(required_fields))
        additional_files_entries = get_additional_files_config(config)
        for custom_field in custom_fields:
            # Skip processing field if empty.
            if len(str(row[custom_field]).strip()) == 0:
                continue

            # Columns registered as "additional_files" are handled when media
            # are created, not here.
            if len(additional_files_entries) > 0:
                if custom_field in additional_files_entries.keys():
                    continue

            if custom_field in reserved_csv_fields:
                continue

            # We skip CSV columns whose headers use the 'media:video:field_foo' media track convention.
            if custom_field.startswith("media:"):
                continue

            if "preprocessors" in config and custom_field in config["preprocessors"]:
                row[custom_field] = preprocess_csv(config, row, custom_field)

            # Assemble Drupal field structures from CSV data.
            field_type = field_definitions[custom_field]["field_type"]
            field_class = field_classes.get(field_type, workbench_fields.SimpleField)
            node = field_class().create(
                config, field_definitions, node, row, custom_field
            )

        # If the user has configured Workbench to not query the CSV ID to node ID map,
        # use the in-session node_ids list to track and assign parent node IDs. This is
        # done once per row, after all custom fields have been assembled, so that it
        # also applies to rows that have no other populated custom fields.
        if (
            config["query_csv_id_to_node_id_map_for_parents"] is False
            and "parent_id" in row
            and row["parent_id"] in node_ids
        ):
            # NOTE(review): "node_type" is the target_type used for the bundle
            # reference above; confirm it is what Drupal expects for a node
            # reference here.
            node["field_member_of"] = [
                {
                    "target_id": node_ids[row["parent_id"]],
                    "target_type": "node_type",
                }
            ]

        node_headers = {"Content-Type": "application/json"}
        node_endpoint = "/node?_format=json"
        node_response = issue_request(
            config, "POST", node_endpoint, node_headers, node, None
        )

        # Anything other than a 201 means the node was not created; log the
        # failure and move on to the next row.
        if node_response.status_code != 201:
            message = "Node for CSV record " + id_field + " not created"
            print("ERROR: " + message + ".")
            logging.error(
                message
                + f", HTTP response code was {node_response.status_code}, response body was {node_response.content}"
            )
            logging.error(
                'JSON request body used in previous POST to "%s" was %s.',
                node_endpoint,
                node,
            )
            continue

        returned_node = json.loads(node_response.text)
        node_id = returned_node["nid"][0]["value"]
        node_uri = config["host"] + "/node/" + str(node_id)

        node_ids[id_field] = node_id

        if (
            "parent_id" in row
            and row["parent_id"] is not None
            and config["query_csv_id_to_node_id_map_for_parents"] is True
            and config["csv_id_to_node_id_map_path"] is not False
        ):
            # Same condition as the ID map query above, so
            # current_parent_node_id has been set for this row.
            populate_csv_id_to_node_id_map(
                config, row["parent_id"], current_parent_node_id, id_field, node_id
            )
        elif (
            config["query_csv_id_to_node_id_map_for_parents"] is False
            and config["csv_id_to_node_id_map_path"] is not False
            and "parent_id" in row
            and row["parent_id"] in node_ids
        ):
            populate_csv_id_to_node_id_map(
                config,
                row["parent_id"],
                node_ids[row["parent_id"]],
                id_field,
                node_id,
            )
        else:
            populate_csv_id_to_node_id_map(config, "", "", id_field, node_id)

        write_rollback_node_id(config, node_id, path_to_rollback_csv_file)

        if unpopulated_member_of_log_message is not None:
            unpopulated_member_of_log_message = (
                unpopulated_member_of_log_message.replace("CHILDNODEID", str(node_id))
            )
            logging.warning(unpopulated_member_of_log_message)

        if config["progress_bar"] is False:
            print(
                'Node for "'
                + row["title"]
                + '" (record '
                + id_field
                + ") created at "
                + node_uri
                + "."
            )
        logging.info(
            'Node for "%s" (record %s) created at %s.',
            row["title"],
            id_field,
            node_uri,
        )
        if "output_csv" in config:
            # We pass a copy of the row into this function because Python.
            write_to_output_csv(
                config, id_field, node_response.text, copy.deepcopy(row)
            )

        # Execute node-specific post-create scripts, if any are configured.
        if "node_post_create" in config and len(config["node_post_create"]) > 0:
            for command in config["node_post_create"]:
                (
                    post_task_output,
                    post_task_return_code,
                ) = execute_entity_post_task_script(
                    command,
                    args.config,
                    node_response.status_code,
                    node_response.text,
                )
                if post_task_return_code == 0:
                    logging.info(
                        "Post node create script " + command + " executed successfully."
                    )
                else:
                    logging.error("Post node create script " + command + " failed.")

        if config["progress_bar"] is True:
            row_count += 1
            row_position = get_percentage(row_count, num_csv_records)
            pbar(row_position)

        # If 'url_alias' is in the CSV, create the alias.
        if "url_alias" in row and len(row["url_alias"]) > 0:
            create_url_alias(config, node_id, row["url_alias"])

        write_rollback_config(config, path_to_rollback_csv_file)

        if "file" in row and len(row["file"].strip()) > 0:
            # If the file named in 'file' can't be found.
            if (
                config["nodes_only"] is False
                and config["paged_content_from_directories"] is False
                and check_file_exists(config, row["file"].strip()) is False
            ):
                message = (
                    "No media for "
                    + node_uri
                    + ' created since the file named the input CSV\'s "file" column (row with ID "'
                    + id_field
                    + '") could not be found.'
                )
                if config["allow_missing_files"] is False:
                    logging.error(message)
                    sys.exit("Error: " + message)
                # NOTE(review): the warning is only logged when the progress
                # bar is off; confirm whether it should always be logged.
                if config["progress_bar"] is False:
                    logging.warning(message)
                # Skip all media creation for this row.
                continue
        else:
            message = (
                "No media for "
                + node_uri
                + ' created since its "file" column in the input CSV (row with ID "'
                + id_field
                + '") is empty.'
            )
            logging.warning(message)

        # The node was created (non-201 responses were skipped above), so
        # create its media.
        if (
            config["nodes_only"] is False
            and "file" in row
            and len(row["file"].strip()) != 0
        ):
            media_response_status_code = create_media(
                config, row["file"], "file", node_id, row_for_media
            )
            if media_response_status_code in allowed_media_response_codes:
                if config["progress_bar"] is False:
                    print("+ Media for " + row["file"] + " created.")
                logging.info("Media for %s created.", row["file"])
            else:
                if config["progress_bar"] is False:
                    print(
                        "- ERROR: Media for "
                        + row["file"]
                        + " not created. See log for more information."
                    )
                logging.error(
                    "Media for %s not created (HTTP respone code %s).",
                    row["file"],
                    media_response_status_code,
                )

        if config["nodes_only"] is False and "additional_files" in config:
            additional_files_config = get_additional_files_config(config)
            if len(additional_files_config) > 0:
                for (
                    additional_file_field,
                    additional_file_media_use_tid,
                ) in additional_files_config.items():
                    # If there is no additional media file, move on to the next "additional_files" column.
                    if additional_file_field not in row:
                        continue

                    if len(row[additional_file_field].strip()) == 0:
                        # Build the message unconditionally: it is logged
                        # whether or not the progress bar is enabled.
                        message = (
                            f'Media for "additional_files" CSV column "{additional_file_field}" in row with ID "{row[config["id_field"]]}" '
                            + f'(node URL "{node_uri}") not created'
                        )
                        if config["progress_bar"] is False:
                            print("- " + message + ". See log for more information.")
                        logging.error(message + " because CSV field is empty.")
                        continue

                    filename = row[additional_file_field].strip()
                    file_exists = check_file_exists(config, filename)
                    if file_exists is False:
                        # Build the message unconditionally: it is logged
                        # whether or not the progress bar is enabled.
                        message = f'Media for file "{filename}" named in field "{additional_file_field}" of CSV row with ID "{row[config["id_field"]]}" not created'
                        if config["progress_bar"] is False:
                            print("- " + message + ". See log for more information.")
                        logging.error(message + " because file does not exist.")
                        if config["allow_missing_files"] is False:
                            # NOTE(review): exits without a message or a
                            # non-zero status, unlike the missing "file"
                            # case above; confirm this is intended.
                            sys.exit()
                        continue

                    media_response_status_code = create_media(
                        config,
                        row[additional_file_field],
                        additional_file_field,
                        node_id,
                        row_for_media,
                        additional_file_media_use_tid,
                    )
                    if media_response_status_code in allowed_media_response_codes:
                        if config["progress_bar"] is False:
                            print(
                                "+ Media for "
                                + row[additional_file_field]
                                + " created."
                            )
                        logging.info(
                            "Media for %s created.", row[additional_file_field]
                        )
                    else:
                        if config["progress_bar"] is False:
                            print(
                                "- Media for "
                                + row[additional_file_field]
                                + " not created. See log for more information."
                            )
                        logging.error(
                            "Media for %s not created (HTTP respone code %s).",
                            row[additional_file_field],
                            media_response_status_code,
                        )

        if (
            config["nodes_only"] is False
            and "file" in row
            and len(row["file"]) == 0
            and "additional_files" not in config
            and config["paged_content_from_directories"] is False
        ):
            if config["progress_bar"] is False:
                print("+ No files specified in CSV for row " + str(id_field) + ".")
            logging.info(
                "No files specified for row %s, so no media created.", str(id_field)
            )

        if config["paged_content_from_directories"] is True:
            # Console output and logging are done in the create_children_from_directory() function.
            create_children_from_directory(config, row_as_parent, node_id)


def update():
    """Update nodes via PATCH.

    Note that PATCHing replaces the target field, so if we are adding an
    additional value to a multivalued field, we need to include the existing
    value(s) in our PATCH. The field classes take care of preserving existing
    values in 'append' updates.

    Reads the module-level 'config' and 'args' objects. For every row in the
    input CSV, the node is pinged, its current field values are fetched, the
    PATCH body is assembled from the CSV columns, and the PATCH request is
    issued. Rows naming a node that cannot be found, or a field that the node
    does not have, are reported and skipped.
    """
    message = (
        '"Update" ('
        + config["update_mode"]
        + ") task started using config file "
        + args.config
        + "."
    )
    print(message)
    logging.info(message)

    if config["csv_headers"] == "labels":
        fieldname_map_cache_path = os.path.join(
            config["temp_dir"], f"node-{config['content_type']}-labels.fieldname_map"
        )
        if os.path.exists(fieldname_map_cache_path):
            os.remove(fieldname_map_cache_path)

    field_definitions = get_field_definitions(config, "node")
    csv_data = get_csv_data(config)
    csv_column_headers = csv_data.fieldnames

    if config["log_term_creation"] is False:
        logging.info(
            "'log_term_creation' configuration setting is False. Creation of new taxonomy terms will not be logged."
        )

    # Optional node base fields: (CSV column, Drupal field name, key of the value in the field structure).
    base_field_map = (
        ("uid", "uid", "target_id"),
        ("langcode", "langcode", "value"),
        ("created", "created", "value"),
        ("published", "status", "value"),
        ("promote", "promote", "value"),
    )

    # CSV columns that are allowed to be absent from the node's JSON representation.
    reserved_fields = ["published", "url_alias"]

    # Reserved CSV columns and base fields that are never passed to a field class.
    # 'image_alt_text' is not yet supported in update tasks, see
    # https://github.com/mjordan/islandora_workbench/issues/166.
    columns_not_patched_as_fields = (
        "url_alias",
        "image_alt_text",
        "langcode",
        "created",
        "published",
        "uid",
        "promote",
    )

    # Field classes keyed by Drupal field type. If new field types are added to
    # workbench_fields.py, they need to be registered here. Any field type not
    # listed (text, etc.) is handled by SimpleField.
    field_classes = {
        "entity_reference": workbench_fields.EntityReferenceField,
        "entity_reference_revisions": workbench_fields.EntityReferenceRevisionsField,
        "typed_relation": workbench_fields.TypedRelationField,
        "geolocation": workbench_fields.GeolocationField,
        "link": workbench_fields.LinkField,
        "authority_link": workbench_fields.AuthorityLinkField,
    }

    # Custom (non-required) fields, in CSV column order so that processing and
    # error reporting are deterministic. The headers do not change between rows.
    required_fields = ["node_id"]
    custom_fields = [
        header
        for header in dict.fromkeys(csv_column_headers)
        if header not in required_fields
    ]

    row_count = 0
    for row in csv_data:
        # WIP on #785: print("DEBUG from within update()", row)
        # Delete expired items from request_cache before processing a row.
        if config["enable_http_cache"] is True:
            requests_cache.delete(expired=True)

        node_id_to_ping = row["node_id"]
        if not value_is_numeric(node_id_to_ping):
            node_id_to_ping = get_nid_from_url_alias(config, node_id_to_ping)

        # Only ping the node if the URL alias (if any) resolved to a node ID.
        if node_id_to_ping is False:
            node_ping_result = False
        else:
            node_ping_result = ping_node(config, node_id_to_ping, "GET", True)

        if node_ping_result is False:
            # Report the value from the CSV if the alias could not be resolved.
            unreachable_node = (
                row["node_id"] if node_id_to_ping is False else node_id_to_ping
            )
            message = (
                "Node "
                + str(unreachable_node)
                + " not found or not accessible, skipping update."
            )
            if config["progress_bar"] is False:
                print(message)
            logging.warning(message)
            continue

        # Parse the node's JSON once per row rather than once per CSV column.
        existing_node_json = json.loads(node_ping_result)

        # Add the target_id field.
        node = {"type": [{"target_id": config["content_type"]}]}

        node_field_values = get_node_field_values(config, row["node_id"])

        # Some optional node base fields.
        for csv_column, drupal_field, value_key in base_field_map:
            if csv_column in csv_column_headers and len(row[csv_column]) > 0:
                node[drupal_field] = [{value_key: row[csv_column]}]

        # Must be initialized before the loop: if the CSV contains no columns
        # other than 'node_id', the loop body never runs.
        node_has_all_fields = True
        for custom_field in custom_fields:
            # If node doesn't have the field, log that fact and skip updating the node.
            if (
                custom_field not in existing_node_json
                and custom_field not in reserved_fields
            ):
                message = f'Node {row["node_id"]} does not have a "{custom_field}" field, skipping update.'
                print("ERROR: " + message)
                logging.warning(message)
                node_has_all_fields = False
                break

            # Skip updating field if CSV field is empty (other than for 'delete' update mode).
            # For 'delete' update mode it doesn't matter if there's anything in the CSV field,
            # but users expect to be able to supply empty values for this operation.
            if (
                len(row[custom_field].strip()) == 0
                and config["update_mode"] != "delete"
            ):
                continue

            if custom_field in columns_not_patched_as_fields:
                continue

            if "preprocessors" in config and custom_field in config["preprocessors"]:
                row[custom_field] = preprocess_csv(config, row, custom_field)

            # 'revision_log' is checked after preprocessing because its
            # (possibly preprocessed) value is used for the revision message below.
            if custom_field == "revision_log":
                continue

            # Assemble Drupal field structures from CSV data.
            field_type = field_definitions[custom_field]["field_type"]
            field_handler = field_classes.get(field_type, workbench_fields.SimpleField)()
            node = field_handler.update(
                config,
                field_definitions,
                node,
                row,
                custom_field,
                node_field_values[custom_field],
            )

        if not node_has_all_fields:
            continue

        if value_is_numeric(row["node_id"]) is False:
            row["node_id"] = get_nid_from_url_alias(config, row["node_id"])
        # row["node_id"] may not be a string if it was resolved from a URL alias.
        node_uri = config["host"] + "/node/" + str(row["node_id"])
        node_endpoint = node_uri + "?_format=json"
        node_headers = {"Content-Type": "application/json"}
        # Make a GET request to the content type
        content_type_endpoint = f"{config['host']}/entity/node_type/{config['content_type']}?_format=json"
        content_type_endpoint_response = issue_request(
            config, "GET", content_type_endpoint
        )
        # See if revisions are enabled for the content type, if so create a new revision log message.
        if content_type_endpoint_response.status_code == 200:
            revisions_enabled = json.loads(content_type_endpoint_response.text)[
                "new_revision"
            ]
            if revisions_enabled:
                # Add revision information to node.
                revision_log_message = (
                    row["revision_log"]
                    if "revision_log" in row and row["revision_log"] != ""
                    else "Updated by Islandora Workbench."
                )
                revision_json = {"revision_log": [{"value": revision_log_message}]}
                node.update(revision_json)

        node_response = issue_request(
            config, "PATCH", node_endpoint, node_headers, node
        )

        if node_response.status_code == 200:
            if config["progress_bar"] is False:
                print("Node " + node_uri + " updated.")
            logging.info("Node %s updated.", node_uri)
        else:
            if config["progress_bar"] is False:
                print(
                    "Error: Node " + node_uri + " not updated. See log for more detail."
                )
            logging.error(
                "Node %s not updated (server response code was '%s').",
                node_uri,
                node_response.status_code,
            )

        # Execute node-specific post-update scripts, if any are configured.
        if "node_post_update" in config and len(config["node_post_update"]) > 0:
            for command in config["node_post_update"]:
                (
                    post_task_output,
                    post_task_return_code,
                ) = execute_entity_post_task_script(
                    command,
                    args.config,
                    node_response.status_code,
                    node_response.text,
                )
                if post_task_return_code == 0:
                    logging.info(
                        "Post node update script " + command + " executed successfully."
                    )
                else:
                    logging.error("Post node update script " + command + " failed.")

        # NOTE(review): 'num_csv_records' and 'pbar' are not defined in this
        # function; presumably they are module-level names - confirm.
        if config["progress_bar"] is True:
            row_count += 1
            row_position = get_percentage(row_count, num_csv_records)
            pbar(row_position)

        # If 'url_alias' is in the CSV, create the alias.
        if "url_alias" in row and len(row["url_alias"]) > 0:
            create_url_alias(config, row["node_id"], row["url_alias"])


def delete():
    """Delete nodes, optionally deleting each node's media first.

    Reads the module-level 'config' and 'args' objects. For every row in the
    input CSV, the node is pinged and, if reachable, deleted. If
    config["delete_media_with_nodes"] is True, the media listed at the node's
    "/media" endpoint are removed before the node itself. A failure to list or
    delete media is reported but does not prevent the node from being deleted.
    """
    message = '"Delete" task started using config file ' + args.config + "."
    print(message)
    logging.info(message)

    csv_data = get_csv_data(config)

    row_count = 0
    for row in csv_data:
        # Delete expired items from request_cache before processing a row.
        if config["enable_http_cache"] is True:
            requests_cache.delete(expired=True)

        if not value_is_numeric(row["node_id"]):
            row["node_id"] = get_nid_from_url_alias(config, row["node_id"])
        if not ping_node(config, row["node_id"]):
            # Build the message unconditionally so the log entry is correct
            # even when the progress bar suppresses console output.
            message = f"Node {row['node_id']} not found or not accessible, skipping delete."
            if config["progress_bar"] is False:
                print(message)
            logging.warning(message)
            continue

        node_uri = config["host"] + "/node/" + str(row["node_id"])

        # Console messages about media, printed after the node's own message.
        # Reset for every row so messages from a previous node are never reused.
        media_messages = []

        # Delete the node's media first.
        if config["delete_media_with_nodes"] is True:
            media_endpoint = node_uri + "/media?_format=json"
            try:
                media_response = issue_request(config, "GET", media_endpoint)
                media_response_body = json.loads(media_response.text)
            except Exception as e:
                media_response_body = []
                media_messages.append(
                    "- ERROR: Media for "
                    + node_uri
                    + " could not be retrieved. See log for more detail."
                )
                logging.error("Media for %s could not be retrieved: %s", node_uri, e)

            for media in media_response_body:
                if "mid" not in media:
                    continue

                # Each media is handled on its own so that one failure does not
                # stop the remaining media from being deleted.
                media_id = None
                try:
                    media_id = media["mid"][0]["value"]
                    media_delete_status_code = remove_media_and_file(config, media_id)
                except Exception as e:
                    media_messages.append(
                        "- ERROR: Media "
                        + config["host"]
                        + "/media/"
                        + str(media_id)
                        + " not deleted. See log for more detail."
                    )
                    logging.error(
                        "Media %s not deleted: %s",
                        config["host"] + "/media/" + str(media_id),
                        e,
                    )
                    continue

                if media_delete_status_code == 204:
                    media_messages.append(
                        "+ Media "
                        + config["host"]
                        + "/media/"
                        + str(media_id)
                        + " deleted."
                    )
                else:
                    media_messages.append(
                        "- ERROR: Media "
                        + config["host"]
                        + "/media/"
                        + str(media_id)
                        + " not deleted. See log for more detail."
                    )
                    logging.error(
                        "Media %s not deleted (response code was '%s').",
                        config["host"] + "/media/" + str(media_id),
                        media_delete_status_code,
                    )

        node_endpoint = node_uri + "?_format=json"
        node_response = issue_request(config, "DELETE", node_endpoint)
        if node_response.status_code == 204:
            if config["progress_bar"] is False:
                print("Node " + node_uri + " deleted.")
            logging.info("Node %s deleted.", node_uri)
        else:
            if config["progress_bar"] is False:
                print(
                    "Error: Node " + node_uri + " not deleted. See log for more detail."
                )
            logging.error(
                "Node %s not deleted (server response code was '%s').",
                node_uri,
                node_response.status_code,
            )

        if (
            config["delete_media_with_nodes"] is True
            and config["progress_bar"] is False
        ):
            for media_message in media_messages:
                print(media_message)

        # NOTE(review): 'num_csv_records' and 'pbar' are not defined in this
        # function; presumably they are module-level names - confirm.
        if config["progress_bar"] is True:
            row_count += 1
            row_position = get_percentage(row_count, num_csv_records)
            pbar(row_position)


def add_media():
    """Add media to existing nodes.

    Reads the module-level 'config' and 'args' objects. For every row in the
    input CSV, a media is created from the file named in the "file" column
    and, if config["additional_files"] is present, from each of the columns it
    names. Rows whose node cannot be reached are reported and skipped.

    Exits the script if a file named in the CSV does not exist and
    config["allow_missing_files"] is False.
    """
    message = '"Add media" task started using config file ' + args.config + "."
    print(message)
    logging.info(message)

    csv_data = get_csv_data(config)

    row_count = 0
    for row in csv_data:
        # Delete expired items from request_cache before processing a row.
        if config["enable_http_cache"] is True:
            requests_cache.delete(expired=True)

        if not value_is_numeric(row["node_id"]):
            row["node_id"] = get_nid_from_url_alias(config, row["node_id"])
        # row["node_id"] may no longer be a string once a URL alias has been
        # resolved (the lookup can return False), so use this string form
        # whenever the ID is concatenated into a message.
        node_id = str(row["node_id"])
        if not ping_node(config, row["node_id"]):
            message = (
                "Node " + node_id + " not found or not accessible, skipping adding media."
            )
            print(message)
            logging.warning(message)
            continue

        allowed_media_response_codes = [201, 204]

        node_uri = config["host"] + "/node/" + node_id
        node_json_url = node_uri + "?_format=json"
        node_response = issue_request(config, "HEAD", node_json_url)

        if "media_use_tid" in row:
            media_use_tid_value = row["media_use_tid"]
        else:
            # Get media use TID from config within create_media().
            media_use_tid_value = None

        if node_response.status_code == 200:
            if len(row["file"].strip()) == 0:
                message = (
                    "Media for node "
                    + node_id
                    + ' not created since CSV column "file" is empty.'
                )
                logging.warning(message)
                print("Warning: " + message)
            else:
                if check_file_exists(config, row["file"]) is False:
                    message = (
                        'File "'
                        + row["file"]
                        + '" identified in CSV "file" column for node ID '
                        + node_id
                        + " not found."
                    )
                    logging.error(message)
                    if config["allow_missing_files"] is False:
                        sys.exit("Error: " + message)
                else:
                    media_response_status_code = create_media(
                        config,
                        row["file"],
                        "file",
                        row["node_id"],
                        row,
                        media_use_tid_value,
                    )
                    if media_response_status_code in allowed_media_response_codes:
                        if config["progress_bar"] is False:
                            print(
                                'Media for "'
                                + row["file"]
                                + '" created and added to '
                                + node_uri
                            )
                        logging.info(
                            'Media for "%s" created and added to %s.',
                            row["file"],
                            node_uri,
                        )
                    else:
                        if config["progress_bar"] is False:
                            print(
                                "Error: Media for "
                                + row["file"]
                                + " not created. See log for more information."
                            )
                        logging.error(
                            'Media for "%s" not created (HTTP response code %s).',
                            row["file"],
                            media_response_status_code,
                        )

            # There are additional CSV columns naming files.
            if "additional_files" in config:
                additional_files_config = get_additional_files_config(config)
                if len(additional_files_config) > 0:
                    for (
                        additional_file_field,
                        additional_file_media_use_tid,
                    ) in additional_files_config.items():
                        if len(row[additional_file_field].strip()) == 0:
                            if config["progress_bar"] is False:
                                print(
                                    'Warning: Media for node "'
                                    + node_id
                                    + '" not created since CSV column "'
                                    + additional_file_field
                                    + '" is empty.'
                                )
                            logging.warning(
                                'Media for node %s not created since CSV column "%s" is empty.',
                                node_id,
                                additional_file_field,
                            )
                        elif (
                            check_file_exists(config, row[additional_file_field])
                            is False
                        ):
                            message = (
                                'Additional file "'
                                + row[additional_file_field]
                                + '" identified in CSV "'
                                + additional_file_field
                                + '" column for node ID '
                                + node_id
                                + " not found."
                            )
                            logging.error(message)
                            if config["allow_missing_files"] is False:
                                sys.exit("Error: " + message)
                            else:
                                continue
                        else:
                            media_response_status_code = create_media(
                                config,
                                row[additional_file_field],
                                additional_file_field,
                                row["node_id"],
                                row,
                                additional_file_media_use_tid,
                            )
                            if (
                                media_response_status_code
                                in allowed_media_response_codes
                            ):
                                logging.info(
                                    "Media for %s created and added to %s.",
                                    row[additional_file_field],
                                    node_uri,
                                )
                                if config["progress_bar"] is False:
                                    print(
                                        "Media for "
                                        + row[additional_file_field]
                                        + " created and added to "
                                        + node_uri
                                        + "."
                                    )
                            else:
                                logging.error(
                                    "Media for %s not created (HTTP response code %s).",
                                    row[additional_file_field],
                                    media_response_status_code,
                                )
                                if config["progress_bar"] is False:
                                    print(
                                        "ERROR: Media for "
                                        + row[additional_file_field]
                                        + " not created. See log for more information."
                                    )
        else:
            if config["progress_bar"] is False:
                print(
                    "ERROR: Node at "
                    + node_uri
                    + " does not exist or is not accessible."
                )
            logging.error(
                "Node at %s does not exist or is not accessible (HTTP response code %s)",
                node_uri,
                node_response.status_code,
            )

        # NOTE(review): 'num_csv_records' and 'pbar' are not defined in this
        # function; presumably they are module-level names - confirm.
        if config["progress_bar"] is True:
            row_count += 1
            row_position = get_percentage(row_count, num_csv_records)
            pbar(row_position)


def update_media() -> None:
    """Update media from media IDs in the input CSV."""
    from typing import Optional

    # ========================================================= Helper functions =========================================================

    def get_media_type(
        media_id: str, get_media_response: requests.Response
    ) -> Optional[str]:
        """Look up the bundle (media type) of a media entity.

        Parameters:
            - media_id: The media ID, used only in the error log message.
            - get_media_response: The response from a GET request to the media entity's endpoint.

        Returns:
            The 'target_id' of the first 'bundle' entry in the response's JSON body
            (e.g., 'image'), or None if it could not be read.
        """
        try:
            bundle_entries = get_media_response.json()["bundle"]
            media_type = bundle_entries[0]["target_id"]
        except Exception as e:
            logging.error('Unable to get media type for media ID "%s": %s', media_id, e)
            return None
        return media_type

    def get_media_parent_node_id(
        get_media_response_body: dict, media_csv_row: dict
    ) -> Optional[str]:
        """Get the parent node ID of the media entity.

        Parameters:
            - get_media_response_body: The response body from a GET request to the media entity's endpoint.
            - media_csv_row: The CSV row containing the media entity's field names and values.

        Returns:
            The non-empty 'node_id' value from the CSV row if there is one. Otherwise the
            'target_id' of the first 'field_media_of' entry in the response body, or None
            if the response body has no such entry.

        NOTE: 'media_id', used in the log messages, is not a parameter; it is resolved
        from the enclosing scope when a message is logged.
        """
        # A non-blank node ID in the CSV row takes precedence.
        if media_csv_row.get("node_id"):
            return media_csv_row["node_id"]

        # Use .get() so that a response without a 'field_media_of' key is treated
        # the same as a media that is not attached to any node.
        parent_nodes = get_media_response_body.get("field_media_of")
        if not parent_nodes:
            logging.error(
                "Media ID %s is not attached to any node, which is a requirement for updating media files.",
                media_id,
            )
            return None

        try:
            # The first node in the list of nodes the media entity is attached to.
            return parent_nodes[0]["target_id"]
        except Exception as e:
            logging.error(
                "Unable to get parent node ID for media ID %s: %s", media_id, e
            )
            return None

    def extract_media_id(config: dict, media_csv_row: dict) -> Optional[str]:
        """Determine the media ID that a CSV row refers to.

        Parameters:
            - config: The global configuration object.
            - media_csv_row: The CSV row containing the media entity's field names and values.

        Returns:
            - The media ID as a string, or None (after logging an error) if the
              'media_id' column is missing or empty, the URL alias lookup fails,
              or the media ping does not succeed.
        """
        if "media_id" not in media_csv_row:
            logging.error("Media ID column missing in CSV file.")
            return None

        csv_media_id = media_csv_row["media_id"]
        if not csv_media_id:
            logging.error("Row with empty media_id column detected in CSV file.")
            return None

        if value_is_numeric(csv_media_id):
            # Numeric values are used as is, provided the media can be pinged.
            if ping_media(config, csv_media_id) is not True:
                logging.error("Media ID %s does not exist.", csv_media_id)
                return None
            return csv_media_id

        # Non-numeric values are assumed to be media URL aliases. The lookup
        # returns False if the alias does not exist.
        resolved_media_id = get_mid_from_media_url_alias(config, csv_media_id)
        if resolved_media_id is False:
            logging.error("Media URL alias %s does not exist.", csv_media_id)
            return None
        return str(resolved_media_id)

    def delete_media_file(
        config: dict, media_id: str, get_media_response_body: dict
    ) -> bool:
        """Delete file attached to the media entity.

        Parameters:
            - config: The global configuration object.
            - media_id: A valid media entity ID.
            - get_media_response_body: The response body from a GET request to the media entity's endpoint.

        Returns:
            True if the file was successfully deleted or if there was no file attached to this media, False otherwise.

        NOTE: 'file_fields' is not a parameter; it is resolved from the enclosing scope.
        """
        # Must be initialized: if none of the file fields is present in the
        # response body, there is no file to delete.
        file_to_delete = None

        # Inspect the JSON response to get the file ID from the first file field present.
        for file_field_name in file_fields:
            if file_field_name in get_media_response_body:
                try:
                    file_to_delete = str(
                        get_media_response_body[file_field_name][0]["target_id"]
                    )
                except Exception as e:
                    logging.warning(
                        'Unable to get file ID for media "%s" (%s).', media_id, e
                    )
                    return True
                break

        if not file_to_delete:
            return True

        # Now we delete the file
        file_endpoint = (
            config["host"] + "/entity/file/" + file_to_delete + "?_format=json"
        )
        file_response = issue_request(config, "DELETE", file_endpoint)
        if file_response.status_code == 204:
            logging.info("File %s (from media %s) deleted.", file_to_delete, media_id)
            return True

        logging.error(
            "File %s (from media %s) not deleted (HTTP response code %s).",
            file_to_delete,
            media_id,
            file_response.status_code,
        )
        return False

    def delete_media_track_files(
        config: dict, media_id: str, media_type: str, get_media_response_body: dict
    ) -> bool:
        """Delete the track files attached to the media entity.

        Parameters:
            - config: The global configuration object.
            - media_id: A valid media entity ID.
            - media_type: The media entity's type.
            - get_media_response_body: The response body from a GET request to the media entity's endpoint.

        Returns:
            True if every track file was deleted, or if the media type's track file field is not
            present in the response body. False as soon as a track file's ID cannot be read or a
            DELETE request returns anything other than HTTP 204 (remaining track files are not attempted).

        Raises:
            KeyError: If media_type has no entry in config["media_track_file_fields"].
        """
        track_field_name = config["media_track_file_fields"][media_type]

        if track_field_name not in get_media_response_body:
            logging.warning(
                "Unable to find track files for media ID %s. Proceeding and assuming there were no track files to begin with.",
                media_id,
            )
            return True

        for track_file in get_media_response_body[track_field_name]:
            # Inspect the JSON response to get the file ID.
            try:
                file_to_delete = str(track_file["target_id"])
            except (
                KeyError,
                TypeError,
            ) as e:  # There is a track file attached to this media, but we can't get its ID
                logging.warning(
                    'Unable to get track file ID for a track file attached to media ID "%s" (%s).',
                    media_id,
                    e,
                )
                return False

            # Use the same file entity endpoint as delete_media_file(). The "/edit" suffix
            # selected by config["standalone_media_url"] is applied to /media/ URLs elsewhere
            # in this task, not to /entity/file/ URLs.
            file_endpoint = (
                config["host"] + "/entity/file/" + file_to_delete + "?_format=json"
            )

            file_response = issue_request(config, "DELETE", file_endpoint)
            if file_response.status_code != 204:
                logging.error(
                    "Track File %s (from media %s) not deleted (HTTP response code %s).",
                    file_to_delete,
                    media_id,
                    file_response.status_code,
                )
                return False

            logging.info(
                "Track File %s (from media %s) deleted.",
                file_to_delete,
                media_id,
            )

        return True

    def attach_file_to_media(config: dict, media_type: str, file_id: str) -> dict:
        """Build the PATCH payload that attaches a file entity to a media entity.

        Parameters:
            - config: The global configuration object.
            - media_type: The media entity's type (e.g., 'image').
            - file_id: A valid file entity ID.

        Returns:
            A dict keyed by the media type's file field name, whose value is a one-item list
            holding the file reference.

        Raises:
            KeyError: If media_type has no entry in config["media_type_file_fields"].
        """
        # Name of the field that holds the file for this media type
        # (e.g. 'field_media_image' for media type 'image').
        file_field_name = config["media_type_file_fields"][media_type]
        file_reference = {"target_id": file_id, "target_type": "file"}
        return {file_field_name: [file_reference]}

    def attach_track_files_to_media(
        config: dict,
        media_type: str,
        track_label_list: list,
        track_type_list: list,
        track_language_list: list,
        file_id_list: list,
    ) -> dict:
        """Return the JSON object for a PATCH request required to attach the track files and their information to a media entity.

        The four lists are parallel: entry i of each list describes the same track file.
        One track entry is produced per item in file_id_list.

        Parameters:
            - config: The global configuration object.
            - media_type: The media entity's type (e.g., 'image').
            - track_label_list: A list of track labels.
            - track_type_list: A list of track types.
            - track_language_list: A list of track languages.
            - file_id_list: A list of file IDs.

        Returns:
            A dict keyed by the media type's track file field name, whose value is the list of track entries.

        Raises:
            KeyError: If media_type has no entry in config["media_track_file_fields"].
            IndexError: If any of the label/type/language lists is shorter than file_id_list.
        """
        track_field_name = config["media_track_file_fields"][media_type]
        # Index the companion lists by position (rather than zip) so that a short
        # list raises instead of silently dropping track files.
        track_entries = [
            {
                "target_id": file_id,
                "label": track_label_list[position],
                "kind": track_type_list[position],
                "srclang": track_language_list[position],
                "target_type": "file",
            }
            for position, file_id in enumerate(file_id_list)
        ]
        return {track_field_name: track_entries}

    def patch_media_use_terms_update_media(media_use_tids):
        """Build the PATCH payload that sets a media entity's media use terms.
           Note: workbench_utils has its own patch_media_use_terms().

        Parameters:
            - media_use_tids: A list of taxonomy term IDs to patch to the media entity's field_media_use.

        Returns:
            A dict with a single "field_media_use" key holding one taxonomy term reference per term ID.
        """
        term_references = []
        for term_id in media_use_tids:
            term_references.append(
                {"target_id": term_id, "target_type": "taxonomy_term"}
            )
        return {"field_media_use": term_references}

    def patch_media_status(status: bool) -> dict:
        """Build the PATCH payload that sets a media entity's "Published" status.

        Parameters:
            - status: True if the media entity should be published, False otherwise.

        Returns:
            A dict with a single "status" key wrapping the given value. The value is passed
            through as-is; no type checking is performed here.
        """
        status_item = {"value": status}
        return {"status": [status_item]}

    def is_revisions_enabled(config: dict, media_type: str):
        """Report whether the given media type is configured to create new revisions.

        Issues a GET request for the media type's configuration and reads its
        "new_revision" value.

        Parameters:
            - config: The global configuration object.
            - media_type: The media entity's type (e.g., 'image').

        Returns:
            The "new_revision" value from the media type's JSON when the request returns
            HTTP 200, False for any other response code.
        """
        response = issue_request(
            config,
            "GET",
            f"{config['host']}/entity/media_type/{media_type}?_format=json",
        )
        # Any response other than 200 is treated as "revisions not enabled".
        if response.status_code != 200:
            return False

        media_type_settings = json.loads(response.text)
        return media_type_settings["new_revision"]

    def patch_media_revision_log_message(revision_log_message) -> dict:
        """Build the PATCH payload that sets a media entity's revision log message.

        Parameters:
            - revision_log_message: The revision log message.

        Returns:
            A dict with a single "revision_log_message" key wrapping the given message.
        """
        log_message_item = {"value": revision_log_message}
        return {"revision_log_message": [log_message_item]}

    # ========================================================= Main Logic =========================================================

    # TODO: Updating the media file and the name simultaneously does not work. The media takes the name of the new file and not the specified name.

    message = (
        '"Update media" ('
        + config["update_mode"]
        + ") task started using config file "
        + args.config
        + "."
    )
    print(message)
    logging.info(message)

    csv_data = get_csv_data(config)
    csv_column_headers = csv_data.fieldnames

    if (
        config["id_field"] not in csv_data.fieldnames
    ):  # If the CSV file does not contain the ID field, we use the media ID field by default
        config["id_field"] = "media_id"

    row_count = 0
    for row in csv_data:
        # Delete expired items from request_cache before processing a row.
        if config["enable_http_cache"] is True:
            requests_cache.delete(expired=True)

        media_id = extract_media_id(
            config, row
        )  # Extract the media ID from the CSV row
        if media_id is None:  # If the media ID is invalid, skip this row
            row_count += 1
            print(
                "There are errors for CSV row "
                + str(row_count)
                + ". Please check the log for more details."
            )
            if config["progress_bar"] is True:
                row_position = get_percentage(row_count, num_csv_records)
                pbar(row_position)
            continue

        ping_media_result = ping_media(config, row["media_id"], "GET", True)

        # Now, the user may want to update one or more of the following
        # - Media File
        # - Track File
        # - Media Use TID
        # - Published status
        # - Plain text fields pertaining to the media.

        # We'll need the GET response for this media on multiple occasions.
        if config["standalone_media_url"] is True:
            media_json_url = config["host"] + "/media/" + media_id + "?_format=json"
        else:
            media_json_url = (
                config["host"] + "/media/" + media_id + "/edit?_format=json"
            )

        get_media_response = issue_request(config, "GET", media_json_url)
        get_media_response_body = json.loads(get_media_response.text)

        # From this we can get the media type, which we'll need as well.
        media_type = get_media_type(media_id, get_media_response)
        # If the media type is invalid, skip this row.
        if media_type is None:
            row_count += 1
            print(
                "Media at "
                + config["host"]
                + "/media/"
                + media_id
                + " could not be updated. Please check the log for more details."
            )
            if config["progress_bar"] is True:
                row_position = get_percentage(row_count, num_csv_records)
                pbar(row_position)
            continue

        field_definitions = get_field_definitions(config, "media", media_type)

        # Populate the media JSON that we PATCH with, which gets added to below.
        patch_request_json = {"bundle": [{"target_id": media_type}]}

        # Update media file.
        if "file" in row and row["file"] != "":
            # We need to first get the parent node ID of this media.
            node_id = get_media_parent_node_id(get_media_response_body, row)
            if node_id is None:  # If the node ID is invalid, skip this row.
                row_count += 1
                print(
                    "Media at "
                    + config["host"]
                    + "/media/"
                    + media_id
                    + " could not be updated. Please check the log for more details."
                )
                if config["progress_bar"] is True:
                    row_position = get_percentage(row_count, num_csv_records)
                    pbar(row_position)
                continue

            # At this point we have the node ID of the parent node of the media.
            # We use this with the create_file function to create the media file on the server.
            file_id = create_file(config, row["file"], "file", row, node_id)
            if (
                file_id is False or file_id is None
            ):  # If the file ID is invalid, skip this row.
                logging.error(
                    "Failed to create file for media ID "
                    + media_id
                    + ". Skipping this row."
                )
                row_count += 1
                print(
                    "Media at "
                    + config["host"]
                    + "/media/"
                    + media_id
                    + " could not be updated. Please check the log for more details."
                )
                if config["progress_bar"] is True:
                    row_position = get_percentage(row_count, num_csv_records)
                    pbar(row_position)
                continue

            # Now we'll get the JSON for the PATCH request to the file_field_name to update the file.
            try:
                patch_request_json.update(
                    attach_file_to_media(config, media_type, file_id)
                )
            except KeyError:
                logging.error("The media type " + media_type + " is not supported.")
                row_count += 1
                print(
                    "Media at "
                    + config["host"]
                    + "/media/"
                    + media_id
                    + " could not be updated. Please check the log for more details."
                )
                if config["progress_bar"] is True:
                    row_position = get_percentage(row_count, num_csv_records)
                    pbar(row_position)
                continue

        # Update track file.
        invalid_track_file = False
        if (
            media_type in config["media_track_file_fields"]
            and config["media_track_file_fields"][media_type] in row
            and row[config["media_track_file_fields"][media_type]] != ""
        ):
            # Get the node id of the parent node of the media, which is required for uploading the file.
            node_id = get_media_parent_node_id(get_media_response_body, row)
            if node_id is None:  # If the node ID is invalid, skip this row.
                row_count += 1
                print(
                    "Media at "
                    + config["host"]
                    + "/media/"
                    + media_id
                    + " could not be updated. Please check the log for more details."
                )
                if config["progress_bar"] is True:
                    row_position = get_percentage(row_count, num_csv_records)
                    pbar(row_position)
                continue

            # There may be multiple track files specified, separated by a delimiter. Add the information for each of these to a list.
            track_files = row[config["media_track_file_fields"][media_type]].split(
                config["subdelimiter"]
            )
            # Now make a dictionary with four lists, one for each of the four fields in the track file field corresponding to every track file.
            track_files_info = {
                "track_labels": [],
                "track_types": [],
                "track_languages": [],
                "track_file_ids": [],
            }
            # Loop over every track file and add the information to the lists.
            for track_file in track_files:
                if not validate_media_track_value(track_file):
                    logging.error(
                        "Invalid track file value for media ID "
                        + media_id
                        + ". Skipping this row."
                    )
                    invalid_track_file = True
                    break
                (
                    track_label,
                    track_type,
                    track_language,
                    track_filepath,
                ) = track_file.split(":")
                track_files_info["track_labels"].append(track_label)
                track_files_info["track_types"].append(track_type)
                track_files_info["track_languages"].append(track_language)
                # From the track file path, we can upload the file to the server and get the file ID.
                file_id = create_file(
                    config,
                    track_filepath,
                    config["media_track_file_fields"][media_type],
                    row,
                    node_id,
                )
                if not file_id:
                    logging.error(
                        "Failed to create file for media ID "
                        + media_id
                        + ". Skipping this row."
                    )
                    invalid_track_file = True
                    break
                track_files_info["track_file_ids"].append(file_id)
            if invalid_track_file:
                row_count += 1
                print(
                    "Media at "
                    + config["host"]
                    + "/media/"
                    + media_id
                    + " could not be updated. Please check the log for more details."
                )
                if config["progress_bar"] is True:
                    row_position = get_percentage(row_count, num_csv_records)
                    pbar(row_position)
                continue

            # Now we'll get the JSON for the PATCH request to the track_file_field_name to update the track files.
            try:
                patch_request_json.update(
                    attach_track_files_to_media(
                        config,
                        media_type,
                        track_files_info["track_labels"],
                        track_files_info["track_types"],
                        track_files_info["track_languages"],
                        track_files_info["track_file_ids"],
                    )
                )
            except KeyError:
                logging.error(
                    "The media type "
                    + media_type
                    + " is not set to have a track file field."
                )
                row_count += 1
                print(
                    "Media at "
                    + config["host"]
                    + "/media/"
                    + media_id
                    + " could not be updated. Please check the log for more details."
                )
                if config["progress_bar"] is True:
                    row_position = get_percentage(row_count, num_csv_records)
                    pbar(row_position)
                continue

        # Update media use tid.
        invalid_media_use_tid = False
        if "media_use_tid" in row:
            if (
                row["media_use_tid"] != ""
            ):  # User expects us to update media use tid to the one provided in the CSV.
                media_use_tids = row["media_use_tid"].split(config["subdelimiter"])
            else:  # User expects us to update media use tid to the default media use tid.
                media_use_tids = str(config["media_use_tid"]).split(
                    config["subdelimiter"]
                )

            for i in range(
                len(media_use_tids)
            ):  # Iterate through the list of media_use_tid values and process each tid.
                if not value_is_numeric(media_use_tids[i]):
                    tid = get_term_id_from_uri(
                        config, media_use_tids[i]
                    )  # Note that this call checks if the term exists by pinging it and returns False if it doesn't
                    if (
                        tid is False
                    ):  # If media use term URL alias does not exist, skip updating media
                        logging.error(
                            "Media use term URL alias %s not found or not accessible, skipping updating media.",
                            media_use_tids[i],
                        )
                        invalid_media_use_tid = True
                        break
                    else:
                        media_use_tids[i] = str(
                            tid
                        )  # As get_term_id_from_uri returns the tid as an int, we need to convert it to a string

                else:  # User has specified a numeric media use tid
                    if not ping_term(
                        config, media_use_tids[i]
                    ):  # Check if the media use term exists by pinging it
                        logging.error(
                            "Media use term %s not found or not accessible, skipping updating media.",
                            media_use_tids[i],
                        )
                        invalid_media_use_tid = True
                        break

            if invalid_media_use_tid:
                row_count += 1
                print(
                    "Media at "
                    + config["host"]
                    + "/media/"
                    + media_id
                    + " could not be updated. Please check the log for more details."
                )
                if config["progress_bar"] is True:
                    row_position = get_percentage(row_count, num_csv_records)
                    pbar(row_position)
                continue

            # By this point we have a valid media ID and media use tid, so we can update the media use tid.
            patch_request_json.update(
                patch_media_use_terms_update_media(media_use_tids)
            )  # This call returns a dict with the JSON for the PATCH request to update the media use tid

        # Update media status.
        if "status" in row and row["status"] != "":
            if row["status"] == "1" or row["status"].lower() == "true":
                patch_request_json.update(patch_media_status(True))
            elif row["status"] == "0" or row["status"].lower() == "false":
                patch_request_json.update(patch_media_status(False))
            else:
                logging.error("Invalid value for published status.")
                row_count += 1
                print(
                    "Media at "
                    + config["host"]
                    + "/media/"
                    + media_id
                    + " could not be updated. Please check the log for more details."
                )
                if config["progress_bar"] is True:
                    row_position = get_percentage(row_count, num_csv_records)
                    pbar(row_position)
                continue

        # Add custom (non-required) fields.
        required_fields = ["media_id"]
        custom_fields = list(set(csv_column_headers) - set(required_fields))
        for custom_field in custom_fields:
            media_has_all_fields = True
            # If media doesn't have the field, log that fact and skip updating the field.
            reserved_fields = ["published", "url_alias", "file", "revision_log"]
            if (
                custom_field not in json.loads(ping_media_result)
                and custom_field not in reserved_fields
            ):
                message = f'Media {row["media_id"]} does not have a "{custom_field}" field, skipping update.'
                print(f"ERROR: " + message)
                logging.warning(message)
                media_has_all_fields = False
                break

            media_field_values = get_media_field_values(config, row["media_id"])

            # Skip updating field if CSV field is empty (other than for 'delete' update mode).
            # For 'delete' update mode it doesn't matter if there's anything in the CSV field,
            # but users expect to be able to supply empty values for this operation.
            if len(row[custom_field].strip()) == 0:
                if config["update_mode"] != "delete":
                    continue

            if custom_field not in reserved_fields:
                # Assemble Drupal field structures from CSV data. If new field types are added to
                # workbench_fields.py, they need to be registered in the following if/elif/else block.

                # Entity reference fields (taxonomy term and node).
                if field_definitions[custom_field]["field_type"] == "entity_reference":
                    entity_reference_field = workbench_fields.EntityReferenceField()
                    patch_request_json = entity_reference_field.update(
                        config,
                        field_definitions,
                        patch_request_json,
                        row,
                        custom_field,
                        media_field_values[custom_field],
                    )

                # Entity reference revisions fields (paragraphs).
                elif (
                    field_definitions[custom_field]["field_type"]
                    == "entity_reference_revisions"
                ):
                    entity_reference_revisions_field = (
                        workbench_fields.EntityReferenceRevisionsField()
                    )
                    patch_request_json = entity_reference_revisions_field.update(
                        config,
                        field_definitions,
                        patch_request_json,
                        row,
                        custom_field,
                        media_field_values[custom_field],
                    )

                # Typed relation fields (currently, only taxonomy term).
                elif field_definitions[custom_field]["field_type"] == "typed_relation":
                    typed_relation_field = workbench_fields.TypedRelationField()
                    patch_request_json = typed_relation_field.update(
                        config,
                        field_definitions,
                        patch_request_json,
                        row,
                        custom_field,
                        media_field_values[custom_field],
                    )

                # Geolocation fields.
                elif field_definitions[custom_field]["field_type"] == "geolocation":
                    geolocation_field = workbench_fields.GeolocationField()
                    patch_request_json = geolocation_field.update(
                        config,
                        field_definitions,
                        patch_request_json,
                        row,
                        custom_field,
                        media_field_values[custom_field],
                    )

                # Link fields.
                elif field_definitions[custom_field]["field_type"] == "link":
                    link_field = workbench_fields.LinkField()
                    patch_request_json = link_field.update(
                        config,
                        field_definitions,
                        patch_request_json,
                        row,
                        custom_field,
                        media_field_values[custom_field],
                    )

                # Authority Link fields.
                elif field_definitions[custom_field]["field_type"] == "authority_link":
                    link_field = workbench_fields.AuthorityLinkField()
                    patch_request_json = link_field.update(
                        config,
                        field_definitions,
                        patch_request_json,
                        row,
                        custom_field,
                        media_field_values[custom_field],
                    )

                # For non-entity reference and non-typed relation fields (text, etc.).
                else:
                    simple_field = workbench_fields.SimpleField()
                    patch_request_json = simple_field.update(
                        config,
                        field_definitions,
                        patch_request_json,
                        row,
                        custom_field,
                        media_field_values[custom_field],
                    )

        # Before the patch requests, we delete the existing media/track files if the user wants to replace them.
        # Delete the old media file.
        if "file" in row and row["file"] != "":
            # Delete the old file which was attached to this media from the server.
            if not delete_media_file(config, media_id, get_media_response_body):
                row_count += 1
                print(
                    "Media at "
                    + config["host"]
                    + "/media/"
                    + media_id
                    + " could not be updated. Please check the log for more details."
                )
                if config["progress_bar"] is True:
                    row_position = get_percentage(row_count, num_csv_records)
                    pbar(row_position)
                continue

        # Delete the old track files.
        if (
            media_type in config["media_track_file_fields"]
            and config["media_track_file_fields"][media_type] in row
            and row[config["media_track_file_fields"][media_type]] != ""
        ):
            # Delete the old track files which were attached to this media from the server.
            if not delete_media_track_files(
                config, media_id, media_type, get_media_response_body
            ):
                row_count += 1
                print(
                    "Media at "
                    + config["host"]
                    + "/media/"
                    + media_id
                    + " could not be updated. Please check the log for more details."
                )
                if config["progress_bar"] is True:
                    row_position = get_percentage(row_count, num_csv_records)
                    pbar(row_position)
                continue

        # Update revision log message.
        if is_revisions_enabled(config, media_type):
            revision_log_message = (
                row["revision_log"]
                if "revision_log" in row and row["revision_log"] != ""
                else "Updated by Islandora Workbench."
            )
            patch_request_json.update(
                patch_media_revision_log_message(revision_log_message)
            )

        # Make the PATCH request.
        if config["standalone_media_url"] is True:
            update_media_url = config["host"] + "/media/" + media_id + "?_format=json"
        else:
            update_media_url = (
                config["host"] + "/media/" + media_id + "/edit?_format=json"
            )

        headers = {"Content-Type": "application/json"}
        response = issue_request(
            config, "PATCH", update_media_url, headers, patch_request_json
        )
        if response.status_code != 200:
            logging.error(
                "Error updating media "
                + media_id
                + ". Response code: "
                + str(response.status_code)
                + ". Response body: "
                + response.text
            )
            print(
                "Media at "
                + config["host"]
                + "/media/"
                + media_id
                + " could not be updated. Please check the log for more details."
            )
            row_count += 1
            if config["progress_bar"] is True:
                row_position = get_percentage(row_count, num_csv_records)
                pbar(row_position)
        else:
            print(
                "Media at "
                + config["host"]
                + "/media/"
                + media_id
                + " updated successfully."
            )
            row_count += 1
            if config["progress_bar"] is True:
                row_position = get_percentage(row_count, num_csv_records)
                pbar(row_position)


def delete_media():
    """Delete media (and their associated files) listed in the input CSV.

    Each row's "media_id" value is used as-is when it is numeric; otherwise
    it is resolved with get_mid_from_media_url_alias() first. The media is
    then removed with remove_media_and_file(). A 204 status code from that
    call is treated as success; any other value is reported as an error.
    """
    message = '"Delete media" task started using config file ' + args.config + "."
    print(message)
    logging.info(message)

    # Initialize the progress bar locally, as create_terms() does, so that the
    # names used at the bottom of the loop are defined in this function.
    if config["progress_bar"] is True:
        num_csv_records = len(list(get_csv_data(config)))
        pbar = InitBar()

    csv_data = get_csv_data(config)

    row_count = 0
    for row in csv_data:
        # Delete expired items from request_cache before processing a row.
        if config["enable_http_cache"] is True:
            requests_cache.delete(expired=True)

        if not value_is_numeric(row["media_id"]):
            # The non-numeric value in the "media_id" column is the value to
            # resolve, so look it up from that same column.
            row["media_id"] = get_mid_from_media_url_alias(config, row["media_id"])
        media_delete_status_code = remove_media_and_file(config, row["media_id"])
        media_url = config["host"] + "/media/" + str(row["media_id"])

        if media_delete_status_code == 204:
            message = "Media " + media_url + " and associated file deleted."
            # Only print when no progress bar is being drawn; always log.
            if config["progress_bar"] is False:
                print(message)
            logging.info(message)
        else:
            # The failure branch belongs to the status-code check, not to the
            # progress-bar check, so failed deletions are always reported.
            message = "Media " + media_url + " and associated file not deleted."
            print("ERROR: " + message + " See log for more information.")
            logging.error("%s HTTP response code %s.", message, media_delete_status_code)

        if config["progress_bar"] is True:
            row_count += 1
            row_position = get_percentage(row_count, num_csv_records)
            pbar(row_position)


def delete_media_by_node():
    """Delete all media from node IDs in the input CSV.

    Each row's "node_id" value is used as-is when it is numeric; otherwise it
    is resolved with get_nid_from_url_alias(). Rows whose node fails the
    ping_node() check are skipped with a warning. For the remaining nodes,
    every media ID returned by get_node_media_ids() (filtered by the
    "delete_media_by_node_media_use_tids" setting) is removed with
    remove_media_and_file(); a 204 status code is treated as success.
    """
    message = (
        '"Deleting media by node" task started using config file ' + args.config + "."
    )
    print(message)
    logging.info(message)

    # Initialize the progress bar locally, as create_terms() does, so that the
    # names used at the bottom of the loop are defined in this function.
    if config["progress_bar"] is True:
        num_csv_records = len(list(get_csv_data(config)))
        pbar = InitBar()

    csv_data = get_csv_data(config)

    row_count = 0
    for row in csv_data:
        # Delete expired items from request_cache before processing a row.
        if config["enable_http_cache"] is True:
            requests_cache.delete(expired=True)

        if not value_is_numeric(row["node_id"]):
            # The non-numeric value in the "node_id" column is the value to
            # resolve, so look it up from that same column.
            row["node_id"] = get_nid_from_url_alias(config, row["node_id"])
        # str() so message building works whether the ID came from the CSV
        # or from the alias lookup.
        node_id = str(row["node_id"])

        node_ping_result = ping_node(config, row["node_id"], "GET", True)
        if node_ping_result is False:
            # Build the message unconditionally so the log entry is always
            # about this node, even when printing is suppressed.
            message = (
                "Node "
                + node_id
                + " not found or not accessible, skipping deleting media."
            )
            if config["progress_bar"] is False:
                print(message)
            logging.warning(message)
            continue

        media_ids_from_node = get_node_media_ids(
            config, row["node_id"], config["delete_media_by_node_media_use_tids"]
        )
        if media_ids_from_node is not False:
            for media_id in media_ids_from_node:
                media_delete_status_code = remove_media_and_file(config, media_id)
                media_description = (
                    "Node "
                    + node_id
                    + "'s media "
                    + config["host"]
                    + "/media/"
                    + str(media_id)
                )
                if media_delete_status_code == 204:
                    message = media_description + " and associated files deleted."
                    # Only print when no progress bar is being drawn.
                    if config["progress_bar"] is False:
                        print(message)
                    logging.info(message)
                else:
                    # The failure branch belongs to the status-code check,
                    # so failed deletions are always reported.
                    message = media_description + " and associated files not deleted."
                    print("ERROR: " + message + " See log for more information.")
                    logging.error(
                        "%s HTTP response code %s.",
                        message,
                        media_delete_status_code,
                    )

        if config["progress_bar"] is True:
            row_count += 1
            row_position = get_percentage(row_count, num_csv_records)
            pbar(row_position)


def create_from_files():
    """Create new nodes from files only (no CSV), and add media. The nodes will
    have a title (derived from filename), and a config-defined Islandora model,
    content type, and status. Media use is derived from config as well.

    For every entry in config["input_dir"] (rollback CSV files and the CSV ID
    to node ID map file are skipped) a node is POSTed. On a 201 response the
    node ID is written to the rollback CSV and to the CSV ID to node ID map,
    any configured "node_post_create" scripts are run, and a media is created
    for the file. Other responses are logged as errors.
    """
    message = '"Create from files" task started using config file ' + args.config + "."
    print(message)
    logging.info(message)

    prepare_csv_id_to_node_id_map(config)

    file_dir_path = config["input_dir"]
    files = os.listdir(file_dir_path)

    path_to_rollback_csv_file = get_rollback_csv_filepath(config)
    prep_rollback_csv(config, path_to_rollback_csv_file)
    logging.info("Writing rollback CSV to " + path_to_rollback_csv_file)

    # Initialize the progress bar locally, as create_terms() does, so that
    # "pbar" is defined in this function.
    if config["progress_bar"] is True:
        pbar = InitBar()

    num_files = len(files)
    file_count = 0
    for file_name in files:
        # Delete expired items from request_cache before processing a row.
        if config["enable_http_cache"] is True:
            requests_cache.delete(expired=True)

        if file_name.startswith("rollback.") and file_name.endswith("csv"):
            continue
        if config["csv_id_to_node_id_map_path"] is not False and file_name.endswith(
            config["csv_id_to_node_id_map_path"]
        ):
            continue

        filename_without_extension = os.path.splitext(file_name)[0]
        max_title_length = config["max_node_title_length"]
        if len(filename_without_extension) > max_title_length:
            message = (
                'Truncating the filename "'
                + filename_without_extension
                + '" since it exceeds maximum node title length of '
                + str(max_title_length)
                + " characters."
            )
            logging.error(message)
            # Truncate to the configured limit that was just tested and
            # logged, not to a hard-coded length.
            filename_without_extension = filename_without_extension[:max_title_length]

        node_json = {
            "type": [{"target_id": config["content_type"], "target_type": "node_type"}],
            "title": [{"value": filename_without_extension}],
        }

        # Add field_model if that field exists in the current content type.
        entity_fields = get_entity_fields(config, "node", config["content_type"])
        if "field_model" in entity_fields:
            islandora_model = set_model_from_extension(file_name, config)
            node_json["field_model"] = [
                {"target_id": islandora_model, "target_type": "taxonomy_term"}
            ]

        node_headers = {"Content-Type": "application/json"}
        node_endpoint = "/node?_format=json"
        node_response = issue_request(
            config, "POST", node_endpoint, node_headers, node_json, None
        )
        if node_response.status_code == 201:
            returned_node = json.loads(node_response.text)
            node_id = returned_node["nid"][0]["value"]
            node_uri = config["host"] + "/node/" + str(node_id)

            if config["progress_bar"] is False:
                print(
                    'Node for "'
                    + filename_without_extension
                    + '" created at '
                    + node_uri
                    + "."
                )
            logging.info(
                'Node for "%s" created at %s.', filename_without_extension, node_uri
            )
            if "output_csv" in config.keys():
                write_to_output_csv(config, "", node_response.text)

            # The node ID as a string, taken from the last URI segment.
            node_nid = node_uri.rsplit("/", 1)[-1]
            write_rollback_node_id(config, node_nid, path_to_rollback_csv_file)

            populate_csv_id_to_node_id_map(config, "", "", file_name, node_nid)

            # Execute node-specific post-create scripts, if any are configured.
            if "node_post_create" in config and len(config["node_post_create"]) > 0:
                for command in config["node_post_create"]:
                    (
                        post_task_output,
                        post_task_return_code,
                    ) = execute_entity_post_task_script(
                        command,
                        args.config,
                        node_response.status_code,
                        node_response.text,
                    )
                    if post_task_return_code == 0:
                        logging.info(
                            "Post node create script "
                            + command
                            + " executed successfully."
                        )
                    else:
                        logging.error("Post node create script " + command + " failed.")

            # There is no CSV in this task, so build a minimal stand-in record
            # for the helpers that expect a CSV row.
            file_path = os.path.join(config["input_dir"], file_name)
            fake_csv_record = collections.OrderedDict()
            fake_csv_record["title"] = filename_without_extension
            fake_csv_record["file"] = file_path

            media_type = set_media_type(config, file_path, "file", fake_csv_record)

            if media_type == "image":
                fake_csv_record["image_alt_text"] = filename_without_extension
            media_response_status_code = create_media(
                config, file_name, "file", node_nid, fake_csv_record
            )
            allowed_media_response_codes = [201, 204]
            if media_response_status_code in allowed_media_response_codes:
                if config["progress_bar"] is False:
                    print("+ Media for " + filename_without_extension + " created.")
                logging.info("Media for %s created.", file_path)
        else:
            logging.error(
                'Node for "%s" not created, HTTP response code was %s.',
                os.path.join(config["input_dir"], file_name),
                node_response.status_code,
            )

        if config["progress_bar"] is True:
            file_count += 1
            file_position = get_percentage(file_count, num_files)
            pbar(file_position)

    if config["progress_bar"] is True:
        pbar(100)


def export_csv():
    """Export a CSV file with values, in Islandora Workbench format,
    for each node in the input CSV.

    The output file starts with a header row, a row of field labels and a row
    of field cardinalities, followed by one row per exported node. Nodes that
    fail the ping check, or whose content type differs from the "content_type"
    setting, are skipped.

    Returns:
        False if the GET request for a node returns a non-200 status code (the
        export stops at that node); otherwise None.
    """
    message = '"Export CSV" task started using config file ' + args.config + "."
    if config["export_csv_term_mode"] == "name":
        message = (
            message
            + ' The "export_csv_term_mode" configuration option is set to "name", which will slow down the export.'
        )
    print(message)
    logging.info(message)

    field_definitions = get_field_definitions(config, "node")

    remove_column = "REMOVE THIS COLUMN (KEEP THIS ROW)"

    if len(config["export_csv_field_list"]) > 0:
        field_names = list(config["export_csv_field_list"])
    else:
        field_names = [
            remove_column,
            "node_id",
            "title",
            "langcode",
            "uid",
            "created",
        ] + list(field_definitions.keys())

    # Remove duplicates while keeping the first occurrence of each name.
    deduped_field_names = list(dict.fromkeys(field_names))
    # We always include 'node_id' and 'REMOVE THIS COLUMN (KEEP THIS ROW)'.
    # They are checked separately: the labels and cardinality rows written
    # below always carry the "remove" column, and DictWriter rejects rows
    # containing keys that are not in its fieldnames.
    if "node_id" not in deduped_field_names:
        deduped_field_names.insert(0, "node_id")
    if remove_column not in deduped_field_names:
        deduped_field_names.insert(0, remove_column)

    field_labels = collections.OrderedDict()
    for field_name in field_definitions:
        if field_name in deduped_field_names:
            field_labels[field_name] = field_definitions[field_name]["label"]
    field_labels[remove_column] = "LABEL (REMOVE THIS ROW)"

    if config["export_csv_file_path"] is not None:
        csv_file_path = config["export_csv_file_path"]
    else:
        csv_file_path = os.path.join(
            config["input_dir"], config["input_csv"] + ".csv_file_with_field_values"
        )
    if os.path.exists(csv_file_path):
        os.remove(csv_file_path)

    if (
        config["export_file_directory"] is not None
        and "file" not in deduped_field_names
    ):
        deduped_field_names.append("file")

    cardinality = collections.OrderedDict()
    cardinality[remove_column] = "NUMBER OF VALUES ALLOWED (REMOVE THIS ROW)"
    cardinality["node_id"] = "1"
    cardinality["uid"] = "1"
    cardinality["langcode"] = "1"
    cardinality["created"] = "1"
    cardinality["title"] = "1"
    for field_name in field_definitions:
        if field_definitions[field_name]["cardinality"] == -1:
            cardinality[field_name] = "unlimited"
        else:
            cardinality[field_name] = field_definitions[field_name]["cardinality"]

    cardinality_filtered = collections.OrderedDict()
    for cardinality_key in cardinality.keys():
        if cardinality_key in deduped_field_names:
            cardinality_filtered[cardinality_key] = cardinality[cardinality_key]

    # Initialize the progress bar locally, as create_terms() does, so that the
    # names used inside the loop are defined in this function.
    if config["progress_bar"] is True:
        num_csv_records = len(list(get_csv_data(config)))
        pbar = InitBar()

    # The context manager closes the file on every exit path, including the
    # early "return False" below and any exception raised while exporting.
    with open(csv_file_path, "a+", encoding="utf-8") as csv_file:
        writer = csv.DictWriter(
            csv_file, fieldnames=deduped_field_names, lineterminator="\n"
        )
        writer.writeheader()
        writer.writerow(field_labels)
        writer.writerow(cardinality_filtered)

        csv_data = get_csv_data(config)

        row_count = 0
        for row in csv_data:
            # Delete expired items from request_cache before processing a row.
            if config["enable_http_cache"] is True:
                requests_cache.delete(expired=True)

            output_row = collections.OrderedDict()
            if not value_is_numeric(row["node_id"]):
                row["node_id"] = get_nid_from_url_alias(config, row["node_id"])
            # str() so message building works whether the ID came from the
            # CSV or from the alias lookup.
            node_id = str(row["node_id"])

            if not ping_node(config, row["node_id"]):
                message = "Node " + node_id + " not found or not accessible, skipping export."
                if config["progress_bar"] is False:
                    print(message)
                logging.warning(message)
                continue

            # Get node.
            url = f"{config['host']}/node/{row['node_id']}?_format=json"
            response = issue_request(config, "GET", url)
            if response.status_code != 200:
                message = f"Attempt to get node {node_id} returned a {response.status_code} status code."
                print("  Error: " + message)
                logging.warning(message)
                return False

            body = json.loads(response.text)
            node_content_type = body["type"][0]["target_id"]
            if node_content_type != config["content_type"]:
                message = (
                    f"Node {node_id} not written to output CSV because its content type {node_content_type}"
                    + ' does not match the "content_type" configuration setting.'
                )
                if config["progress_bar"] is False:
                    print("Error: " + message)
                logging.error(message)
                continue

            for fieldname_to_serialize in deduped_field_names:
                if (
                    fieldname_to_serialize in body
                    and fieldname_to_serialize in field_definitions
                ):
                    # Named so it does not rebind "csv_data", which is the
                    # iterable this loop is reading rows from.
                    serialized_value = serialize_field_json(
                        config,
                        field_definitions,
                        fieldname_to_serialize,
                        body[fieldname_to_serialize],
                    )
                    output_row[fieldname_to_serialize] = serialized_value

            if config["export_file_directory"] is not None:
                downloaded_file_name = download_file_from_drupal(config, row["node_id"])
                output_row["file"] = downloaded_file_name

            output_row["node_id"] = row["node_id"]
            writer.writerow(output_row)

            and_files = "and file " if config["export_file_directory"] is not None else ""
            node_title = body["title"][0]["value"]
            message = f'Exporting data {and_files}for node {node_id} "{node_title}".'

            if config["progress_bar"] is True:
                row_count += 1
                row_position = get_percentage(row_count, num_csv_records)
                pbar(row_position)
            else:
                print(message)

            logging.info(message)

    if config["progress_bar"] is True:
        pbar(100)
    else:
        print("CSV export saved at " + csv_file_path + ".")


def _write_view_page_to_csv(
    nodes, response_status_code, writer, field_names, field_definitions, seen_nids
):
    """Write one page of View results to the output CSV.

    Args:
        nodes: The decoded JSON list of node entities from one View page.
        response_status_code: HTTP status code of the View request, passed on
            to any configured "node_post_export" scripts.
        writer: The csv.DictWriter to write rows to.
        field_names: The output CSV's column names.
        field_definitions: Field definitions passed to serialize_field_json().
        seen_nids: Set of node IDs already written; updated in place.
    """
    for node in nodes:
        # Delete expired items from request_cache before processing a row.
        if config["enable_http_cache"] is True:
            requests_cache.delete(expired=True)

        nid = node["nid"][0]["value"]
        if nid in seen_nids:
            continue

        node_content_type = node["type"][0]["target_id"]
        if node_content_type != config["content_type"]:
            message = (
                f"Node {nid} not written to output CSV because its content type ("
                + f'{node_content_type}) does not match the "content_type" configuration setting.'
            )
            print("Warning: " + message)
            logging.warning(message)
            continue

        seen_nids.add(nid)
        title = node["title"][0]["value"]
        row = {"node_id": nid, "title": title}

        and_files = "and file " if config["export_file_directory"] is not None else ""
        message = f'Exporting data {and_files}for node {nid} "{title}".'
        print(message)
        logging.info(message)

        for field_name in field_names:
            if field_name.startswith("field_") and field_name in node:
                row[field_name] = serialize_field_json(
                    config, field_definitions, field_name, node[field_name]
                )

        if config["export_file_directory"] is not None:
            row["file"] = download_file_from_drupal(config, nid)

        writer.writerow(row)

        # Execute node-specific post-export scripts, if any are configured.
        if "node_post_export" in config and len(config["node_post_export"]) > 0:
            for command in config["node_post_export"]:
                (
                    post_task_output,
                    post_task_return_code,
                ) = execute_entity_post_task_script(
                    command,
                    args.config,
                    response_status_code,
                    json.dumps(node),
                )
                if post_task_return_code == 0:
                    logging.info(
                        "Post node export script "
                        + command
                        + " executed successfully."
                    )
                else:
                    logging.error("Post node export script " + command + " failed.")


def get_data_from_view():
    """Retrieve data from a Drupal View via its REST export display.

    The View is requested page by page, starting at page 0 and stopping at
    the first empty page. Each node not already seen, and whose content type
    matches the "content_type" setting, is written as a row to the output CSV.
    The process exits if the View cannot be reached, if the first page request
    fails, or if the writability probe below fails.

    Note: We won't be able to use the progress_bar option in this task until
    https://www.drupal.org/project/drupal/issues/2982729 is resolved, since
    we have no way of knowing how many items are in the View output til then.
    """
    message = '"Get data from View" task started using config file ' + args.config + "."
    print(message)
    logging.info(message)

    view_parameters = (
        "&".join(config["view_parameters"]) if "view_parameters" in config else ""
    )
    view_base_url = config["host"] + "/" + config["view_path"].lstrip("/") + "?page="

    first_page_url = view_base_url + "0&" + view_parameters
    view_path_status_code = ping_view_endpoint(config, first_page_url)
    if view_path_status_code != 200:
        message = f"Cannot access View at {first_page_url}."
        logging.error(
            message + " HTTP status code is " + str(view_path_status_code) + "."
        )
        sys.exit("Error: " + message + " See log for more information.")

    if config["export_csv_file_path"] is not None:
        csv_file_path = config["export_csv_file_path"]
    else:
        csv_file_path = os.path.join(
            config["input_dir"],
            os.path.basename(args.config).split(".")[0]
            + ".csv_file_with_data_from_view",
        )
    if os.path.exists(csv_file_path):
        os.remove(csv_file_path)

    # Probe whether the configured CSV path can be created. The probe is
    # skipped when "export_csv_file_path" is not set, since there is then no
    # configured path to test.
    # NOTE(review): the probe is only run when "export_file_directory" is
    # set; confirm that this is the intended condition.
    if (
        config["export_file_directory"] is not None
        and config["export_csv_file_path"] is not None
        and not os.path.exists(config["export_csv_file_path"])
    ):
        try:
            os.mkdir(config["export_csv_file_path"])
            os.rmdir(config["export_csv_file_path"])
        except Exception as e:
            message = (
                'Path in configuration option "export_csv_file_path" ("'
                + config["export_csv_file_path"]
                + '") is not writable.'
            )
            logging.error(message + " " + str(e))
            sys.exit("Error: " + message + " See log for more detail.")

    field_definitions = get_field_definitions(config, "node")

    # Always include node_id and title. A new list is built so that the list
    # stored in the configuration is not modified.
    if len(config["export_csv_field_list"]) > 0:
        field_names = ["node_id", "title"] + list(config["export_csv_field_list"])
    else:
        field_names = ["node_id", "title"] + [
            field_name
            for field_name in reversed(list(field_definitions))
            if field_name.startswith("field_")
        ]

    # Remove duplicates while keeping the first occurrence of each name.
    deduped_field_names = list(dict.fromkeys(field_names))

    if (
        config["export_file_directory"] is not None
        and "file" not in deduped_field_names
    ):
        deduped_field_names.append("file")

    seen_nids = set()

    # The context manager closes the file even if a request, a write or
    # sys.exit() interrupts the export.
    with open(csv_file_path, "a+", encoding="utf-8") as csv_file:
        writer = csv.DictWriter(
            csv_file, fieldnames=deduped_field_names, lineterminator="\n"
        )
        writer.writeheader()

        # Seed the first page of node IDs.
        page = 0
        url = view_base_url + str(page) + "&" + view_parameters
        response = issue_request(config, "GET", url)
        if response.status_code != 200:
            message = f"Request to View at {url} returned a non-200 status ({response.status_code})."
            logging.error(message)
            sys.exit("Error: " + message)
        nodes = json.loads(response.text)
        _write_view_page_to_csv(
            nodes,
            response.status_code,
            writer,
            deduped_field_names,
            field_definitions,
            seen_nids,
        )

        # Loop through the remaining pages of the View output, until we encounter an empty page.
        # NOTE(review): a failed page is skipped and "nodes" keeps its previous
        # value, so if every later request fails this loop never sees an empty
        # page; consider bounding the number of consecutive failures.
        while len(nodes) > 0:
            page += 1
            url = view_base_url + str(page) + "&" + view_parameters
            response = issue_request(config, "GET", url)
            if response.status_code != 200:
                message = f"Request to View at {url} returned a non-200 status ({response.status_code}); page {page} of results not written to the output CSV file."
                logging.error(message)
                continue
            nodes = json.loads(response.text)
            _write_view_page_to_csv(
                nodes,
                response.status_code,
                writer,
                deduped_field_names,
                field_definitions,
                seen_nids,
            )

    message = "CSV file is available at " + csv_file_path + "."
    logging.info(message)
    print(message)


def _write_media_report_page_to_csv(nodes, writer, seen_nids):
    """Write one page of View results to the media report CSV.

    Args:
        nodes: The decoded JSON list of node entities from one View page.
        writer: The csv.DictWriter to write report rows to.
        seen_nids: Set of node IDs already reported; updated in place.
    """
    for node in nodes:
        # Delete expired items from request_cache before processing a row.
        if config["enable_http_cache"] is True:
            requests_cache.delete(expired=True)

        nid = node["nid"][0]["value"]
        if nid in seen_nids:
            continue
        seen_nids.add(nid)

        title = node["title"][0]["value"]

        # Not every content type has field_model, and the field may be empty,
        # so fall back to an empty model name instead of failing the report.
        model_values = node.get("field_model") or []
        if model_values:
            islandora_model = get_term_name(config, model_values[0]["target_id"])
        else:
            islandora_model = ""

        row = {
            "Node ID": nid,
            "Title": title,
            "Content type": node["type"][0]["target_id"],
            "Islandora model": islandora_model,
            "Media": get_node_media_summary(config, nid),
        }

        message = f'Exporting media report for node {nid} "{title}".'
        logging.info(message)
        print(message)

        writer.writerow(row)


def get_media_report_from_view():
    """Generates a report of what media are present based on a list of nodes in a View.

    The View is requested page by page, starting at page 0 and stopping at
    the first empty page. Each node not already seen gets one report row
    containing its ID, title, content type, Islandora model name and media
    summary. The process exits if the View cannot be reached or if the first
    page request fails.

    Note: We won't be able to use the progress_bar option in this task until
    https://www.drupal.org/project/drupal/issues/2982729 is resolved, since
    we have no way of knowing how many items are in the View output til then.
    """
    message = (
        '"Get media report from View" task started using config file '
        + args.config
        + "."
    )
    print(message)
    logging.info(message)

    view_parameters = (
        "&".join(config["view_parameters"]) if "view_parameters" in config else ""
    )
    view_base_url = config["host"] + "/" + config["view_path"].lstrip("/") + "?page="

    first_page_url = view_base_url + "0&" + view_parameters
    view_path_status_code = ping_view_endpoint(config, first_page_url)
    if view_path_status_code != 200:
        message = f"Cannot access View at {first_page_url}."
        logging.error(
            message + " HTTP status code is " + str(view_path_status_code) + "."
        )
        sys.exit("Error: " + message + " See log for more information.")

    if config["export_csv_file_path"] is not None:
        csv_file_path = config["export_csv_file_path"]
    else:
        csv_file_path = os.path.join(
            config["input_dir"],
            os.path.basename(args.config).split(".")[0]
            + ".csv_file_with_media_report_from_view",
        )
    if os.path.exists(csv_file_path):
        os.remove(csv_file_path)

    report_field_names = [
        "Node ID",
        "Title",
        "Content type",
        "Islandora model",
        "Media",
    ]

    seen_nids = set()

    # The context manager closes the file even if a request, a write or
    # sys.exit() interrupts the report.
    with open(csv_file_path, "a+", encoding="utf-8") as csv_file:
        writer = csv.DictWriter(
            csv_file, fieldnames=report_field_names, lineterminator="\n"
        )
        writer.writeheader()

        # Seed the first page of node IDs.
        page = 0
        url = view_base_url + str(page) + "&" + view_parameters
        response = issue_request(config, "GET", url)
        if response.status_code != 200:
            message = f"Request to View at {url} returned a non-200 status ({response.status_code})."
            logging.error(message)
            sys.exit("Error: " + message)
        nodes = json.loads(response.text)
        _write_media_report_page_to_csv(nodes, writer, seen_nids)

        # Loop through the remaining pages of the View output, until we encounter an empty page.
        # NOTE(review): a failed page is skipped and "nodes" keeps its previous
        # value, so if every later request fails this loop never sees an empty
        # page; consider bounding the number of consecutive failures.
        while len(nodes) > 0:
            page += 1
            url = view_base_url + str(page) + "&" + view_parameters
            response = issue_request(config, "GET", url)
            if response.status_code != 200:
                message = f"Request to View at {url} returned a non-200 status ({response.status_code}); page {page} of results not written to the output CSV file."
                logging.error(message)
                continue
            nodes = json.loads(response.text)
            _write_media_report_page_to_csv(nodes, writer, seen_nids)

    message = "CSV file is available at " + csv_file_path + "."
    logging.info(message)
    print(message)


def _create_term_from_csv_row(term_row):
    """Create the term described by a CSV row unless it is already in the vocabulary.

    Looks up term_row["term_name"] in the vocabulary identified by
    config["vocab_id"] and only calls create_term() when the lookup returns
    False. The outcome is printed unless the progress bar is enabled, and the
    "already exists" case is also written to the log.
    """
    term_name = term_row["term_name"]
    vocab_id = config["vocab_id"]

    if find_term_in_vocab(config, vocab_id, term_name) is not False:
        message = (
            'Term "'
            + term_name
            + '" already exists in the "'
            + vocab_id
            + '" vocabulary, skipping.'
        )
        if config["progress_bar"] is not True:
            print(message)
        logging.info(message)
        return

    # Successful creation, and failure, is logged in create_term().
    term_id = create_term(config, vocab_id, term_name, term_row)
    if config["progress_bar"] is not True:
        if term_id is not False:
            print('Term "' + term_name + '" created.')
        else:
            print(
                'Error: Term "'
                + term_name
                + '" not created. See log for more information.'
            )


def create_terms():
    """Create new terms via POST.

    The input CSV is read in three passes: one to collect every value used in
    the "parent" column, one to create the terms that are parents (so their IDs
    are available to the child terms), and one to create all remaining terms.
    Each CSV row is counted exactly once towards the progress bar.
    """
    message = '"Create terms" task started using config file ' + args.config + "."
    print(message)
    logging.info(message)

    if config["csv_headers"] == "labels":
        fieldname_map_cache_path = os.path.join(
            config["temp_dir"],
            f"taxonomy_term-{config['vocab_id']}-labels.fieldname_map",
        )
        if os.path.exists(fieldname_map_cache_path):
            os.remove(fieldname_map_cache_path)

    show_progress_bar = config["progress_bar"] is True
    if show_progress_bar:
        num_csv_records = len(list(get_csv_data(config)))
        pbar = InitBar()

    # These should be set in WorkbenchConfig.get_config() but aren't
    # taking effect there. @todo: address later.
    config["allow_adding_terms"] = True
    config["id_field"] = "term_name"

    # First check for any terms that are designated as parents (i.e., they
    # are in the 'parent' column in at least one row). A set keeps the
    # membership tests in the two loops below cheap.
    is_parent = {
        is_parent_check_row["parent"]
        for is_parent_check_row in get_csv_data(config)
        if "parent" in is_parent_check_row
        and len(str(is_parent_check_row["parent"]).strip()) > 0
    }

    term_row_count = 0

    # Then, get the CSV rows for each of the terms in is_parent and create the terms
    # so their IDs are available to the child terms. Only rows that are actually
    # handled here are counted; the other rows are counted in the loop further down.
    if is_parent:
        for parent_row in get_csv_data(config):
            if parent_row["term_name"] not in is_parent:
                continue
            term_row_count += 1
            _create_term_from_csv_row(parent_row)
            if show_progress_bar:
                pbar(get_percentage(term_row_count, num_csv_records))

    # Finally, create any non-existent child (or non-hierarchical) terms.
    for row in get_csv_data(config):
        # Delete expired items from request_cache before processing a row.
        if config["enable_http_cache"] is True:
            requests_cache.delete(expired=True)

        # If it's a parent term, it will have been created above.
        if row["term_name"] in is_parent:
            continue
        term_row_count += 1
        _create_term_from_csv_row(row)
        if show_progress_bar:
            pbar(get_percentage(term_row_count, num_csv_records))

    if show_progress_bar:
        pbar(100)


def update_terms():
    """Update taxonomy terms via PATCH.

    Note that PATCHing replaces the target field, so if we are adding an
    additional value to a multivalued field, we need to include the existing
    value(s) in our PATCH. The field classes take care of preserving existing
    values in 'append' updates.

    The input CSV must contain a "term_id" column; the process exits if it
    does not. A "term_id" value that is not numeric is treated as a term name
    and looked up in the vocabulary. A row is skipped when its term cannot be
    pinged, or when the CSV contains a column that is not present in the
    term's JSON representation.

    See https://github.com/mjordan/islandora_workbench/issues/469 for more info.
    """
    message = (
        '"Update Terms" ('
        + config["update_mode"]
        + ") task started using config file "
        + args.config
        + "."
    )
    print(message)
    logging.info(message)

    if config["csv_headers"] == "labels":
        fieldname_map_cache_path = os.path.join(
            config["temp_dir"],
            f"taxonomy_term-{config['vocab_id']}-labels.fieldname_map",
        )
        if os.path.exists(fieldname_map_cache_path):
            os.remove(fieldname_map_cache_path)

    # This should be set in WorkbenchConfig.get_config() but isn't
    # taking effect there. @todo: address later.
    config["id_field"] = "term_id"

    field_definitions = get_field_definitions(
        config, "taxonomy_term", config["vocab_id"]
    )
    csv_data = get_csv_data(config)
    csv_column_headers = csv_data.fieldnames

    if config["log_term_creation"] is False:
        logging.info(
            "'log_term_creation' configuration setting is False. Creation of new taxonomy terms will not be logged."
        )

    if "term_id" not in csv_column_headers:
        message = (
            'For "update_terms" tasks, your CSV file must contain a "term_id" column.'
        )
        logging.error(message)
        sys.exit("Error: " + message)

    # 'term_id' identifies the term, and the reserved columns map onto term base
    # fields that are handled explicitly below. Every other (de-duplicated) CSV
    # column is treated as a custom field.
    reserved_fields = ["parent", "weight", "description", "term_name"]
    custom_fields = list(
        dict.fromkeys(
            header
            for header in csv_column_headers
            if header != "term_id" and header not in reserved_fields
        )
    )

    # Assemble Drupal field structures from CSV data. If new field types are added to
    # workbench_fields.py, they need to be registered in this table. Field types
    # that are not listed (text, etc.) are handled by SimpleField.
    field_handler_classes = {
        "entity_reference": workbench_fields.EntityReferenceField,
        "typed_relation": workbench_fields.TypedRelationField,
        "geolocation": workbench_fields.GeolocationField,
        "link": workbench_fields.LinkField,
        "authority_link": workbench_fields.AuthorityLinkField,
    }

    row_count = 0
    for row in csv_data:
        # Delete expired items from request_cache before processing a row.
        if config["enable_http_cache"] is True:
            requests_cache.delete(expired=True)

        # The 'term_name' column is optional, so fall back to the identifier as
        # it appears in the CSV when we need to name the term in a message.
        term_label = row.get("term_name") or str(row["term_id"])

        if not value_is_numeric(row["term_id"]):
            row["term_id"] = str(
                find_term_in_vocab(config, config["vocab_id"], row["term_id"])
            )

        if ping_term(config, row["term_id"]) is False:
            message = "Term " + term_label + " not found or not accessible, skipping update."
            if config["progress_bar"] is False:
                print(message)
            logging.warning(message)
            continue

        # If the term doesn't have one of the CSV's custom fields, log that fact and
        # skip the row. This is checked before any field handler runs so that a
        # skipped row does not trigger any field processing.
        if custom_fields:
            url = (
                config["host"]
                + "/taxonomy/term/"
                + str(row["term_id"])
                + "?_format=json"
            )
            term_response = issue_request(config, "GET", url)
            existing_term = json.loads(term_response.text)
            missing_field = next(
                (field for field in custom_fields if field not in existing_term),
                None,
            )
            if missing_field is not None:
                message = f'Term {term_label} does not have a "{missing_field}" field, skipping update.'
                print("ERROR: " + message)
                logging.warning(message)
                continue

        term = {
            "vid": [
                {"target_id": config["vocab_id"], "target_type": "taxonomy_vocabulary"}
            ],
        }

        term_field_values = get_term_field_values(config, row["term_id"])

        if "term_name" in csv_column_headers:
            if len(row["term_name"].strip()) != 0:
                term["name"] = [{"value": row["term_name"]}]

        # Some optional term base fields.
        if "parent" in csv_column_headers:
            if len(row["parent"].strip()) != 0:
                if not value_is_numeric(row["parent"]):
                    row["parent"] = find_term_in_vocab(
                        config, config["vocab_id"], row["parent"]
                    )
                term["parent"] = [{"target_id": row["parent"]}]

        if "weight" in csv_column_headers:
            if len(row["weight"].strip()) != 0 and value_is_numeric(row["weight"]):
                term["weight"] = [{"value": row["weight"]}]

        if "description" in csv_column_headers:
            if len(row["description"].strip()) != 0:
                term["description"] = [{"value": row["description"]}]

        # Add custom (non-required) fields.
        for custom_field in custom_fields:
            field_type = field_definitions[custom_field]["field_type"]
            field_handler_class = field_handler_classes.get(
                field_type, workbench_fields.SimpleField
            )
            term = field_handler_class().update(
                config,
                field_definitions,
                term,
                row,
                custom_field,
                term_field_values[custom_field],
            )

        term_uri = config["host"] + "/taxonomy/term/" + row["term_id"]
        term_endpoint = term_uri + "?_format=json"
        term_headers = {"Content-Type": "application/json"}
        term_response = issue_request(
            config, "PATCH", term_endpoint, term_headers, term
        )

        if term_response.status_code == 200:
            if config["progress_bar"] is False:
                print("Term " + term_uri + " updated.")
            logging.info("Term %s updated.", term_uri)
        else:
            # Don't let a rejected PATCH pass silently.
            message = f"Term {term_uri} not updated (HTTP response code was {term_response.status_code})."
            if config["progress_bar"] is False:
                print("Error: " + message)
            logging.error(message)

        if config["progress_bar"] is True:
            # num_csv_records and pbar are not defined in this function; they are
            # the module-level values the main program sets up when the progress
            # bar is enabled.
            row_count += 1
            row_position = get_percentage(row_count, num_csv_records)
            pbar(row_position)


def create_redirects():
    """Create new redirect entities via POST, one per row of the input CSV.

    A row is skipped with a warning when its "redirect_source" starts with
    "http", or when a HEAD request for the source path already answers with
    a 30x status code. Targets that do not start with "http" are submitted
    as "internal:/" URIs; all other targets are submitted unchanged.
    """
    start_message = (
        '"Create redirects" task started using config file ' + args.config + "."
    )
    print(start_message)
    logging.info(start_message)

    if config["progress_bar"] is True:
        num_csv_records = len(list(get_csv_data(config)))
        pbar = InitBar()

    # Each row of the CSV data describes one redirect to create.
    for redirect_row_count, row in enumerate(get_csv_data(config), start=1):
        # Delete expired items from request_cache before processing a row.
        if config["enable_http_cache"] is True:
            requests_cache.delete(expired=True)

        source = row["redirect_source"]
        if source.lower().startswith("http"):
            message = f'Redirect source values cannot contain a hostname, they must be a path only. Skipping creating redirect from "{source}".'
            print("Warning: " + message)
            logging.warning(message)
            continue

        target = row["redirect_target"]
        if target.lower().startswith("http"):
            # It's an external redirect.
            redirect_target = target
        else:
            # It's an internal redirect.
            redirect_target = "internal:/" + target

        # Check to see if the redirect source value is already a redirect. We don't use issue_request()
        # since we don't want to override config["allow_redirects"] for this one request.
        is_redirect_response = requests.head(
            config["host"].rstrip("/") + "/" + source,
            allow_redirects=False,
            verify=config["secure_ssl_only"],
            auth=(config["username"], config["password"]),
        )
        source_status_code = str(is_redirect_response.status_code)
        if source_status_code.startswith("30"):
            message = f'Redirect from "{source.strip()}" is already a redirect (HTTP response code is {source_status_code}), skipping.'
            print("Warning: " + message)
            logging.warning(message)
            continue

        # @TODO: --check should indicate that the redirect source path doesn't exist.
        redirect = {
            "type": [{"value": "redirect"}],
            "redirect_source": [{"path": source.lstrip("/").strip()}],
            "redirect_redirect": [{"uri": redirect_target.strip()}],
            "status_code": [{"value": int(config["redirect_status_code"])}],
        }
        response = issue_request(
            config,
            "POST",
            config["host"] + "/entity/redirect?_format=json",
            {"Content-Type": "application/json"},
            redirect,
            None,
        )

        redirect_description = (
            f'Redirect from "{source.strip()}" to "{target.strip()}"'
        )
        if response.status_code != 201:
            message = redirect_description + " not created."
            print("Error: " + message + " See log for more info.")
            logging.error(message + " HTTP status code: " + str(response.status_code))
        else:
            message = "OK, " + redirect_description + " created."
            print(message)
            logging.info(message)

        if config["progress_bar"] is True:
            pbar(get_percentage(redirect_row_count, num_csv_records))

    if config["progress_bar"] is True:
        pbar(100)


# Main program logic.

# Command-line options as (flag, add_argument() keyword arguments) pairs, listed
# in the order they are registered with the parser.
cli_options = [
    ("--config", {"required": True, "help": "Configuration file to use."}),
    (
        "--check",
        {
            "help": "Check input data and exit without creating/updating/etc.",
            "action": "store_true",
        },
    ),
    (
        "--get_csv_template",
        {
            "help": "Generate a CSV template using the specified configuration file.",
            "action": "store_true",
        },
    ),
    (
        "--quick_delete_node",
        {"help": "Delete the node (and all attached media) identified by the URL)."},
    ),
    (
        "--quick_delete_media",
        {"help": "Delete the media (and attached file) identified by the URL)."},
    ),
    ("--contactsheet", {"help": "Generate a contact sheet.", "action": "store_true"}),
    (
        "--skip_user_prompts",
        {
            "help": 'Include to skip any user prompts defined in your config file\'s "user_prompts" setting.',
            "action": "store_true",
        },
    ),
    ("--version", {"action": "version", "version": "Islandora Workbench 0.0.0"}),
]

parser = argparse.ArgumentParser()
for option_flag, option_settings in cli_options:
    parser.add_argument(option_flag, **option_settings)
args = parser.parse_args()

try:
    workbench_config = WorkbenchConfig(args)
except Exception as e:
    # Workbench wouldn't get this far as the YAML config file has syntax errors or is not found,
    # or there has been a connection error such as an expired SSL cert. We can't log the exception
    # since config isn't initialized yet.
    #
    # Reading the "host" value back out of the config file is best-effort only: if the
    # file is missing, is not valid YAML, or has no "host" entry, a second exception
    # raised here would hide the original error, so we fall back to a generic message.
    try:
        yaml = YAML()
        with open(args.config, "r") as stream:
            config_to_get_host_value = yaml.load(stream)
        host_value = config_to_get_host_value["host"]
    except Exception:
        host_value = None

    if host_value is not None:
        print(f"Oops, Workbench can't connect to {host_value}.")
        print(
            'Confirm your "host" configuration setting is correct and that the website is running, and try again.'
        )
    else:
        print(
            'Oops, Workbench could not read a "host" setting from the configuration file '
            + str(args.config)
            + "."
        )
        print(
            "Confirm the path to your configuration file is correct and that the file is valid YAML, and try again."
        )
    # Exit with the original error so the user can see the underlying cause.
    sys.exit(str(e))
config = workbench_config.get_config()

# Optionally remind the user to run --check before doing a real run.
if args.check is not True and config["remind_user_to_run_check"] is True:
    if args.skip_user_prompts is not True:
        user_has_run_check = input("Have you run --check? (y/n)")
        if user_has_run_check.lower() != "y":
            logging.info(
                'User was prompted whether they ran --check, and they responded "n".'
            )
            sys.exit(
                "Your Workbench configuration file indicates you were to be reminded to run --check."
            )

# If a --check lock file is configured, a real run is only allowed when the lock
# file exists and its first line matches the message expected for the current
# config file (path plus md5 hash). The lock file is removed once it has been verified.
if args.check is not True and "check_lock_file_path" in config:
    check_lock_file_path = config["check_lock_file_path"]
    if not os.path.exists(check_lock_file_path):
        message = f'The "check_lock_file_path" setting is present in config but the specified lock file, "{check_lock_file_path}", does not exist.'
        logging.error(message)
        sys.exit("Error: " + message)

    config_file_md5 = get_file_hash_from_local(
        config, config["config_file_path"], "md5"
    )
    check_lock_message_expected = f"Check against {config['config_file_path']} (md5 hash {config_file_md5}) OK"
    with open(check_lock_file_path, "r") as check_lock_file:
        check_lock_message_from_file = check_lock_file.readline()

    if check_lock_message_expected.strip() != check_lock_message_from_file.strip():
        message = f'The "check_lock_file_path" setting is present in config, but the contents of the lock file ("{check_lock_message_from_file.strip()}") do not match the expected --check lock message ("{check_lock_message_expected.strip()}").'
        logging.error(message)
        sys.exit("Error: " + message)

    message = f'Check lock file "{check_lock_file_path}" confirms --check was run. Removing lock file.'
    logging.info(message)
    # The file is deleted only after the "with" block above has closed it, since
    # removing a file that is still open raises an error on Windows.
    os.remove(check_lock_file_path)

create_temp_dir(config)

if args.skip_user_prompts is not True and "user_prompts" in config:
    prompt_user(config)

# Register the secondary tasks' config files (as absolute paths) in the
# environment, along with the primary task's temp directory.
if config["secondary_tasks"] is not None and len(config["secondary_tasks"]) > 0:
    secondary_tasks = [
        os.path.abspath(secondary_task_config_path)
        for secondary_task_config_path in config["secondary_tasks"]
    ]
    secondary_tasks_registry_string = json.dumps(secondary_tasks)
    os.environ["ISLANDORA_WORKBENCH_SECONDARY_TASKS"] = secondary_tasks_registry_string
    # We can't use the temp_dir to cache this list since the secondary tasks may not use
    # the same temp_dir as the primary task.
    primary_task_temp_dir = os.path.abspath(config["temp_dir"])
    os.environ["ISLANDORA_WORKBENCH_PRIMARY_TASK_TEMP_DIR"] = primary_task_temp_dir

# Remove any handlers already attached to the root logger before configuring it.
for handler in list(logging.root.handlers):
    logging.root.removeHandler(handler)

# The two supported log line layouts differ only in whether the source file
# name and line number are included.
if config["log_file_name_and_line_number"] is True:
    workbench_log_format = (
        "%(asctime)s - %(levelname)s - %(filename)s - %(lineno)d - %(message)s"
    )
else:
    workbench_log_format = "%(asctime)s - %(levelname)s - %(message)s"

logging.basicConfig(
    filename=config["log_file_path"],
    level=logging.INFO,
    filemode=config["log_file_mode"],
    format=workbench_log_format,
    datefmt="%d-%b-%y %H:%M:%S",
)

ping_islandora(config, print_message=True)
check_integration_module_version(config)

validate_input_dir(config)

# Unzip input data zip archives, downloading first those given as URLs.
if len(config["input_data_zip_archives"]) > 0 and config["nodes_only"] is not True:
    for input_data_zip_archive in config["input_data_zip_archives"]:
        if not input_data_zip_archive.lower().startswith("http"):
            unzip_archive(config, input_data_zip_archive)
            continue
        downloaded_zip_file_path = download_remote_archive_file(
            config, input_data_zip_archive
        )
        unzip_archive(config, downloaded_zip_file_path)

# Outside of --check, exit early if the input CSV has no data rows. Tasks that
# are listed in tasks_to_skip are exempt from this test.
if "check" in config:
    tasks_to_skip = ["create_from_files", "get_data_from_view"]
    if config["check"] is False and config["task"] not in tasks_to_skip:
        csv_data_to_count = get_csv_data(config)
        num_csv_records = sum(1 for _csv_row in csv_data_to_count)
        if num_csv_records == 0:
            zero_data_rows_message = (
                'Input CSV "' + str(config["input_csv"]) + '" contains 0 data rows, exiting.'
            )
            logging.warning(zero_data_rows_message)
            sys.exit("WARNING: " + zero_data_rows_message)

# Run each configured bootstrap script (never during --check) and log its outcome.
if config["check"] is False and "bootstrap" in config and len(config["bootstrap"]) > 0:
    for command in config["bootstrap"]:
        print("Executing bootstrap script " + command)
        output, return_code = execute_bootstrap_script(command, args.config)
        if return_code != 0:
            logging.error(
                f"Bootstrap script {command} failed with exit code {return_code}."
            )
        else:
            logging.info(f"Bootstrap script {command} executed successfully.")

# For every task except "create_from_files", convert input that is a URL or an
# Excel file into CSV. The "input_csv" setting is read again for the second
# test, after the first helper has run.
if config["task"] != "create_from_files":
    if config["input_csv"].startswith("http"):
        get_csv_from_google_sheet(config)
    if config["input_csv"].endswith(".xlsx"):
        get_csv_from_excel(config)

# Set up client-side HTTP request caching if it is enabled, starting from an
# empty cache file when the sqlite backend is used.
if config["enable_http_cache"] is not True:
    message = "Client-side request caching is not enabled."
else:
    http_cache_file = "http_cache.sqlite"
    if config["http_cache_storage"] == "sqlite" and os.path.exists(http_cache_file):
        logging.info('Cleared HTTP cache file "' + http_cache_file + '".')
        os.remove(http_cache_file)
    requests_cache.install_cache(
        backend=config["http_cache_storage"],
        expire_after=config["http_cache_storage_expire_after"],
    )
    message = "Client-side request caching is enabled."
logging.info(message)

check_drupal_core_version(config)

csv_subset_warning(config)

# Apparently, there's no built-in way of getting the number of items in a
# DictReader, so we read the CSV file, convert it to a list, and get its length.
# The "create_from_files" task is excluded from the row count.
if config["progress_bar"] is True:
    if config["task"] != "create_from_files":
        csv_data_as_list = list(get_csv_data(config))
        num_csv_records = len(csv_data_as_list)
    pbar = InitBar()

# Each of the next three functions exits Workbench when it finishes, so code
# after a call that is made is not executed.
if "get_csv_template" in config and config["get_csv_template"]:
    get_csv_template(config, args)

if args.quick_delete_node is not None:
    quick_delete_node(config, args)

if args.quick_delete_media is not None:
    quick_delete_media(config, args)

# Run the --check validation that matches the configured task.
try:
    if "check" in config and config["check"]:
        if config["task"] != "create_from_files":
            check_input(config, args)
        else:
            check_input_for_create_from_files(config, args)
except KeyboardInterrupt:
    print("Exiting before entire --check completed.")
    logging.warning('Workbench exiting after receiving "ctrl-c" during --check.')
    try:
        sys.exit(0)
    except SystemExit:
        os._exit(0)

# Run the configured task, then any secondary tasks and shutdown scripts.
try:
    # Maps each value of the "task" config setting to the function that performs it.
    task_functions = {
        "create": create,
        "update": update,
        "delete": delete,
        "add_media": add_media,
        "delete_media": delete_media,
        "delete_media_by_node": delete_media_by_node,
        "create_from_files": create_from_files,
        "export_csv": export_csv,
        "get_data_from_view": get_data_from_view,
        "get_media_report_from_view": get_media_report_from_view,
        "create_terms": create_terms,
        "update_media": update_media,
        "update_terms": update_terms,
        "create_redirects": create_redirects,
    }
    task_function = task_functions.get(config["task"])
    if task_function is not None:
        task_function()

    # Each secondary task is run as a separate Workbench process, one after the other.
    if config["secondary_tasks"] is not None and len(config["secondary_tasks"]) > 0:
        for secondary_config_file in config["secondary_tasks"]:
            message = (
                "Executing secondary task using configuration file "
                + secondary_config_file
                + "."
            )
            print("")
            print(message)
            logging.info(message)
            cmd = [
                config["path_to_python"],
                config["path_to_workbench_script"],
                "--config",
                secondary_config_file,
            ]
            secondary_task_result = subprocess.run(cmd)
            # Report a secondary task that exited with a non-zero status instead
            # of ignoring its result.
            if secondary_task_result.returncode != 0:
                message = f"Secondary task using configuration file {secondary_config_file} exited with code {secondary_task_result.returncode}."
                print("Error: " + message)
                logging.error(message)

    # Execute shutdown scripts, if any are configured.
    if (
        config["check"] is False
        and "shutdown" in config
        and len(config["shutdown"]) > 0
    ):
        for command in config["shutdown"]:
            print("Executing shutdown script " + command)
            output, return_code = execute_shutdown_script(command, args.config)
            if return_code == 0:
                logging.info(f"Shutdown script {command} executed successfully.")
            else:
                logging.error(
                    f"Shutdown script {command} failed with exit code {str(return_code)}."
                )

    logging.info("Islandora Workbench successfully completed.")

    # Remove the environment variables used to coordinate with secondary tasks.
    os.environ.pop("ISLANDORA_WORKBENCH_PRIMARY_TASK_TEMP_DIR", None)
    os.environ.pop("ISLANDORA_WORKBENCH_SECONDARY_TASKS", None)


except KeyboardInterrupt:
    print("Exiting before entire CSV processed. See log for more info.")
    logging.warning(
        'Workbench exiting after receiving "ctrl-c". Consult the documentation to learn how to resume your batch.'
    )
    try:
        sys.exit(0)
    except SystemExit:
        os._exit(0)