diff --git a/deploy/data_exporter/.gitignore b/deploy/data_exporter/.gitignore
new file mode 100644
index 000000000..0a03531c6
--- /dev/null
+++ b/deploy/data_exporter/.gitignore
@@ -0,0 +1 @@
+.aws-sam
diff --git a/deploy/data_exporter/__init__.py b/deploy/data_exporter/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/deploy/data_exporter/data_export_function/Dockerfile b/deploy/data_exporter/data_export_function/Dockerfile
new file mode 100644
index 000000000..c8da43e18
--- /dev/null
+++ b/deploy/data_exporter/data_export_function/Dockerfile
@@ -0,0 +1,15 @@
+FROM public.ecr.aws/docker/library/ubuntu:24.04
+
+RUN apt update && \
+    apt install -y postgresql-client-16 python3.12 python3-pip curl unzip python3.12-venv && \
+    rm -rf /var/lib/apt/lists/*
+
+COPY requirements.txt .
+RUN python3 -m venv .venv
+RUN ./.venv/bin/pip install -r requirements.txt
+RUN ./.venv/bin/pip install awslambdaric
+
+COPY . .
+
+ENTRYPOINT [".venv/bin/python", "-m", "awslambdaric" ]
+CMD [ "app.lambda_handler" ]
diff --git a/deploy/data_exporter/data_export_function/app.py b/deploy/data_exporter/data_export_function/app.py
new file mode 100644
index 000000000..68ec122b5
--- /dev/null
+++ b/deploy/data_exporter/data_export_function/app.py
@@ -0,0 +1,216 @@
+import os
+import subprocess
+from datetime import datetime, timedelta, timezone
+
+import boto3
+import psycopg
+from psycopg import sql
+
+ssm = boto3.client("ssm")
+s3 = boto3.client("s3", region_name="eu-west-1")
+BUCKET_NAME = "dc-ynr-short-term-backups"
+PREFIX = "ynr-export"
+FILENAME_FORMAT = "{PREFIX}-{CURRENT_TIME}.dump"
+
+
+def get_parameter(name):
+    response = ssm.get_parameter(Name=name)
+    return response["Parameter"]["Value"]
+
+
+SOURCE_DATABASE = "ynr"
+TMP_DATABASE_NAME = "ynr-for-dev-export"
+DB_HOST = get_parameter("/ynr/production/POSTGRES_HOST")
+DB_USER = get_parameter("/ynr/production/POSTGRES_USERNAME")
+DB_PASSWORD = get_parameter("/ynr/production/POSTGRES_PASSWORD")
+DB_PORT = "5432"
+os.environ["PGPASSWORD"] = DB_PASSWORD
+
+
+def get_db_conn(db_name):
+    conn = psycopg.connect(
+        dbname=db_name,
+        user=DB_USER,
+        password=DB_PASSWORD,
+        host=DB_HOST,
+        port=DB_PORT,
+    )
+    conn.autocommit = True
+    return conn
+
+
+def create_database_and_restore():
+    conn = get_db_conn(SOURCE_DATABASE)
+    try:
+        with conn.cursor() as cur:
+            print(f"Deleting {TMP_DATABASE_NAME}")
+            cur.execute(
+                sql.SQL("DROP DATABASE IF EXISTS {};").format(
+                    sql.Identifier(TMP_DATABASE_NAME)
+                )
+            )
+        with conn.cursor() as cur:
+            # SQL to create the new database from the source
+            print(f"Creating {TMP_DATABASE_NAME}")
+            cur.execute(
+                sql.SQL("CREATE DATABASE {} ;").format(
+                    sql.Identifier(TMP_DATABASE_NAME),
+                )
+            )
+            print(
+                f"Database '{TMP_DATABASE_NAME}' created successfully from '{SOURCE_DATABASE}'."
+            )
+    except psycopg.Error as e:
+        print(f"Error creating database: {e}")
+        raise
+
+    finally:
+        conn.close()
+
+    # Dump and restore the source DB to the temp one
+    dump_command = [
+        "pg_dump",
+        "-h",
+        DB_HOST,
+        "-U",
+        DB_USER,
+        "-d",
+        SOURCE_DATABASE,
+        "-Fc",
+    ]
+
+    restore_command = [
+        "pg_restore",
+        "-h",
+        DB_HOST,
+        "-U",
+        DB_USER,
+        "-d",
+        TMP_DATABASE_NAME,
+    ]
+
+    print("Populating new database (pg_dump | pg_restore)")
+    with subprocess.Popen(
+        dump_command,
+        stdout=subprocess.PIPE,
+    ) as dump_proc:
+        subprocess.run(
+            restore_command,
+            stdin=dump_proc.stdout,
+            check=True,
+        )
+        dump_proc.stdout.close()
+
+
+def clean_database():
+    conn = get_db_conn(db_name=TMP_DATABASE_NAME)
+    with conn.cursor() as cur:
+        print("Cleaning Users table")
+        cur.execute(
+            """UPDATE auth_user SET
+            email = CONCAT('anon_', id, '@example.com'),
+            password = md5(random()::text);
+            """
+        )
+        print("Cleaning Account email table")
+        cur.execute(
+            """UPDATE account_emailaddress SET
+            email = CONCAT('anon_', id, '@example.com');
+            """
+        )
+        print("Cleaning IP addresses from LoggedActions")
+        cur.execute(
+            """UPDATE candidates_loggedaction SET
+            ip_address = '127.0.0.1';
+            """
+        )
+        print("Cleaning API tokens")
+        cur.execute(
+            """UPDATE authtoken_token SET
+            key = md5(random()::text);
+            """
+        )
+        print("Cleaning sessions")
+        cur.execute("""TRUNCATE TABLE django_session;""")
+
+
+def get_filename():
+    return FILENAME_FORMAT.format(
+        PREFIX=PREFIX, CURRENT_TIME=datetime.now().isoformat().replace(":", "-")
+    )
+
+
+def dump_and_export():
+    dump_file = "/tmp/db_dump.sql"  # Temporary file for the dump
+
+    print("Run pg_dump to create the database dump")
+    try:
+        subprocess.run(
+            [
+                "pg_dump",
+                "-h",
+                DB_HOST,
+                "-U",
+                DB_USER,
+                "-d",
+                TMP_DATABASE_NAME,
+                "-Fc",
+                "-f",
+                dump_file,
+            ],
+            check=True,
+        )
+
+        file_name = get_filename()
+
+        print("Upload the dump to S3")
+        s3.upload_file(dump_file, BUCKET_NAME, file_name)
+
+        print("Generate a presigned URL for downloading the dump")
+        presigned_url = s3.generate_presigned_url(
+            "get_object",
+            Params={"Bucket": BUCKET_NAME, "Key": file_name},
+            ExpiresIn=3600,  # URL expires in 1 hour
+        )
+        print("Finished")
+        return presigned_url
+
+    except subprocess.CalledProcessError as e:
+        return f"Error generating database dump: {str(e)}"
+
+
+def check_for_recent_exports():
+    """
+    If we've exported a file in the last hour, don't export another one
+
+    """
+    one_hour_ago = datetime.now(timezone.utc) - timedelta(hours=1)
+    response = s3.list_objects_v2(Bucket=BUCKET_NAME, Prefix=PREFIX)
+    if "Contents" in response:
+        recent_files = [
+            obj
+            for obj in response["Contents"]
+            if obj["LastModified"] >= one_hour_ago
+        ]
+
+        recent_files.sort(key=lambda obj: obj["LastModified"], reverse=True)
+
+        if recent_files:
+            return s3.generate_presigned_url(
+                "get_object",
+                Params={"Bucket": BUCKET_NAME, "Key": recent_files[0]["Key"]},
+                ExpiresIn=3600,  # URL expires in 1 hour
+            )
+    return None
+
+
+def lambda_handler(event, context):
+    if recent_export := check_for_recent_exports():
+        return recent_export
+
+    print("Creating temp database")
+    create_database_and_restore()
+    print("Cleaning temp database")
+    clean_database()
+    print("Dumping and exporting")
+    return dump_and_export()
diff --git a/deploy/data_exporter/data_export_function/requirements.txt b/deploy/data_exporter/data_export_function/requirements.txt
new file mode 100644
index 000000000..934ff63fb
--- /dev/null
+++ b/deploy/data_exporter/data_export_function/requirements.txt
@@ -0,0 +1,2 @@
+boto3==1.35.56
+psycopg[binary]==3.2.3
diff --git a/deploy/data_exporter/samconfig.toml b/deploy/data_exporter/samconfig.toml
new file mode 100644
index 000000000..7eefffa43
--- /dev/null
+++ b/deploy/data_exporter/samconfig.toml
@@ -0,0 +1,33 @@
+# More information about the configuration file can be found here:
+# https://docs.aws.amazon.com/serverless-application-model/latest/developerguide/serverless-sam-cli-config.html
+version = 0.1
+
+[default.global.parameters]
+stack_name = "ynr-data-exporter"
+
+[default.build.parameters]
+cached = true
+parallel = true
+
+[default.validate.parameters]
+lint = true
+
+[default.deploy.parameters]
+capabilities = "CAPABILITY_IAM"
+confirm_changeset = true
+resolve_s3 = true
+s3_prefix = "ynr-data-exporter"
+region = "eu-west-2"
+image_repositories = ["DataExportFunction=929325949831.dkr.ecr.eu-west-2.amazonaws.com/ynrdataexporter736bb2dc/dataexportfunctionb95e9e19repo"]
+
+[default.package.parameters]
+resolve_s3 = true
+
+[default.sync.parameters]
+watch = true
+
+[default.local_start_api.parameters]
+warm_containers = "EAGER"
+
+[default.local_start_lambda.parameters]
+warm_containers = "EAGER"
diff --git a/deploy/data_exporter/template.yaml b/deploy/data_exporter/template.yaml
new file mode 100644
index 000000000..a88c8a578
--- /dev/null
+++ b/deploy/data_exporter/template.yaml
@@ -0,0 +1,49 @@
+AWSTemplateFormatVersion: '2010-09-09'
+Transform: AWS::Serverless-2016-10-31
+Description: >
+  data_exporter
+
+  Exports data from the prod database, cleans it and puts the resulting dump in an S3 bucket
+
+Globals:
+  Function:
+    Timeout: 600 # 10 minutes
+    MemorySize: 1024
+
+    LoggingConfig:
+      LogFormat: JSON
+Resources:
+  DataExportFunction:
+    Type: AWS::Serverless::Function
+    Properties:
+      FunctionName: ynr-data-exporter
+      PackageType: Image
+      ImageUri: data_export_function
+      # Needs to be at least as big as the DB export, currently at around 350mb
+      EphemeralStorage:
+        Size: 1024
+      # Don't allow more than one export job to run at a time
+      ReservedConcurrentExecutions: 1
+      Policies:
+        - Statement:
+            - Sid: S3Access
+              Effect: Allow
+              Action:
+                - s3:*
+              Resource:
+                - 'arn:aws:s3:::dc-ynr-short-term-backups'
+                - 'arn:aws:s3:::dc-ynr-short-term-backups/*'
+            - Sid: SSM
+              Effect: Allow
+              Action:
+                - ssm:*
+              Resource:
+                - 'arn:aws:ssm:*:*:parameter/ynr/*'
+
+Outputs:
+  DataExportFunction:
+    Description: Data export Lambda function ARN
+    Value: !GetAtt DataExportFunction.Arn
+  DataExportFunctionIamRole:
+    Description: Implicit IAM Role created for the data export function
+    Value: !GetAtt DataExportFunctionRole.Arn
diff --git a/scripts/check-database-url.sh b/scripts/check-database-url.sh
new file mode 100755
index 000000000..7dacba9bb
--- /dev/null
+++ b/scripts/check-database-url.sh
@@ -0,0 +1,95 @@
+# This script does three things:
+#
+# 1. Gets a DATABASE_URL from the environment or the first argument and
+#    normalizes it so that Postgres's CLI tools can connect to it
+# 2. Validates that it's possible to connect to the URL provided
+# 3. Sets the validated URL as the `_SCRIPT_DATABASE_URL` environment variable for
+#    use in other scripts. This only happens if the script detects it's not
+#    being invoked directly.
+#
+# This script can be used on its own for validating connections (useful for
+# debugging different environments and catching problems early) or as a
+# utility script in other scripts that need to connect to a database.
+
+REQUIRED_POSTGRES_VERSION="16"
+
+# Check for required tools
+REQUIRED_TOOLS="createdb psql"
+for tool in $REQUIRED_TOOLS; do
+    if ! command -v "$tool" >/dev/null 2>&1; then
+        echo "Error: $tool is required but not installed." >&2
+        exit 1
+    fi
+done
+
+
+# Get the database URL
+# TODO: we might want this to be its own script
+# 1. Check if DATABASE_URL is provided as the first argument
+if [ -n "${1:-}" ]; then
+    echo "Getting DATABASE_URL from the provided argument"
+    DATABASE_URL="$1"
+# 2. Check DATABASE_URL is set in the environment
+elif [ -n "${DATABASE_URL:-}" ]; then
+    echo "Getting DATABASE_URL from the environment"
+    DATABASE_URL="$DATABASE_URL"
+fi
+
+# Normalize if DATABASE_URL starts with "postgis://"
+# We do this because `dj-database-url` uses "postgis://"
+# to alter the Django engine that's used, but the postgres
+# cli tools don't support this protocol.
+case "${DATABASE_URL:-}" in postgis://*)
+    DATABASE_URL="postgres://${DATABASE_URL#postgis://}"
+    ;;
+esac
+
+# Check if DATABASE_URL is set after all attempts
+if [ -z "${DATABASE_URL:-}" ]; then
+    echo "Error: DATABASE_URL is not provided."
+    echo "Please set the DATABASE_URL environment variable or pass it in as an argument"
+    echo "The format must comply with \033[4mhttps://www.postgresql.org/docs/$REQUIRED_POSTGRES_VERSION/libpq-connect.html#LIBPQ-CONNSTRING-URIS\033[0m"
+    exit 1
+fi
+
+# Extract the database name from the database URL.
+# 1. Use sed to remove any trailing slashes
+# 2. Use `tr` to replace slashes with newlines
+# 3. Use tail to get the last line, i.e. the last element after a slash
+# 4. Use the same method to strip off any query arguments after a `?`
+DB_NAME=$(echo "$DATABASE_URL" | sed 's:/*$::' | tr "/" "\n" | tail -n 1 | tr "?" "\n" | head -n 1)
+
+# Create the database if it doesn't exist.
+# If it already exists, we don't fail. At this point,
+# we're only making a DB to ensure that we can connect to the
+# database URL in the next step, so we can ignore failures here.
+# Because of this, we route the output of `createdb` to /dev/null.
+# Without this, the script prints an error that might confuse users.
+echo "Creating the DB if it doesn't exist."
+createdb $DB_NAME >/dev/null 2>&1 || true
+
+# Check that we can connect to the local DB before returning
+psql $DATABASE_URL -c "\q"
+if [ $? -ne 0 ]; then
+    echo "❌ Failed to connect to $DATABASE_URL"
+    exit 1
+fi
+
+
+# Check the server version
+SERVER_POSTGRES_VERSION=$(psql -t -c "SHOW server_version;" -d $DATABASE_URL | cut -d '.' -f 1)
+if [ $SERVER_POSTGRES_VERSION != $REQUIRED_POSTGRES_VERSION ]; then
+    echo "❌ Postgres version $REQUIRED_POSTGRES_VERSION required, found $SERVER_POSTGRES_VERSION"
+fi
+
+echo "✅ Successfully connected to the local database '$DB_NAME'"
+
+
+# Check if the basename of $0 (the file that was executed) is the same
+# as this file name. If not, this script is being called as a 'utility'
+# so we should set an environment variable.
+if [ "${0##*/}" != "check-database-url.sh" ]; then
+    # Script is being sourced, export a "private" DATABASE URL
+    # that we can use in other scripts
+    export _SCRIPT_DATABASE_URL=$DATABASE_URL
+fi
diff --git a/scripts/get-prod-db.sh b/scripts/get-prod-db.sh
new file mode 100755
index 000000000..562e6b574
--- /dev/null
+++ b/scripts/get-prod-db.sh
@@ -0,0 +1,71 @@
+#!/bin/sh
+set -eux
+
+# This script invokes an AWS Lambda function to retrieve a URL for downloading
+# a cleaned version of the production database and then restores
+# that data locally. By default the db name is "ynr-prod" but you can change the
+# local name by passing it as the first argument to the script.
+#
+# This script requires access to the YNR production AWS account
+#
+# Usage:
+#   ./scripts/get-prod-db.sh [LOCAL_DB_NAME]
+#
+# Arguments:
+#   LOCAL_DB_NAME: Optional. Name of the local database to restore data to.
+#                  Defaults to 'ynr-prod' if not specified.
+
+# Configurable variables
+LAMBDA_FUNCTION_NAME="ynr-data-exporter"
+LOCAL_DB_NAME="${1:-ynr-prod}"
+
+# Check for required tools
+REQUIRED_TOOLS="aws dropdb createdb pg_restore wget"
+for tool in $REQUIRED_TOOLS; do
+    if ! command -v "$tool" >/dev/null 2>&1; then
+        echo "Error: $tool is required but not installed." >&2
+        exit 1
+    fi
+done
+
+# Check the DB URL and get the validated $_SCRIPT_DATABASE_URL and $DB_NAME
+. ./scripts/check-database-url.sh
+
+
+# Create a temporary file and set up cleanup on script exit
+TEMP_FILE=$(mktemp)
+trap 'rm -f "$TEMP_FILE"' EXIT
+
+# Invoke AWS Lambda and store the result in the temp file
+# The result is a pre-signed URL to the dump file on S3
+echo "Invoking Lambda to get DB URL. This might take a few minutes..."
+aws lambda invoke \
+    --function-name "$LAMBDA_FUNCTION_NAME" \
+    --cli-read-timeout=0 \
+    --no-cli-pager \
+    --output text \
+    --query 'Payload' \
+    "$TEMP_FILE"
+
+# Extract the URL from the response
+# The response is quoted, so we just need to remove the quotation marks
+URL=$(sed 's/^"\(.*\)"$/\1/' "$TEMP_FILE")
+case "$URL" in
+    https://*)
+        echo "Got URL: $URL"
+
+        ;;
+    *)
+        echo "The received URL looks invalid. This might mean the database export failed."
+        echo "Check the logs of the '$LAMBDA_FUNCTION_NAME' Lambda function"
+        exit 1
+        ;;
+esac
+
+echo "Dropping DB $DB_NAME"
+dropdb --if-exists "$DB_NAME"
+echo "Creating DB $DB_NAME"
+createdb "$DB_NAME"
+
+echo "Downloading and restoring DB $DB_NAME"
+wget -qO- "$URL" | pg_restore -d "$_SCRIPT_DATABASE_URL" -Fc --no-owner --no-privileges
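
A rough end-to-end sketch of how the pieces above fit together (this assumes the SAM CLI, Docker, local Postgres 16 client tools, and AWS credentials for the YNR production account; the `prod` profile name and the example DATABASE_URL are illustrative only and are not defined anywhere in this change):

    # Build and deploy the exporter Lambda; samconfig.toml supplies the stack
    # name, region and ECR repository, so `sam deploy` needs no extra arguments.
    cd deploy/data_exporter
    sam build
    sam deploy

    # Back at the repo root, point the scripts at a local database and pull a
    # cleaned copy of production into it.
    cd ../..
    export DATABASE_URL=postgres://localhost/ynr-prod
    AWS_PROFILE=prod ./scripts/get-prod-db.sh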