diff --git a/app/controllers/admin/reports_controller.rb b/app/controllers/admin/reports_controller.rb
index 0ec277834d..7fb35a60ec 100644
--- a/app/controllers/admin/reports_controller.rb
+++ b/app/controllers/admin/reports_controller.rb
@@ -21,6 +21,8 @@ def report
Reports::FeApprovedClaimsWithFailingProviderVerification.new
when "approved-claims-failing-qualification-task"
Reports::ApprovedClaimsFailingQualificationTask.new
+ when "duplicate-claims"
+ Reports::DuplicateApprovedClaims.new
else
raise ActiveRecord::RecordNotFound
end
diff --git a/app/models/admin/reports/duplicate_approved_claims.rb b/app/models/admin/reports/duplicate_approved_claims.rb
new file mode 100644
index 0000000000..4b29ca2db7
--- /dev/null
+++ b/app/models/admin/reports/duplicate_approved_claims.rb
@@ -0,0 +1,305 @@
+module Admin
+  module Reports
+    # CSV report of approved claims from a single academic year that share
+    # identifying details with another approved claim from that year.
+    #
+    # Claims are picked up by two independent checks:
+    # * eligibility attributes shared between a policy and the other policies
+    #   it lists in `policies_claimable` (see #duplicates_by_eligibility)
+    # * claim attributes listed in
+    #   `Claim::MatchingAttributeFinder::CLAIM_ATTRIBUTE_GROUPS_TO_MATCH`
+    #   (see #duplicates_by_attributes)
+    class DuplicateApprovedClaims
+      HEADERS = [
+        "Claim reference",
+        "Full name",
+        "TRN",
+        "Policy",
+        "Claim amount",
+        "Claim status",
+        "Decision date",
+        "Decision agent"
+      ].freeze
+
+      # @param academic_year the year to report on, in any form
+      #   `AcademicYear.wrap` accepts; defaults to `AcademicYear.current`.
+      def initialize(academic_year: AcademicYear.current)
+        @academic_year = AcademicYear.wrap(academic_year)
+      end
+
+      def filename
+        "duplicate_approved_claims"
+      end
+
+      # @return [String] the report as CSV: a header row followed by one row
+      #   per duplicate claim, each terminated with CRLF.
+      def to_csv
+        CSV.generate(
+          row_sep: "\r\n",
+          write_headers: true,
+          headers: HEADERS
+        ) do |csv|
+          rows.each { |row| csv << row }
+        end
+      end
+
+      private
+
+      attr_reader :academic_year
+
+      def rows
+        scope.map { |claim| ClaimPresenter.new(claim).to_a }
+      end
+
+      # Claims flagged by either check, with decisions and their agents
+      # preloaded because ClaimPresenter reads them for every row.
+      def scope
+        Claim.where(
+          id: duplicates_by_eligibility | duplicates_by_attributes
+        ).includes(decisions: :created_by)
+      end
+
+      # Runs a query that selects a single `id` column and returns the ids.
+      def ids_from(sql)
+        ActiveRecord::Base.connection.execute(sql).map { |row| row["id"] }
+      end
+
+      # The academic year as a quoted SQL literal. Quoting goes through the
+      # connection rather than hand-written quote marks so the value can
+      # never break out of the string literal.
+      def quoted_academic_year
+        ActiveRecord::Base.connection.quote(academic_year.to_s)
+      end
+
+      # Ids of approved claims whose eligibility matches the eligibility of
+      # another approved claim, across every policy in Policies::POLICIES.
+      def duplicates_by_eligibility
+        sql = Policies::POLICIES.filter_map do |policy|
+          policy_with_claimable_policies(policy)
+        end.join("\nUNION\n")
+
+        # No policy shares matching attributes with another: nothing to run.
+        return [] if sql.empty?
+
+        ids_from(sql)
+      end
+
+      # SQL selecting the claims of `policy` whose eligibility matches an
+      # eligibility belonging to one of its claimable policies, or nil when
+      # there is nothing to compare against.
+      #
+      # Both claims must belong to the report's academic year and hold a
+      # decision with result 0 that has not been undone, the same filter
+      # #duplicates_by_attributes applies. Without the `undone` check a
+      # claim whose approval was later reversed would still be reported.
+      # NOTE(review): result 0 is presumably the "approved" decision result;
+      # confirm against the Decision model.
+      def policy_with_claimable_policies(policy)
+        left_table = policy::Eligibility.table_name
+
+        claimable_policies = claimable_policy_mapping(policy)
+
+        return if claimable_policies.empty?
+
+        claimable_policies.map do |other_policy, matching_attributes|
+          right_table = other_policy::Eligibility.table_name
+          # The alias keeps the join valid when both sides are the same table.
+          right_table_alias = "#{right_table}_#{left_table}"
+
+          join_condition = build_join_condition(
+            left_table,
+            right_table_alias,
+            matching_attributes
+          )
+
+          <<~SQL
+            SELECT claims.id
+            FROM #{left_table}
+            JOIN #{right_table} #{right_table_alias}
+              ON #{left_table}.id != #{right_table_alias}.id
+              AND (#{join_condition})
+            JOIN claims ON claims.eligibility_id = #{left_table}.id
+            JOIN claims other_claims ON other_claims.eligibility_id = #{right_table_alias}.id
+            JOIN decisions ON claims.id = decisions.claim_id
+            JOIN decisions other_decisions ON other_claims.id = other_decisions.claim_id
+            WHERE claims.academic_year = #{quoted_academic_year}
+            AND other_claims.academic_year = #{quoted_academic_year}
+            AND decisions.undone = false
+            AND decisions.result = 0
+            AND other_decisions.undone = false
+            AND other_decisions.result = 0
+          SQL
+        end.join("\nUNION\n")
+      end
+
+      # Builds the ON clause matching two eligibility tables. Attribute groups
+      # are OR-ed together; every attribute inside a group must match.
+      #
+      # [["teacher_reference_number"], ["school_id", "nqt_in_academic_year"]]
+      # =>
+      # (
+      #   (
+      #     left_table.teacher_reference_number = right_table.teacher_reference_number
+      #     AND left_table.teacher_reference_number IS NOT NULL
+      #     AND CAST(left_table.teacher_reference_number AS TEXT) != ''
+      #   )
+      # )
+      # OR
+      # (
+      #   (
+      #     left_table.school_id = right_table.school_id
+      #     AND left_table.school_id IS NOT NULL
+      #     AND CAST(left_table.school_id AS TEXT) != ''
+      #   )
+      #   AND
+      #   (
+      #     left_table.nqt_in_academic_year = right_table.nqt_in_academic_year
+      #     AND left_table.nqt_in_academic_year IS NOT NULL
+      #     AND CAST(left_table.nqt_in_academic_year AS TEXT) != ''
+      #   )
+      # )
+      #
+      # The blank check compares the column as text: PostgreSQL rejects a
+      # bare `column != ''` when the column is not a string type (uuid,
+      # integer, date, ...) because '' cannot be coerced to that type.
+      def build_join_condition(left_table, right_table, matching_attributes)
+        matching_attributes.map do |attr_group|
+          attribute_conditions = attr_group.map do |attr|
+            left_column = "#{left_table}.#{attr}"
+
+            "(" \
+              "#{left_column} = #{right_table}.#{attr} " \
+              "AND #{left_column} IS NOT NULL " \
+              "AND CAST(#{left_column} AS TEXT) != ''" \
+              ")"
+          end
+
+          "(#{attribute_conditions.join(" AND ")})"
+        end.join(" OR ")
+      end
+
+      # Return a hash of other claimable policies and the attributes we can
+      # use for determining duplicates.
+      # "other_policy" => [["attr_1"], ["attr_2", "attr_3"]]
+      # If other policy is in the list of claimable policies but shares no
+      # matching attributes, we can't compare them, eg EY is in ECP
+      # other claimable policies, but EY doesn't have a
+      # `teacher_reference_number`.
+      def claimable_policy_mapping(policy)
+        policy.policies_claimable.each_with_object({}) do |other_policy, mapping|
+          other_columns = other_policy::Eligibility.column_names
+
+          shared_matching_attributes = policy.eligibility_matching_attributes.select do |attribute_group|
+            attribute_group.all? { |attr| other_columns.include?(attr) }
+          end
+
+          next if shared_matching_attributes.empty?
+
+          mapping[other_policy] = shared_matching_attributes
+        end
+      end
+
+      # Attribute groups used to match claims against each other.
+      # building_society_roll_number is no longer used, so is always null;
+      # we only check the number and sort code when determining duplicates.
+      # Groups left empty by that removal are dropped so they can't produce
+      # an empty ON clause.
+      def claim_matching_attributes
+        @claim_matching_attributes ||=
+          Claim::MatchingAttributeFinder::CLAIM_ATTRIBUTE_GROUPS_TO_MATCH
+            .map { |attr_group| attr_group.without("building_society_roll_number") }
+            .reject(&:empty?)
+      end
+
+      # Ids of approved claims that share every attribute of at least one
+      # attribute group with another approved claim from the same year.
+      # String attributes are compared case-insensitively and must not be
+      # blank, matching the blank check #build_join_condition applies to
+      # eligibility attributes; otherwise two claims that both left a field
+      # empty would be reported as duplicates of each other.
+      def duplicates_by_attributes
+        # Qualified with the table name so a column can't become ambiguous
+        # with one on `decisions`.
+        selected_columns = claim_matching_attributes.flatten.uniq.map do |attr|
+          "claims.#{attr}"
+        end
+
+        # Limit the claims we're looking at
+        current_claims = <<~SQL
+          WITH current_claims AS (
+            SELECT claims.id, #{selected_columns.join(", ")}
+            FROM claims
+            JOIN decisions ON claims.id = decisions.claim_id
+            WHERE claims.academic_year = #{quoted_academic_year}
+            AND decisions.undone = false
+            AND decisions.result = 0
+          )
+        SQL
+
+        # Make sure to have indexes for the columns we're querying!
+        # NOTE(review): current_claims is referenced by every branch of the
+        # UNION, and PostgreSQL materialises a CTE that is referenced more
+        # than once, so this self-join may not be able to use expression
+        # indexes defined on `claims` - confirm with EXPLAIN.
+        filter = claim_matching_attributes.map do |attribute_group|
+          join_condition = attribute_group.map do |attr|
+            if Claim.column_for_attribute(attr).type == :string
+              "LOWER(current_claims.#{attr}) = LOWER(other_claims.#{attr}) " \
+                "AND current_claims.#{attr} != ''"
+            else
+              "current_claims.#{attr} = other_claims.#{attr}"
+            end
+          end.join(" AND ")
+
+          <<~SQL
+            SELECT current_claims.id
+            FROM current_claims
+            JOIN current_claims other_claims
+              ON #{join_condition}
+            WHERE current_claims.id != other_claims.id
+          SQL
+        end.join("\nUNION\n")
+
+        ids_from(current_claims + "\n" + filter)
+      end
+
+      # Turns a claim into one CSV row, in HEADERS order.
+      class ClaimPresenter
+        include Admin::ClaimsHelper
+
+        def initialize(claim)
+          @claim = claim
+        end
+
+        def to_a
+          [
+            claim.reference,
+            claim.full_name,
+            claim.eligibility.try(:teacher_reference_number),
+            I18n.t("#{claim.policy.locale_key}.policy_acronym"),
+            claim.award_amount,
+            status(claim),
+            reported_decision&.created_at&.to_date,
+            reported_decision&.created_by&.full_name
+          ]
+        end
+
+        private
+
+        attr_reader :claim
+
+        # The most recent decision that has not been undone. Decisions are
+        # filtered and compared in Ruby so the preloaded records are reused
+        # and the row does not depend on the order the database returned
+        # them in. Decision date and agent are left blank when there is no
+        # such decision or it has no `created_by`, rather than raising and
+        # failing the whole report.
+        def reported_decision
+          return @reported_decision if defined?(@reported_decision)
+
+          @reported_decision = claim.decisions.reject(&:undone).max_by(&:created_at)
+        end
+      end
+    end
+  end
+end
diff --git a/app/views/admin/reports/index.html.erb b/app/views/admin/reports/index.html.erb
index 29074aefb4..9233313c69 100644
--- a/app/views/admin/reports/index.html.erb
+++ b/app/views/admin/reports/index.html.erb
@@ -7,6 +7,11 @@
Reports
+ <%= govuk_button_link_to(
+ "Duplicate claims",
+ admin_report_path("duplicate-claims", format: :csv),
+ secondary: true
+ ) %>
<%= govuk_button_link_to(
"FE TRI approved claims whereby the provider check status is 'failed'",
diff --git a/db/migrate/20241213142806_add_indexes_for_ops_report.rb b/db/migrate/20241213142806_add_indexes_for_ops_report.rb
new file mode 100644
index 0000000000..1787c2679f
--- /dev/null
+++ b/db/migrate/20241213142806_add_indexes_for_ops_report.rb
@@ -0,0 +1,41 @@
+class AddIndexesForOpsReport < ActiveRecord::Migration[8.0]
+  # PostgreSQL refuses to run CREATE INDEX CONCURRENTLY inside a transaction,
+  # so the migration must not be wrapped in one.
+  disable_ddl_transaction!
+
+  # Adds expression indexes for the case-insensitive comparisons made by
+  # Admin::Reports::DuplicateApprovedClaims. They are built concurrently so
+  # writes to `claims` are not blocked while each index is created.
+  def change
+    add_index(
+      :claims,
+      "LOWER(email_address)",
+      name: "index_claims_on_lower_email_address",
+      algorithm: :concurrently
+    )
+
+    add_index(
+      :claims,
+      "LOWER(national_insurance_number)",
+      name: "index_claims_on_lower_national_insurance_number",
+      algorithm: :concurrently
+    )
+
+    # Even though bank details are "numbers" they're stored in a string
+    # column; we call LOWER on them so we don't have to treat them
+    # differently to other string columns when building the query.
+    add_index(
+      :claims,
+      "LOWER(bank_account_number), LOWER(bank_sort_code)",
+      name: "index_claims_on_bank_details",
+      algorithm: :concurrently
+    )
+
+    add_index(
+      :claims,
+      "LOWER(first_name), LOWER(surname), date_of_birth",
+      name: "index_claims_on_personal_details",
+      algorithm: :concurrently
+    )
+  end
+end
diff --git a/db/schema.rb b/db/schema.rb
index ff5c7e9cb2..f18b69d08d 100644
--- a/db/schema.rb
+++ b/db/schema.rb
@@ -10,7 +10,7 @@
#
# It's strongly recommended that you check this file into your version control system.
-ActiveRecord::Schema[8.0].define(version: 2024_11_26_105650) do
+ActiveRecord::Schema[8.0].define(version: 2024_12_13_142806) do
# These are extensions that must be enabled in order to support this database
enable_extension "citext"
enable_extension "pg_catalog.plpgsql"
@@ -113,6 +113,10 @@
t.date "onelogin_idv_date_of_birth"
t.datetime "started_at", precision: nil, null: false
t.datetime "verified_at"
+ t.index "lower((bank_account_number)::text), lower((bank_sort_code)::text)", name: "index_claims_on_bank_details"
+ t.index "lower((email_address)::text)", name: "index_claims_on_lower_email_address"
+ t.index "lower((first_name)::text), lower((surname)::text), date_of_birth", name: "index_claims_on_personal_details"
+ t.index "lower((national_insurance_number)::text)", name: "index_claims_on_lower_national_insurance_number"
t.index ["academic_year"], name: "index_claims_on_academic_year"
t.index ["created_at"], name: "index_claims_on_created_at"
t.index ["eligibility_type", "eligibility_id"], name: "index_claims_on_eligibility_type_and_eligibility_id"
diff --git a/spec/factories/claims.rb b/spec/factories/claims.rb
index 6a4cfc9fb7..96a77d2572 100644
--- a/spec/factories/claims.rb
+++ b/spec/factories/claims.rb
@@ -395,5 +395,10 @@
}
end
end
+
+ trait :random_name do # Faker-generated names; presumably so spec claims don't match each other by name
+ first_name { Faker::Name.first_name }
+ surname { Faker::Name.last_name }
+ end
end
end
diff --git a/spec/features/admin/reports_spec.rb b/spec/features/admin/reports_spec.rb
index aa9e3c2e9d..26ca8310a4 100644
--- a/spec/features/admin/reports_spec.rb
+++ b/spec/features/admin/reports_spec.rb
@@ -163,4 +163,49 @@
expect(row.fetch("Qualification name")).to eq("BA (Hons)")
end
end
+
+ describe "Duplicate claims" do
+   it "returns a CSV report" do
+     # Builds an approved International Relocation Payments claim for the
+     # current academic year. Every claim built here has the same email
+     # address, which is what makes them duplicates of each other.
+     build_duplicate = lambda do
+       create(
+         :claim,
+         :current_academic_year,
+         :approved,
+         email_address: "test@example.com",
+         policy: Policies::InternationalRelocationPayments,
+         eligibility_attributes: {
+           award_amount: 2_000
+         }
+       )
+     end
+
+     first_claim = build_duplicate.call
+     second_claim = build_duplicate.call
+
+     sign_in_as_service_operator
+
+     # Reach the report through the admin UI, as a service operator would.
+     visit admin_claims_path
+
+     click_on "Reports"
+
+     click_on "Duplicate claims"
+
+     # The link requests the CSV format, so the response body is the
+     # report itself.
+     report = CSV.parse(page.body, headers: true)
+
+     listed_references = report.map do |row|
+       row.fetch("Claim reference")
+     end
+
+     expect(listed_references).to match_array([
+       first_claim.reference,
+       second_claim.reference
+     ])
+   end
+ end
end
diff --git a/spec/models/admin/reports/duplicate_approved_claims_spec.rb b/spec/models/admin/reports/duplicate_approved_claims_spec.rb
new file mode 100644
index 0000000000..7aa33ee223
--- /dev/null
+++ b/spec/models/admin/reports/duplicate_approved_claims_spec.rb
@@ -0,0 +1,181 @@
+require "rails_helper"
+
+RSpec.describe Admin::Reports::DuplicateApprovedClaims do
+  # Creates an approved claim for the current academic year. Any extra
+  # traits are applied after those two, in the order given.
+  def approved_claim(*extra_traits, **attributes)
+    create(:claim, :approved, :current_academic_year, *extra_traits, **attributes)
+  end
+
+  # Parses the report built with the default academic year.
+  def parsed_report
+    CSV.parse(described_class.new.to_csv, headers: true)
+  end
+
+  # References of every claim listed in the report.
+  def reported_references
+    parsed_report.map { |row| row["Claim reference"] }
+  end
+
+  describe "#to_csv" do
+    it "includes claims with duplicate details, excludes claims that aren't duplicates" do
+      # Duplicates by email address.
+      email_duplicates = ["claim 1", "claim 2"].map do |reference|
+        approved_claim(
+          :random_name,
+          email_address: "duplicate@example.com",
+          reference: reference
+        )
+      end
+
+      approved_claim(
+        :random_name,
+        email_address: "non-duplicate@example.com",
+        reference: "nondupe1"
+      )
+
+      approved_claim(
+        :random_name,
+        national_insurance_number: "AB123456D",
+        reference: "nondupe2"
+      )
+
+      # Duplicates by bank account number and sort code together.
+      bank_duplicates = ["claim 3", "claim 4"].map do |reference|
+        approved_claim(
+          :random_name,
+          bank_account_number: "12345678",
+          bank_sort_code: "123456",
+          reference: reference
+        )
+      end
+
+      # Same sort code as above but a different account number.
+      approved_claim(
+        :random_name,
+        bank_account_number: "12345679",
+        bank_sort_code: "123456",
+        reference: "nondupe3"
+      )
+
+      # Duplicates by name and date of birth; the first names differ only
+      # in letter case.
+      upper_case_name = approved_claim(
+        first_name: "SEYMOUR",
+        surname: "Skinner",
+        date_of_birth: Date.new(1960, 1, 1),
+        reference: "claim 5"
+      )
+
+      mixed_case_name = approved_claim(
+        first_name: "Seymour",
+        surname: "Skinner",
+        date_of_birth: Date.new(1960, 1, 1),
+        reference: "claim 6"
+      )
+
+      # Same name, date of birth one day later.
+      approved_claim(
+        first_name: "Seymour",
+        surname: "Skinner",
+        date_of_birth: Date.new(1960, 1, 2),
+        reference: "nondupe4"
+      )
+
+      expected_claims = email_duplicates + bank_duplicates + [upper_case_name, mixed_case_name]
+
+      expect(reported_references).to match_array(expected_claims.map(&:reference))
+    end
+
+    it "includes claims with duplicate eligibility details" do
+      # The same TRN recorded against three different policies.
+      duplicate_policies = {
+        "claim 1" => Policies::EarlyCareerPayments,
+        "claim 2" => Policies::LevellingUpPremiumPayments,
+        "claim 3" => Policies::FurtherEducationPayments
+      }
+
+      trn_duplicates = duplicate_policies.map do |reference, policy|
+        approved_claim(
+          :random_name,
+          reference: reference,
+          policy: policy,
+          eligibility_attributes: {
+            teacher_reference_number: "1234567"
+          }
+        )
+      end
+
+      approved_claim(
+        :random_name,
+        reference: "nondupe1",
+        policy: Policies::InternationalRelocationPayments
+      )
+
+      approved_claim(
+        :random_name,
+        reference: "nondupe2",
+        policy: Policies::StudentLoans,
+        eligibility_attributes: {
+          teacher_reference_number: "1234568"
+        }
+      )
+
+      expect(reported_references).to match_array(trn_duplicates.map(&:reference))
+    end
+
+    it "excludes claims with duplicate details that are not approved" do
+      approved_claim(
+        email_address: "duplicate@example.com",
+        reference: "claim 1"
+      )
+
+      # Same email address, no approval.
+      create(
+        :claim,
+        :current_academic_year,
+        email_address: "duplicate@example.com",
+        reference: "claim 2"
+      )
+
+      create(
+        :claim,
+        :approved,
+        :random_name,
+        :current_academic_year,
+        policy: Policies::EarlyCareerPayments,
+        eligibility_attributes: {
+          teacher_reference_number: "1234567"
+        },
+        reference: "claim 3"
+      )
+
+      # Same TRN, no approval.
+      create(
+        :claim,
+        :random_name,
+        :current_academic_year,
+        policy: Policies::EarlyCareerPayments,
+        eligibility_attributes: {
+          teacher_reference_number: "1234567"
+        },
+        reference: "claim 4"
+      )
+
+      expect(parsed_report.count).to eq(0)
+    end
+
+    it "excludes claims with duplicate details across academic years" do
+      create(
+        :claim,
+        :approved,
+        email_address: "duplicate@example.com",
+        academic_year: AcademicYear.new(2021)
+      )
+
+      approved_claim(email_address: "duplicate@example.com")
+
+      expect(parsed_report.count).to eq(0)
+    end
+  end
+end