diff --git a/app/controllers/admin/reports_controller.rb b/app/controllers/admin/reports_controller.rb index 0ec277834d..7fb35a60ec 100644 --- a/app/controllers/admin/reports_controller.rb +++ b/app/controllers/admin/reports_controller.rb @@ -21,6 +21,8 @@ def report Reports::FeApprovedClaimsWithFailingProviderVerification.new when "approved-claims-failing-qualification-task" Reports::ApprovedClaimsFailingQualificationTask.new + when "duplicate-claims" + Reports::DuplicateApprovedClaims.new else raise ActiveRecord::RecordNotFound end diff --git a/app/models/admin/reports/duplicate_approved_claims.rb b/app/models/admin/reports/duplicate_approved_claims.rb new file mode 100644 index 0000000000..4b29ca2db7 --- /dev/null +++ b/app/models/admin/reports/duplicate_approved_claims.rb @@ -0,0 +1,215 @@ +module Admin + module Reports + class DuplicateApprovedClaims + HEADERS = [ + "Claim reference", + "Full name", + "TRN", + "Policy", + "Claim amount", + "Claim status", + "Decision date", + "Decision agent" + ] + + def initialize(academic_year: AcademicYear.current) + @academic_year = AcademicYear.wrap(academic_year) + end + + def filename + "duplicate_approved_claims" + end + + def to_csv + CSV.generate( + row_sep: "\r\n", + write_headers: true, + headers: HEADERS + ) do |csv| + rows.each { |row| csv << row } + end + end + + private + + attr_reader :academic_year + + def rows + scope.map(&ClaimPresenter.method(:new)).map(&:to_a) + end + + def scope + Claim.where( + id: Set.new(duplicates_by_eligibility + duplicates_by_attributes) + ).includes(decisions: :created_by) + end + + def duplicates_by_eligibility + ActiveRecord::Base.connection.execute( + Policies::POLICIES.map do |policy| + policy_with_claimable_policies(policy) + end.compact.join("\nUNION\n") + ).map { |row| row["id"] } + end + + def policy_with_claimable_policies(policy) + left_table = policy::Eligibility.table_name + + claimable_policies = claimable_policy_mapping(policy) + + return if claimable_policies.empty? + + claimable_policy_mapping(policy).map do |other_policy, matching_attributes| + right_table = other_policy::Eligibility.table_name + right_table_alias = "#{right_table}_#{left_table}" + + join_condition = build_join_condition( + left_table, + right_table_alias, + matching_attributes + ) + + <<~SQL + SELECT claims.id + FROM #{left_table} + JOIN #{right_table} #{right_table_alias} + ON #{left_table}.id != #{right_table_alias}.id + AND (#{join_condition}) + JOIN claims ON claims.eligibility_id = #{left_table}.id + JOIN claims other_claims ON other_claims.eligibility_id = #{right_table_alias}.id + JOIN decisions ON claims.id = decisions.claim_id + JOIN decisions other_decisions ON other_claims.id = other_decisions.claim_id + WHERE claims.academic_year = '#{academic_year}' + AND other_claims.academic_year = '#{academic_year}' + AND decisions.result = 0 + AND other_decisions.result = 0 + SQL + end.join("\nUNION\n") + end + + # [["teacher_reference_number"], ["school_id", "nqt_in_academic_year"]] + # => + # ( + # ( + # left_table.teacher_reference_number = right_table.teacher_reference_number + # AND left_table.teacher_reference_number IS NOT NULL + # AND left_table.teacher_reference_number != '' + # ) + # ) + # OR + # ( + # ( + # left_table.school_id = right_table.school_id + # AND left_table.school_id IS NOT NULL + # AND left_table.school_id != '' + # ) + # AND + # ( + # left_table.nqt_in_academic_year = right_table.nqt_in_academic_year + # AND left_table.nqt_in_academic_year IS NOT NULL + # AND left_table.nqt_in_academic_year != '' + # ) + # ) + def build_join_condition(left_table, right_table, matching_attributes) + matching_attributes.map do |attr_group| + "(" + attr_group.map do |attr| + "(" \ + "#{left_table}.#{attr} = #{right_table}.#{attr} " \ + "AND #{left_table}.#{attr} IS NOT NULL " \ + "AND #{left_table}.#{attr} != ''" \ + ")" + end.join(" AND ") + ")" + end.join(" OR ") + end + + # Return a hash of other claimable policies and the attributes we can + # use for determining duplicates. + # "other_policy" => [["attr_1"], ["attr_2", "attr_3"]] + # If other policy is in the list of claimable policies but shares no + # matching attributes, we can't compare them, eg EY is in ECP + # other claimable policies, but EY doesn't have a + # `teacher_reference_number`. + def claimable_policy_mapping(policy) + policy.policies_claimable.map do |other_policy| + shared_matching_attributes = policy.eligibility_matching_attributes.select do |attribute_group| + attribute_group.all? do |attr| + other_policy::Eligibility.column_names.include?(attr) + end + end + + [other_policy, shared_matching_attributes] + end.to_h.reject { |_, matching_attrs| matching_attrs.empty? } + end + + # building_society_roll_number is no longer used, so is always null + # we only check the number and sort code when determining duplicates. + def claim_matching_attributes + Claim::MatchingAttributeFinder::CLAIM_ATTRIBUTE_GROUPS_TO_MATCH.map do |attr_group| + attr_group.without("building_society_roll_number") + end + end + + def duplicates_by_attributes + # Limit the claims we're looking at + current_claims = <<~SQL + WITH current_claims AS ( + SELECT claims.id, #{claim_matching_attributes.flatten.join(", ")} + FROM claims + JOIN decisions ON claims.id = decisions.claim_id + WHERE claims.academic_year = '#{academic_year}' + AND decisions.undone = false + AND decisions.result = 0 + ) + SQL + + # Make sure to have indexes for the columns we're querying! + filter = claim_matching_attributes.flat_map do |attribute_group| + join_condition = attribute_group.map do |attr| + if Claim.column_for_attribute(attr).type == :string + "LOWER(current_claims.#{attr}) = LOWER(other_claims.#{attr})" + else + "current_claims.#{attr} = other_claims.#{attr}" + end + end.join(" AND ") + + <<~SQL + SELECT current_claims.id + FROM current_claims + JOIN current_claims other_claims + ON #{join_condition} + WHERE current_claims.id != other_claims.id + SQL + end.join("\nUNION\n") + + query = current_claims + "\n" + filter + + ActiveRecord::Base.connection.execute(query).map { |row| row["id"] } + end + + class ClaimPresenter + include Admin::ClaimsHelper + + def initialize(claim) + @claim = claim + end + + def to_a + [ + claim.reference, + claim.full_name, + claim.eligibility.try(:teacher_reference_number), + I18n.t("#{claim.policy.locale_key}.policy_acronym"), + claim.award_amount, + status(claim), + claim.decisions.last.created_at.to_date, + claim.decisions.last.created_by.full_name + ] + end + + private + + attr_reader :claim + end + end + end +end diff --git a/app/views/admin/reports/index.html.erb b/app/views/admin/reports/index.html.erb index 29074aefb4..9233313c69 100644 --- a/app/views/admin/reports/index.html.erb +++ b/app/views/admin/reports/index.html.erb @@ -7,6 +7,11 @@

Reports

+ <%= govuk_button_link_to( + "Duplicate claims", + admin_report_path("duplicate-claims", format: :csv), + secondary: true + ) %> <%= govuk_button_link_to( "FE TRI approved claims whereby the provider check status is 'failed'", diff --git a/db/migrate/20241213142806_add_indexes_for_ops_report.rb b/db/migrate/20241213142806_add_indexes_for_ops_report.rb new file mode 100644 index 0000000000..1787c2679f --- /dev/null +++ b/db/migrate/20241213142806_add_indexes_for_ops_report.rb @@ -0,0 +1,30 @@ +class AddIndexesForOpsReport < ActiveRecord::Migration[8.0] + def change + add_index( + :claims, + "LOWER(email_address)", + name: "index_claims_on_lower_email_address" + ) + + add_index( + :claims, + "LOWER(national_insurance_number)", + name: "index_claims_on_lower_national_insurance_number" + ) + + # Even though bank details are "numbers" they're stored in a string column + # we call lower on these so we don't have to treat them differently to + # other string columns when building the query. + add_index( + :claims, + "LOWER(bank_account_number), LOWER(bank_sort_code)", + name: "index_claims_on_bank_details" + ) + + add_index( + :claims, + "LOWER(first_name), LOWER(surname), date_of_birth", + name: "index_claims_on_personal_details" + ) + end +end diff --git a/db/schema.rb b/db/schema.rb index ff5c7e9cb2..f18b69d08d 100644 --- a/db/schema.rb +++ b/db/schema.rb @@ -10,7 +10,7 @@ # # It's strongly recommended that you check this file into your version control system. -ActiveRecord::Schema[8.0].define(version: 2024_11_26_105650) do +ActiveRecord::Schema[8.0].define(version: 2024_12_13_142806) do # These are extensions that must be enabled in order to support this database enable_extension "citext" enable_extension "pg_catalog.plpgsql" @@ -113,6 +113,10 @@ t.date "onelogin_idv_date_of_birth" t.datetime "started_at", precision: nil, null: false t.datetime "verified_at" + t.index "lower((bank_account_number)::text), lower((bank_sort_code)::text)", name: "index_claims_on_bank_details" + t.index "lower((email_address)::text)", name: "index_claims_on_lower_email_address" + t.index "lower((first_name)::text), lower((surname)::text), date_of_birth", name: "index_claims_on_personal_details" + t.index "lower((national_insurance_number)::text)", name: "index_claims_on_lower_national_insurance_number" t.index ["academic_year"], name: "index_claims_on_academic_year" t.index ["created_at"], name: "index_claims_on_created_at" t.index ["eligibility_type", "eligibility_id"], name: "index_claims_on_eligibility_type_and_eligibility_id" diff --git a/spec/factories/claims.rb b/spec/factories/claims.rb index 6a4cfc9fb7..96a77d2572 100644 --- a/spec/factories/claims.rb +++ b/spec/factories/claims.rb @@ -395,5 +395,10 @@ } end end + + trait :random_name do + first_name { Faker::Name.first_name } + surname { Faker::Name.last_name } + end end end diff --git a/spec/features/admin/reports_spec.rb b/spec/features/admin/reports_spec.rb index aa9e3c2e9d..26ca8310a4 100644 --- a/spec/features/admin/reports_spec.rb +++ b/spec/features/admin/reports_spec.rb @@ -163,4 +163,49 @@ expect(row.fetch("Qualification name")).to eq("BA (Hons)") end end + + describe "Duplicate claims" do + it "returns a CSV report" do + claim_1 = create( + :claim, + :current_academic_year, + :approved, + email_address: "test@example.com", + policy: Policies::InternationalRelocationPayments, + eligibility_attributes: { + award_amount: 2_000 + } + ) + + claim_2 = create( + :claim, + :current_academic_year, + :approved, + email_address: "test@example.com", + policy: Policies::InternationalRelocationPayments, + eligibility_attributes: { + award_amount: 2_000 + } + ) + + sign_in_as_service_operator + + visit admin_claims_path + + click_on "Reports" + + click_on "Duplicate claims" + + csv_data = page.body + + csv = CSV.parse(csv_data, headers: true) + + claim_references = csv.map { |row| row.fetch("Claim reference") } + + expect(claim_references).to match_array([ + claim_1.reference, + claim_2.reference + ]) + end + end end diff --git a/spec/models/admin/reports/duplicate_approved_claims_spec.rb b/spec/models/admin/reports/duplicate_approved_claims_spec.rb new file mode 100644 index 0000000000..7aa33ee223 --- /dev/null +++ b/spec/models/admin/reports/duplicate_approved_claims_spec.rb @@ -0,0 +1,249 @@ +require "rails_helper" + +RSpec.describe Admin::Reports::DuplicateApprovedClaims do + describe "#to_csv" do + it "includes claims with duplicate details, excludes claims that aren't duplicates" do + claim_1 = create( + :claim, + :approved, + :current_academic_year, + :random_name, + email_address: "duplicate@example.com", + reference: "claim 1" + ) + + claim_2 = create( + :claim, + :approved, + :current_academic_year, + :random_name, + email_address: "duplicate@example.com", + reference: "claim 2" + ) + + create( + :claim, + :approved, + :current_academic_year, + :random_name, + email_address: "non-duplicate@example.com", + reference: "nondupe1" + ) + + create( + :claim, + :approved, + :current_academic_year, + :random_name, + national_insurance_number: "AB123456D", + reference: "nondupe2" + ) + + claim_3 = create( + :claim, + :approved, + :current_academic_year, + :random_name, + bank_account_number: "12345678", + bank_sort_code: "123456", + reference: "claim 3" + ) + + claim_4 = create( + :claim, + :approved, + :current_academic_year, + :random_name, + bank_account_number: "12345678", + bank_sort_code: "123456", + reference: "claim 4" + ) + + create( + :claim, + :approved, + :current_academic_year, + :random_name, + bank_account_number: "12345679", + bank_sort_code: "123456", + reference: "nondupe3" + ) + + claim_5 = create( + :claim, + :approved, + :current_academic_year, + first_name: "SEYMOUR", + surname: "Skinner", + date_of_birth: Date.new(1960, 1, 1), + reference: "claim 5" + ) + + claim_6 = create( + :claim, + :approved, + :current_academic_year, + first_name: "Seymour", + surname: "Skinner", + date_of_birth: Date.new(1960, 1, 1), + reference: "claim 6" + ) + + create( + :claim, + :approved, + :current_academic_year, + first_name: "Seymour", + surname: "Skinner", + date_of_birth: Date.new(1960, 1, 2), + reference: "nondupe4" + ) + + csv = CSV.parse(described_class.new.to_csv, headers: true) + + claim_references = csv.map { |row| row["Claim reference"] } + + expect(claim_references).to match_array([ + claim_1.reference, + claim_2.reference, + claim_3.reference, + claim_4.reference, + claim_5.reference, + claim_6.reference + ]) + end + + it "includes claims with duplicate eligibility details" do + claim_1 = create( + :claim, + :approved, + :current_academic_year, + :random_name, + reference: "claim 1", + policy: Policies::EarlyCareerPayments, + eligibility_attributes: { + teacher_reference_number: "1234567" + } + ) + + claim_2 = create( + :claim, + :approved, + :current_academic_year, + :random_name, + reference: "claim 2", + policy: Policies::LevellingUpPremiumPayments, + eligibility_attributes: { + teacher_reference_number: "1234567" + } + ) + + claim_3 = create( + :claim, + :approved, + :current_academic_year, + :random_name, + reference: "claim 3", + policy: Policies::FurtherEducationPayments, + eligibility_attributes: { + teacher_reference_number: "1234567" + } + ) + + create( + :claim, + :approved, + :current_academic_year, + :random_name, + reference: "nondupe1", + policy: Policies::InternationalRelocationPayments + ) + + create( + :claim, + :approved, + :current_academic_year, + :random_name, + reference: "nondupe2", + policy: Policies::StudentLoans, + eligibility_attributes: { + teacher_reference_number: "1234568" + } + ) + + csv = CSV.parse(described_class.new.to_csv, headers: true) + + claim_references = csv.map { |row| row["Claim reference"] } + + expect(claim_references).to match_array([ + claim_1.reference, + claim_2.reference, + claim_3.reference + ]) + end + + it "excludes claims with duplicate details that are not approved" do + create( + :claim, + :approved, + :current_academic_year, + email_address: "duplicate@example.com", + reference: "claim 1" + ) + + create( + :claim, + :current_academic_year, + email_address: "duplicate@example.com", + reference: "claim 2" + ) + + create( + :claim, + :approved, + :random_name, + :current_academic_year, + policy: Policies::EarlyCareerPayments, + eligibility_attributes: { + teacher_reference_number: "1234567" + }, + reference: "claim 3" + ) + + create( + :claim, + :random_name, + :current_academic_year, + policy: Policies::EarlyCareerPayments, + eligibility_attributes: { + teacher_reference_number: "1234567" + }, + reference: "claim 4" + ) + + csv = CSV.parse(described_class.new.to_csv, headers: true) + + expect(csv.count).to eq(0) + end + + it "excludes claims with duplicate details across academic years" do + create( + :claim, + :approved, + email_address: "duplicate@example.com", + academic_year: AcademicYear.new(2021) + ) + + create( + :claim, + :approved, + :current_academic_year, + email_address: "duplicate@example.com" + ) + + csv = CSV.parse(described_class.new.to_csv, headers: true) + + expect(csv.count).to eq(0) + end + end +end