From 0b737044f429407cecdac0422de6c440762fbd2b Mon Sep 17 00:00:00 2001 From: Richard Lynch Date: Wed, 11 Dec 2024 14:02:08 +0000 Subject: [PATCH] Adds duplicate claims report Adds the query to identify duplicate approved claims. As per the MatchingAttributeFinder claims can either be duplicates on claim attributes or on eligibility attributes. Certain eligibilities can only be compared with other eligibilities determined by the policy's `SomePolicy.policies_claimable` method. Eligibilities define the attributes they should be compared with other eligibilities by in their `SomePolicy.eligibility_matching_attributes` method, however some policies that are comparable don't implement the attributes used for finding matches (eg `EarlyYearsPayments` is in `EarlyCareerPayments` `policies_claimable` list, but it doesn't implement `teacher_reference_number` which is what `EarlyCareerPayments` uses to determine duplicates). We have to do some fairly gnarl joins and unions to compare eligibilities! When comparing claims we search all approved claims in the current academic year for duplicates, ignoring the claim's policy's `policies_claimable` list, this is how MatchingAttributeFinder works so we've kept that behaviour. For each group in the `CLAIM_ATTRIBUTE_GROUPS_TO_MATCH` list we AND the conditions together, normalising strings, we then use UNION to handle the OR conditions, this performs significantly better than complicating the join condition. When adding new attributes to the duplicate detection list we _really_ need to make sure we add the appropriate indexes. --- app/controllers/admin/reports_controller.rb | 2 + .../reports/duplicate_approved_claims.rb | 215 +++++++++++++++ app/views/admin/reports/index.html.erb | 5 + ...241213142806_add_indexes_for_ops_report.rb | 30 +++ db/schema.rb | 6 +- spec/factories/claims.rb | 5 + spec/features/admin/reports_spec.rb | 45 ++++ .../reports/duplicate_approved_claims_spec.rb | 249 ++++++++++++++++++ 8 files changed, 556 insertions(+), 1 deletion(-) create mode 100644 app/models/admin/reports/duplicate_approved_claims.rb create mode 100644 db/migrate/20241213142806_add_indexes_for_ops_report.rb create mode 100644 spec/models/admin/reports/duplicate_approved_claims_spec.rb diff --git a/app/controllers/admin/reports_controller.rb b/app/controllers/admin/reports_controller.rb index 0ec277834d..7fb35a60ec 100644 --- a/app/controllers/admin/reports_controller.rb +++ b/app/controllers/admin/reports_controller.rb @@ -21,6 +21,8 @@ def report Reports::FeApprovedClaimsWithFailingProviderVerification.new when "approved-claims-failing-qualification-task" Reports::ApprovedClaimsFailingQualificationTask.new + when "duplicate-claims" + Reports::DuplicateApprovedClaims.new else raise ActiveRecord::RecordNotFound end diff --git a/app/models/admin/reports/duplicate_approved_claims.rb b/app/models/admin/reports/duplicate_approved_claims.rb new file mode 100644 index 0000000000..4b29ca2db7 --- /dev/null +++ b/app/models/admin/reports/duplicate_approved_claims.rb @@ -0,0 +1,215 @@ +module Admin + module Reports + class DuplicateApprovedClaims + HEADERS = [ + "Claim reference", + "Full name", + "TRN", + "Policy", + "Claim amount", + "Claim status", + "Decision date", + "Decision agent" + ] + + def initialize(academic_year: AcademicYear.current) + @academic_year = AcademicYear.wrap(academic_year) + end + + def filename + "duplicate_approved_claims" + end + + def to_csv + CSV.generate( + row_sep: "\r\n", + write_headers: true, + headers: HEADERS + ) do |csv| + rows.each { |row| csv << row } + end + end + + private + + attr_reader :academic_year + + def rows + scope.map(&ClaimPresenter.method(:new)).map(&:to_a) + end + + def scope + Claim.where( + id: Set.new(duplicates_by_eligibility + duplicates_by_attributes) + ).includes(decisions: :created_by) + end + + def duplicates_by_eligibility + ActiveRecord::Base.connection.execute( + Policies::POLICIES.map do |policy| + policy_with_claimable_policies(policy) + end.compact.join("\nUNION\n") + ).map { |row| row["id"] } + end + + def policy_with_claimable_policies(policy) + left_table = policy::Eligibility.table_name + + claimable_policies = claimable_policy_mapping(policy) + + return if claimable_policies.empty? + + claimable_policy_mapping(policy).map do |other_policy, matching_attributes| + right_table = other_policy::Eligibility.table_name + right_table_alias = "#{right_table}_#{left_table}" + + join_condition = build_join_condition( + left_table, + right_table_alias, + matching_attributes + ) + + <<~SQL + SELECT claims.id + FROM #{left_table} + JOIN #{right_table} #{right_table_alias} + ON #{left_table}.id != #{right_table_alias}.id + AND (#{join_condition}) + JOIN claims ON claims.eligibility_id = #{left_table}.id + JOIN claims other_claims ON other_claims.eligibility_id = #{right_table_alias}.id + JOIN decisions ON claims.id = decisions.claim_id + JOIN decisions other_decisions ON other_claims.id = other_decisions.claim_id + WHERE claims.academic_year = '#{academic_year}' + AND other_claims.academic_year = '#{academic_year}' + AND decisions.result = 0 + AND other_decisions.result = 0 + SQL + end.join("\nUNION\n") + end + + # [["teacher_reference_number"], ["school_id", "nqt_in_academic_year"]] + # => + # ( + # ( + # left_table.teacher_reference_number = right_table.teacher_reference_number + # AND left_table.teacher_reference_number IS NOT NULL + # AND left_table.teacher_reference_number != '' + # ) + # ) + # OR + # ( + # ( + # left_table.school_id = right_table.school_id + # AND left_table.school_id IS NOT NULL + # AND left_table.school_id != '' + # ) + # AND + # ( + # left_table.nqt_in_academic_year = right_table.nqt_in_academic_year + # AND left_table.nqt_in_academic_year IS NOT NULL + # AND left_table.nqt_in_academic_year != '' + # ) + # ) + def build_join_condition(left_table, right_table, matching_attributes) + matching_attributes.map do |attr_group| + "(" + attr_group.map do |attr| + "(" \ + "#{left_table}.#{attr} = #{right_table}.#{attr} " \ + "AND #{left_table}.#{attr} IS NOT NULL " \ + "AND #{left_table}.#{attr} != ''" \ + ")" + end.join(" AND ") + ")" + end.join(" OR ") + end + + # Return a hash of other claimable policies and the attributes we can + # use for determining duplicates. + # "other_policy" => [["attr_1"], ["attr_2", "attr_3"]] + # If other policy is in the list of claimable policies but shares no + # matching attributes, we can't compare them, eg EY is in ECP + # other claimable policies, but EY doesn't have a + # `teacher_reference_number`. + def claimable_policy_mapping(policy) + policy.policies_claimable.map do |other_policy| + shared_matching_attributes = policy.eligibility_matching_attributes.select do |attribute_group| + attribute_group.all? do |attr| + other_policy::Eligibility.column_names.include?(attr) + end + end + + [other_policy, shared_matching_attributes] + end.to_h.reject { |_, matching_attrs| matching_attrs.empty? } + end + + # building_society_roll_number is no longer used, so is always null + # we only check the number and sort code when determining duplicates. + def claim_matching_attributes + Claim::MatchingAttributeFinder::CLAIM_ATTRIBUTE_GROUPS_TO_MATCH.map do |attr_group| + attr_group.without("building_society_roll_number") + end + end + + def duplicates_by_attributes + # Limit the claims we're looking at + current_claims = <<~SQL + WITH current_claims AS ( + SELECT claims.id, #{claim_matching_attributes.flatten.join(", ")} + FROM claims + JOIN decisions ON claims.id = decisions.claim_id + WHERE claims.academic_year = '#{academic_year}' + AND decisions.undone = false + AND decisions.result = 0 + ) + SQL + + # Make sure to have indexes for the columns we're querying! + filter = claim_matching_attributes.flat_map do |attribute_group| + join_condition = attribute_group.map do |attr| + if Claim.column_for_attribute(attr).type == :string + "LOWER(current_claims.#{attr}) = LOWER(other_claims.#{attr})" + else + "current_claims.#{attr} = other_claims.#{attr}" + end + end.join(" AND ") + + <<~SQL + SELECT current_claims.id + FROM current_claims + JOIN current_claims other_claims + ON #{join_condition} + WHERE current_claims.id != other_claims.id + SQL + end.join("\nUNION\n") + + query = current_claims + "\n" + filter + + ActiveRecord::Base.connection.execute(query).map { |row| row["id"] } + end + + class ClaimPresenter + include Admin::ClaimsHelper + + def initialize(claim) + @claim = claim + end + + def to_a + [ + claim.reference, + claim.full_name, + claim.eligibility.try(:teacher_reference_number), + I18n.t("#{claim.policy.locale_key}.policy_acronym"), + claim.award_amount, + status(claim), + claim.decisions.last.created_at.to_date, + claim.decisions.last.created_by.full_name + ] + end + + private + + attr_reader :claim + end + end + end +end diff --git a/app/views/admin/reports/index.html.erb b/app/views/admin/reports/index.html.erb index 29074aefb4..9233313c69 100644 --- a/app/views/admin/reports/index.html.erb +++ b/app/views/admin/reports/index.html.erb @@ -7,6 +7,11 @@

Reports

+ <%= govuk_button_link_to( + "Duplicate claims", + admin_report_path("duplicate-claims", format: :csv), + secondary: true + ) %> <%= govuk_button_link_to( "FE TRI approved claims whereby the provider check status is 'failed'", diff --git a/db/migrate/20241213142806_add_indexes_for_ops_report.rb b/db/migrate/20241213142806_add_indexes_for_ops_report.rb new file mode 100644 index 0000000000..1787c2679f --- /dev/null +++ b/db/migrate/20241213142806_add_indexes_for_ops_report.rb @@ -0,0 +1,30 @@ +class AddIndexesForOpsReport < ActiveRecord::Migration[8.0] + def change + add_index( + :claims, + "LOWER(email_address)", + name: "index_claims_on_lower_email_address" + ) + + add_index( + :claims, + "LOWER(national_insurance_number)", + name: "index_claims_on_lower_national_insurance_number" + ) + + # Even though bank details are "numbers" they're stored in a string column + # we call lower on these so we don't have to treat them differently to + # other string columns when building the query. + add_index( + :claims, + "LOWER(bank_account_number), LOWER(bank_sort_code)", + name: "index_claims_on_bank_details" + ) + + add_index( + :claims, + "LOWER(first_name), LOWER(surname), date_of_birth", + name: "index_claims_on_personal_details" + ) + end +end diff --git a/db/schema.rb b/db/schema.rb index ff5c7e9cb2..f18b69d08d 100644 --- a/db/schema.rb +++ b/db/schema.rb @@ -10,7 +10,7 @@ # # It's strongly recommended that you check this file into your version control system. -ActiveRecord::Schema[8.0].define(version: 2024_11_26_105650) do +ActiveRecord::Schema[8.0].define(version: 2024_12_13_142806) do # These are extensions that must be enabled in order to support this database enable_extension "citext" enable_extension "pg_catalog.plpgsql" @@ -113,6 +113,10 @@ t.date "onelogin_idv_date_of_birth" t.datetime "started_at", precision: nil, null: false t.datetime "verified_at" + t.index "lower((bank_account_number)::text), lower((bank_sort_code)::text)", name: "index_claims_on_bank_details" + t.index "lower((email_address)::text)", name: "index_claims_on_lower_email_address" + t.index "lower((first_name)::text), lower((surname)::text), date_of_birth", name: "index_claims_on_personal_details" + t.index "lower((national_insurance_number)::text)", name: "index_claims_on_lower_national_insurance_number" t.index ["academic_year"], name: "index_claims_on_academic_year" t.index ["created_at"], name: "index_claims_on_created_at" t.index ["eligibility_type", "eligibility_id"], name: "index_claims_on_eligibility_type_and_eligibility_id" diff --git a/spec/factories/claims.rb b/spec/factories/claims.rb index 6a4cfc9fb7..96a77d2572 100644 --- a/spec/factories/claims.rb +++ b/spec/factories/claims.rb @@ -395,5 +395,10 @@ } end end + + trait :random_name do + first_name { Faker::Name.first_name } + surname { Faker::Name.last_name } + end end end diff --git a/spec/features/admin/reports_spec.rb b/spec/features/admin/reports_spec.rb index aa9e3c2e9d..26ca8310a4 100644 --- a/spec/features/admin/reports_spec.rb +++ b/spec/features/admin/reports_spec.rb @@ -163,4 +163,49 @@ expect(row.fetch("Qualification name")).to eq("BA (Hons)") end end + + describe "Duplicate claims" do + it "returns a CSV report" do + claim_1 = create( + :claim, + :current_academic_year, + :approved, + email_address: "test@example.com", + policy: Policies::InternationalRelocationPayments, + eligibility_attributes: { + award_amount: 2_000 + } + ) + + claim_2 = create( + :claim, + :current_academic_year, + :approved, + email_address: "test@example.com", + policy: Policies::InternationalRelocationPayments, + eligibility_attributes: { + award_amount: 2_000 + } + ) + + sign_in_as_service_operator + + visit admin_claims_path + + click_on "Reports" + + click_on "Duplicate claims" + + csv_data = page.body + + csv = CSV.parse(csv_data, headers: true) + + claim_references = csv.map { |row| row.fetch("Claim reference") } + + expect(claim_references).to match_array([ + claim_1.reference, + claim_2.reference + ]) + end + end end diff --git a/spec/models/admin/reports/duplicate_approved_claims_spec.rb b/spec/models/admin/reports/duplicate_approved_claims_spec.rb new file mode 100644 index 0000000000..7aa33ee223 --- /dev/null +++ b/spec/models/admin/reports/duplicate_approved_claims_spec.rb @@ -0,0 +1,249 @@ +require "rails_helper" + +RSpec.describe Admin::Reports::DuplicateApprovedClaims do + describe "#to_csv" do + it "includes claims with duplicate details, excludes claims that aren't duplicates" do + claim_1 = create( + :claim, + :approved, + :current_academic_year, + :random_name, + email_address: "duplicate@example.com", + reference: "claim 1" + ) + + claim_2 = create( + :claim, + :approved, + :current_academic_year, + :random_name, + email_address: "duplicate@example.com", + reference: "claim 2" + ) + + create( + :claim, + :approved, + :current_academic_year, + :random_name, + email_address: "non-duplicate@example.com", + reference: "nondupe1" + ) + + create( + :claim, + :approved, + :current_academic_year, + :random_name, + national_insurance_number: "AB123456D", + reference: "nondupe2" + ) + + claim_3 = create( + :claim, + :approved, + :current_academic_year, + :random_name, + bank_account_number: "12345678", + bank_sort_code: "123456", + reference: "claim 3" + ) + + claim_4 = create( + :claim, + :approved, + :current_academic_year, + :random_name, + bank_account_number: "12345678", + bank_sort_code: "123456", + reference: "claim 4" + ) + + create( + :claim, + :approved, + :current_academic_year, + :random_name, + bank_account_number: "12345679", + bank_sort_code: "123456", + reference: "nondupe3" + ) + + claim_5 = create( + :claim, + :approved, + :current_academic_year, + first_name: "SEYMOUR", + surname: "Skinner", + date_of_birth: Date.new(1960, 1, 1), + reference: "claim 5" + ) + + claim_6 = create( + :claim, + :approved, + :current_academic_year, + first_name: "Seymour", + surname: "Skinner", + date_of_birth: Date.new(1960, 1, 1), + reference: "claim 6" + ) + + create( + :claim, + :approved, + :current_academic_year, + first_name: "Seymour", + surname: "Skinner", + date_of_birth: Date.new(1960, 1, 2), + reference: "nondupe4" + ) + + csv = CSV.parse(described_class.new.to_csv, headers: true) + + claim_references = csv.map { |row| row["Claim reference"] } + + expect(claim_references).to match_array([ + claim_1.reference, + claim_2.reference, + claim_3.reference, + claim_4.reference, + claim_5.reference, + claim_6.reference + ]) + end + + it "includes claims with duplicate eligibility details" do + claim_1 = create( + :claim, + :approved, + :current_academic_year, + :random_name, + reference: "claim 1", + policy: Policies::EarlyCareerPayments, + eligibility_attributes: { + teacher_reference_number: "1234567" + } + ) + + claim_2 = create( + :claim, + :approved, + :current_academic_year, + :random_name, + reference: "claim 2", + policy: Policies::LevellingUpPremiumPayments, + eligibility_attributes: { + teacher_reference_number: "1234567" + } + ) + + claim_3 = create( + :claim, + :approved, + :current_academic_year, + :random_name, + reference: "claim 3", + policy: Policies::FurtherEducationPayments, + eligibility_attributes: { + teacher_reference_number: "1234567" + } + ) + + create( + :claim, + :approved, + :current_academic_year, + :random_name, + reference: "nondupe1", + policy: Policies::InternationalRelocationPayments + ) + + create( + :claim, + :approved, + :current_academic_year, + :random_name, + reference: "nondupe2", + policy: Policies::StudentLoans, + eligibility_attributes: { + teacher_reference_number: "1234568" + } + ) + + csv = CSV.parse(described_class.new.to_csv, headers: true) + + claim_references = csv.map { |row| row["Claim reference"] } + + expect(claim_references).to match_array([ + claim_1.reference, + claim_2.reference, + claim_3.reference + ]) + end + + it "excludes claims with duplicate details that are not approved" do + create( + :claim, + :approved, + :current_academic_year, + email_address: "duplicate@example.com", + reference: "claim 1" + ) + + create( + :claim, + :current_academic_year, + email_address: "duplicate@example.com", + reference: "claim 2" + ) + + create( + :claim, + :approved, + :random_name, + :current_academic_year, + policy: Policies::EarlyCareerPayments, + eligibility_attributes: { + teacher_reference_number: "1234567" + }, + reference: "claim 3" + ) + + create( + :claim, + :random_name, + :current_academic_year, + policy: Policies::EarlyCareerPayments, + eligibility_attributes: { + teacher_reference_number: "1234567" + }, + reference: "claim 4" + ) + + csv = CSV.parse(described_class.new.to_csv, headers: true) + + expect(csv.count).to eq(0) + end + + it "excludes claims with duplicate details across academic years" do + create( + :claim, + :approved, + email_address: "duplicate@example.com", + academic_year: AcademicYear.new(2021) + ) + + create( + :claim, + :approved, + :current_academic_year, + email_address: "duplicate@example.com" + ) + + csv = CSV.parse(described_class.new.to_csv, headers: true) + + expect(csv.count).to eq(0) + end + end +end