Skip to content

Commit

Permalink
Adds duplicate claims report
Browse files Browse the repository at this point in the history
Adds the query to identify duplicate approved claims.
As per the MatchingAttributeFinder claims can either be duplicates on
claim attributes or on eligibility attributes. Certain eligibilities can
only be compared with other eligibilities determined by the policy's
`SomePolicy.policies_claimable` method. Eligibilities define the
attributes they should be compared with other eligibilities by in their
`SomePolicy.eligibility_matching_attributes` method, however some
policies that are comparable don't implement the attributes used for
finding matches (eg `EarlyYearsPayments` is in `EarlyCareerPayments`
`policies_claimable` list, but it doesn't implement
`teacher_reference_number` which is what `EarlyCareerPayments` uses to
determine duplicates). We have to do some fairly gnarl joins and unions
to compare eligibilities!

When comparing claims we search all approved claims in the current
academic year for duplicates, ignoring the claim's policy's
`policies_claimable` list, this is how MatchingAttributeFinder works so
we've kept that behaviour. For each group in the
`CLAIM_ATTRIBUTE_GROUPS_TO_MATCH` list we AND the conditions together,
normalising strings, we then use UNION to handle the OR conditions, this
performs significantly better than complicating the join condition. When
adding new attributes to the duplicate detection list we _really_ need
to make sure we add the appropriate indexes.
  • Loading branch information
rjlynch committed Dec 16, 2024
1 parent 67d5338 commit 0b73704
Show file tree
Hide file tree
Showing 8 changed files with 556 additions and 1 deletion.
2 changes: 2 additions & 0 deletions app/controllers/admin/reports_controller.rb
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,8 @@ def report
Reports::FeApprovedClaimsWithFailingProviderVerification.new
when "approved-claims-failing-qualification-task"
Reports::ApprovedClaimsFailingQualificationTask.new
when "duplicate-claims"
Reports::DuplicateApprovedClaims.new
else
raise ActiveRecord::RecordNotFound
end
Expand Down
215 changes: 215 additions & 0 deletions app/models/admin/reports/duplicate_approved_claims.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,215 @@
module Admin
module Reports
class DuplicateApprovedClaims
HEADERS = [
"Claim reference",
"Full name",
"TRN",
"Policy",
"Claim amount",
"Claim status",
"Decision date",
"Decision agent"
]

def initialize(academic_year: AcademicYear.current)
@academic_year = AcademicYear.wrap(academic_year)
end

def filename
"duplicate_approved_claims"
end

def to_csv
CSV.generate(
row_sep: "\r\n",
write_headers: true,
headers: HEADERS
) do |csv|
rows.each { |row| csv << row }
end
end

private

attr_reader :academic_year

def rows
scope.map(&ClaimPresenter.method(:new)).map(&:to_a)
end

def scope
Claim.where(
id: Set.new(duplicates_by_eligibility + duplicates_by_attributes)
).includes(decisions: :created_by)
end

def duplicates_by_eligibility
ActiveRecord::Base.connection.execute(
Policies::POLICIES.map do |policy|
policy_with_claimable_policies(policy)
end.compact.join("\nUNION\n")
).map { |row| row["id"] }
end

def policy_with_claimable_policies(policy)
left_table = policy::Eligibility.table_name

claimable_policies = claimable_policy_mapping(policy)

return if claimable_policies.empty?

claimable_policy_mapping(policy).map do |other_policy, matching_attributes|
right_table = other_policy::Eligibility.table_name
right_table_alias = "#{right_table}_#{left_table}"

join_condition = build_join_condition(
left_table,
right_table_alias,
matching_attributes
)

<<~SQL
SELECT claims.id
FROM #{left_table}
JOIN #{right_table} #{right_table_alias}
ON #{left_table}.id != #{right_table_alias}.id
AND (#{join_condition})
JOIN claims ON claims.eligibility_id = #{left_table}.id
JOIN claims other_claims ON other_claims.eligibility_id = #{right_table_alias}.id
JOIN decisions ON claims.id = decisions.claim_id
JOIN decisions other_decisions ON other_claims.id = other_decisions.claim_id
WHERE claims.academic_year = '#{academic_year}'
AND other_claims.academic_year = '#{academic_year}'
AND decisions.result = 0
AND other_decisions.result = 0
SQL
end.join("\nUNION\n")
end

# [["teacher_reference_number"], ["school_id", "nqt_in_academic_year"]]
# =>
# (
# (
# left_table.teacher_reference_number = right_table.teacher_reference_number
# AND left_table.teacher_reference_number IS NOT NULL
# AND left_table.teacher_reference_number != ''
# )
# )
# OR
# (
# (
# left_table.school_id = right_table.school_id
# AND left_table.school_id IS NOT NULL
# AND left_table.school_id != ''
# )
# AND
# (
# left_table.nqt_in_academic_year = right_table.nqt_in_academic_year
# AND left_table.nqt_in_academic_year IS NOT NULL
# AND left_table.nqt_in_academic_year != ''
# )
# )
def build_join_condition(left_table, right_table, matching_attributes)
matching_attributes.map do |attr_group|
"(" + attr_group.map do |attr|
"(" \
"#{left_table}.#{attr} = #{right_table}.#{attr} " \
"AND #{left_table}.#{attr} IS NOT NULL " \
"AND #{left_table}.#{attr} != ''" \
")"
end.join(" AND ") + ")"
end.join(" OR ")
end

# Return a hash of other claimable policies and the attributes we can
# use for determining duplicates.
# "other_policy" => [["attr_1"], ["attr_2", "attr_3"]]
# If other policy is in the list of claimable policies but shares no
# matching attributes, we can't compare them, eg EY is in ECP
# other claimable policies, but EY doesn't have a
# `teacher_reference_number`.
def claimable_policy_mapping(policy)
policy.policies_claimable.map do |other_policy|
shared_matching_attributes = policy.eligibility_matching_attributes.select do |attribute_group|
attribute_group.all? do |attr|
other_policy::Eligibility.column_names.include?(attr)
end
end

[other_policy, shared_matching_attributes]
end.to_h.reject { |_, matching_attrs| matching_attrs.empty? }
end

# building_society_roll_number is no longer used, so is always null
# we only check the number and sort code when determining duplicates.
def claim_matching_attributes
Claim::MatchingAttributeFinder::CLAIM_ATTRIBUTE_GROUPS_TO_MATCH.map do |attr_group|
attr_group.without("building_society_roll_number")
end
end

def duplicates_by_attributes
# Limit the claims we're looking at
current_claims = <<~SQL
WITH current_claims AS (
SELECT claims.id, #{claim_matching_attributes.flatten.join(", ")}
FROM claims
JOIN decisions ON claims.id = decisions.claim_id
WHERE claims.academic_year = '#{academic_year}'
AND decisions.undone = false
AND decisions.result = 0
)
SQL

# Make sure to have indexes for the columns we're querying!
filter = claim_matching_attributes.flat_map do |attribute_group|
join_condition = attribute_group.map do |attr|
if Claim.column_for_attribute(attr).type == :string
"LOWER(current_claims.#{attr}) = LOWER(other_claims.#{attr})"
else
"current_claims.#{attr} = other_claims.#{attr}"
end
end.join(" AND ")

<<~SQL
SELECT current_claims.id
FROM current_claims
JOIN current_claims other_claims
ON #{join_condition}
WHERE current_claims.id != other_claims.id
SQL
end.join("\nUNION\n")

query = current_claims + "\n" + filter

ActiveRecord::Base.connection.execute(query).map { |row| row["id"] }
end

class ClaimPresenter
include Admin::ClaimsHelper

def initialize(claim)
@claim = claim
end

def to_a
[
claim.reference,
claim.full_name,
claim.eligibility.try(:teacher_reference_number),
I18n.t("#{claim.policy.locale_key}.policy_acronym"),
claim.award_amount,
status(claim),
claim.decisions.last.created_at.to_date,
claim.decisions.last.created_by.full_name
]
end

private

attr_reader :claim
end
end
end
end
5 changes: 5 additions & 0 deletions app/views/admin/reports/index.html.erb
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,11 @@
<h1 class="govuk-heading-xl">
Reports
</h1>
<%= govuk_button_link_to(
"Duplicate claims",
admin_report_path("duplicate-claims", format: :csv),
secondary: true
) %>

<%= govuk_button_link_to(
"FE TRI approved claims whereby the provider check status is 'failed'",
Expand Down
30 changes: 30 additions & 0 deletions db/migrate/20241213142806_add_indexes_for_ops_report.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
class AddIndexesForOpsReport < ActiveRecord::Migration[8.0]
def change
add_index(
:claims,
"LOWER(email_address)",
name: "index_claims_on_lower_email_address"
)

add_index(
:claims,
"LOWER(national_insurance_number)",
name: "index_claims_on_lower_national_insurance_number"
)

# Even though bank details are "numbers" they're stored in a string column
# we call lower on these so we don't have to treat them differently to
# other string columns when building the query.
add_index(
:claims,
"LOWER(bank_account_number), LOWER(bank_sort_code)",
name: "index_claims_on_bank_details"
)

add_index(
:claims,
"LOWER(first_name), LOWER(surname), date_of_birth",
name: "index_claims_on_personal_details"
)
end
end
6 changes: 5 additions & 1 deletion db/schema.rb
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
#
# It's strongly recommended that you check this file into your version control system.

ActiveRecord::Schema[8.0].define(version: 2024_11_26_105650) do
ActiveRecord::Schema[8.0].define(version: 2024_12_13_142806) do
# These are extensions that must be enabled in order to support this database
enable_extension "citext"
enable_extension "pg_catalog.plpgsql"
Expand Down Expand Up @@ -113,6 +113,10 @@
t.date "onelogin_idv_date_of_birth"
t.datetime "started_at", precision: nil, null: false
t.datetime "verified_at"
t.index "lower((bank_account_number)::text), lower((bank_sort_code)::text)", name: "index_claims_on_bank_details"
t.index "lower((email_address)::text)", name: "index_claims_on_lower_email_address"
t.index "lower((first_name)::text), lower((surname)::text), date_of_birth", name: "index_claims_on_personal_details"
t.index "lower((national_insurance_number)::text)", name: "index_claims_on_lower_national_insurance_number"
t.index ["academic_year"], name: "index_claims_on_academic_year"
t.index ["created_at"], name: "index_claims_on_created_at"
t.index ["eligibility_type", "eligibility_id"], name: "index_claims_on_eligibility_type_and_eligibility_id"
Expand Down
5 changes: 5 additions & 0 deletions spec/factories/claims.rb
Original file line number Diff line number Diff line change
Expand Up @@ -395,5 +395,10 @@
}
end
end

trait :random_name do
first_name { Faker::Name.first_name }
surname { Faker::Name.last_name }
end
end
end
45 changes: 45 additions & 0 deletions spec/features/admin/reports_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -163,4 +163,49 @@
expect(row.fetch("Qualification name")).to eq("BA (Hons)")
end
end

describe "Duplicate claims" do
it "returns a CSV report" do
claim_1 = create(
:claim,
:current_academic_year,
:approved,
email_address: "[email protected]",
policy: Policies::InternationalRelocationPayments,
eligibility_attributes: {
award_amount: 2_000
}
)

claim_2 = create(
:claim,
:current_academic_year,
:approved,
email_address: "[email protected]",
policy: Policies::InternationalRelocationPayments,
eligibility_attributes: {
award_amount: 2_000
}
)

sign_in_as_service_operator

visit admin_claims_path

click_on "Reports"

click_on "Duplicate claims"

csv_data = page.body

csv = CSV.parse(csv_data, headers: true)

claim_references = csv.map { |row| row.fetch("Claim reference") }

expect(claim_references).to match_array([
claim_1.reference,
claim_2.reference
])
end
end
end
Loading

0 comments on commit 0b73704

Please sign in to comment.