Skip to content

Instantly share code, notes, and snippets.

@pontusab
Last active September 13, 2024 14:09
Show Gist options
  • Save pontusab/d966d53dfe22822dc78f3637c893c27f to your computer and use it in GitHub Desktop.
Save pontusab/d966d53dfe22822dc78f3637c893c27f to your computer and use it in GitHub Desktop.
Match
-- This SQL code defines functions for calculating similarity scores optimized for reconciliation
-- between transactions and inbox items (e.g., invoices). It focuses on high precision to avoid incorrect matches.
-- Function: calculate_name_similarity_score
-- Purpose: Calculates similarity score between transaction and inbox item names
-- Input: transaction_name (text), inbox_name (text)
-- Output: numeric (0 to 0.6)
create or replace function calculate_name_similarity_score(
transaction_name text,
inbox_name text
) returns numeric as $$
declare
name_similarity numeric;
similarity_score numeric := 0;
begin
if transaction_name is null or inbox_name is null then
return 0;
end if;
name_similarity := similarity(lower(transaction_name), lower(inbox_name));
similarity_score := 0.4 * name_similarity; -- Base score is 40% of calculated similarity
if name_similarity > 0.95 then -- Bonus for very high similarity
similarity_score := similarity_score + 0.2;
end if;
return round(least(similarity_score, 0.6), 2); -- Cap at 0.6
end;
$$ language plpgsql;
-- Function: calculate_amount_similarity
-- Purpose: Calculates similarity score based on transaction and inbox amounts
-- Input: transaction_currency (text), inbox_currency (text), transaction_amount (numeric), inbox_amount (numeric)
-- Output: numeric (0 to 1)
create or replace function calculate_amount_similarity(
transaction_currency text,
inbox_currency text,
transaction_amount numeric,
inbox_amount numeric
) returns numeric as $$
declare
similarity_score numeric := 0;
relative_difference numeric;
abs_transaction_amount numeric;
abs_inbox_amount numeric;
begin
if transaction_currency = inbox_currency then
abs_transaction_amount := abs(transaction_amount);
abs_inbox_amount := abs(inbox_amount);
relative_difference := abs(abs_transaction_amount - abs_inbox_amount) / greatest(abs_transaction_amount, abs_inbox_amount, 1);
if relative_difference < 0.001 then -- Exact match
similarity_score := 1;
elsif relative_difference < 0.01 then -- Very close match
similarity_score := 0.9;
else
similarity_score := 1 - least(relative_difference, 1);
similarity_score := similarity_score * similarity_score * 0.5; -- Quadratic scaling
end if;
end if;
return round(least(similarity_score, 1), 2);
end;
$$ language plpgsql;
-- Function: calculate_date_similarity
-- Purpose: Calculates similarity score based on date difference
-- Input: transaction_date (date), inbox_date (date)
-- Output: numeric (0 to 0.7)
create or replace function calculate_date_similarity(
transaction_date date,
inbox_date date
) returns numeric as $$
declare
date_difference integer;
similarity_score numeric := 0;
begin
date_difference := abs(transaction_date - inbox_date);
case
when date_difference = 0 then similarity_score := 0.7
when date_difference <= 1 then similarity_score := 0.5
when date_difference <= 3 then similarity_score := 0.3
when date_difference <= 7 then similarity_score := 0.1
else similarity_score := 0
end case;
return similarity_score;
end;
$$ language plpgsql;
-- Function: calculate_overall_similarity
-- Purpose: Calculates overall similarity between a transaction and an inbox item
-- Input: transaction_record (record), inbox_record (record)
-- Output: numeric (0 to 1)
create or replace function calculate_overall_similarity(transaction_record record, inbox_record record)
returns numeric as $$
declare
overall_score numeric := 0;
amount_score numeric;
date_score numeric;
name_score numeric;
begin
-- Calculate individual scores
amount_score := calculate_amount_similarity(
transaction_record.currency,
inbox_record.currency,
transaction_record.amount,
inbox_record.amount
);
date_score := calculate_date_similarity(transaction_record.date, inbox_record.date);
name_score := calculate_name_similarity_score(transaction_record.name, inbox_record.display_name);
-- Weighted combination of scores (60% amount, 20% date, 20% name)
overall_score := (amount_score * 0.6) + (date_score * 0.2) + (name_score * 0.2);
-- Bonus for perfect amount match
if amount_score = 1 then
overall_score := overall_score + 0.1;
end if;
return least(overall_score, 1);
end;
$$ language plpgsql;
-- Function: find_matching_inbox_item
-- Purpose: Finds matching inbox item for a given transaction
-- Input: transaction_id (uuid), specific_inbox_id (uuid, optional)
-- Output: table (inbox_id, transaction_id, transaction_name, similarity_score, file_name)
create or replace function find_matching_inbox_item(
transaction_id uuid,
specific_inbox_id uuid default null
) returns table (
inbox_id uuid,
transaction_id uuid,
transaction_name text,
similarity_score numeric,
file_name text
) as $$
declare
transaction_data record;
inbox_data record;
calculated_score numeric;
similarity_threshold numeric := 0.95; -- High threshold for precision
begin
-- Fetch transaction data
select t.*
into transaction_data
from transactions t
where t.id = transaction_id;
if specific_inbox_id is not null then
-- Check for a specific inbox item
select *
into inbox_data
from inbox
where id = specific_inbox_id
and team_id = transaction_data.team_id
and status = 'pending';
if inbox_data.id is not null then
calculated_score := calculate_overall_similarity(transaction_data, inbox_data);
if calculated_score >= similarity_threshold then
return query select specific_inbox_id, transaction_id, transaction_data.name, calculated_score, inbox_data.file_name;
end if;
end if;
else
-- Find best matching inbox item
return query
select
i.id as inbox_id,
transaction_data.id as transaction_id,
transaction_data.name as transaction_name,
calculate_overall_similarity(transaction_data, i.*) as similarity_score,
i.file_name
from inbox i
where
i.team_id = transaction_data.team_id
and i.status = 'pending'
and calculate_overall_similarity(transaction_data, i.*) >= similarity_threshold
order by
calculate_overall_similarity(transaction_data, i.*) desc,
abs(i.date - transaction_data.date) asc
limit 1; -- Return only the best match
end if;
end;
$$ language plpgsql;
-- Function: trigger_matching_process
-- Purpose: Triggers the matching process for new transactions or inbox items
-- Input: None (triggered by INSERT or UPDATE operations)
-- Output: trigger
create or replace function trigger_matching_process() returns trigger as $$
begin
if tg_table_name = 'transactions' then
perform find_matching_inbox_item(new.id);
elsif tg_table_name = 'inbox' then
perform find_matching_inbox_item(null, new.id);
end if;
return new;
end;
$$ language plpgsql;
-- Trigger: auto_match_new_transaction
-- Purpose: Automatically matches new transactions
create trigger auto_match_new_transaction
after insert on transactions
for each row execute function trigger_matching_process();
-- Trigger: auto_match_updated_inbox_item
-- Purpose: Automatically matches new or updated inbox items
create trigger auto_match_updated_inbox_item
after insert or update of amount, date, currency on inbox
for each row
when (new.amount is not null)
execute function trigger_matching_process();
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment