From f2c9ac1f94afc5aa87594013a08b5818e19a1d49 Mon Sep 17 00:00:00 2001 From: suba1987 <47676633+suba1987@users.noreply.github.com> Date: Thu, 8 May 2025 18:24:46 +0200 Subject: [PATCH] wasb sql test commit This PR adds my solutions for the wasb sql test. - Data is loaded manually from the files. In the real world this data would be loaded either by some ETL process, custom scripts, COPY commands or something similar. In the interest of time, I focused on SQL exercises. If I had to load the data automatically, I'd probably try to write a Python script to parse the files. --- calculate_largest_expensors.sql | 19 ++++++++ create_employees.sql | 21 +++++++++ create_expenses.sql | 17 +++++++ create_invoices.sql | 30 +++++++++++++ find_manager_cycles.sql | 32 +++++++++++++ generate_supplier_payment_plans.sql | 70 +++++++++++++++++++++++++++++ 6 files changed, 189 insertions(+) diff --git a/calculate_largest_expensors.sql b/calculate_largest_expensors.sql index e69de29..0618f9c 100644 --- a/calculate_largest_expensors.sql +++ b/calculate_largest_expensors.sql @@ -0,0 +1,19 @@ +USE memory.default + +SELECT + e.employee_id + ,CONCAT (e.first_name,' ',e.last_name) AS employee_name + ,e.manager_id + ,CONCAT (m.first_name,' ',m.last_name) AS manager_name + ,SUM(ex.unit_price * ex.quantity) AS total_expensed_amount +FROM EXPENSE ex +INNER JOIN EMPLOYEE e ON ex.employee_id = e.employee_id +LEFT JOIN EMPLOYEE m ON m.employee_id = e.manager_id +GROUP BY e.employee_id + ,e.first_name + ,e.last_name + ,e.manager_id + ,m.first_name + ,m.last_name +HAVING SUM(ex.unit_price * ex.quantity) > 1000 +ORDER BY total_expensed_amount DESC; diff --git a/create_employees.sql b/create_employees.sql index e69de29..a2a471d 100644 --- a/create_employees.sql +++ b/create_employees.sql @@ -0,0 +1,21 @@ +USE memory.default; + +CREATE TABLE IF NOT EXISTS EMPLOYEE ( + employee_id TINYINT, + first_name VARCHAR, + last_name VARCHAR, + job_title VARCHAR, + manager_id TINYINT +); + +-- load the initial data from employee_index.csv +INSERT INTO EMPLOYEE VALUES + (1, 'Ian', 'James', 'CEO', 4), + (2, 'Umberto', 'Torrielli', 'CSO', 1), + (3, 'Alex', 'Jacobson', 'MD EMEA', 2), + (4, 'Darren', 'Poynton', 'CFO', 2), + (5, 'Tim', 'Beard', 'MD APAC', 2), + (6, 'Gemma', 'Dodd', 'COS', 1), + (7, 'Lisa', 'Platten', 'CHR', 6), + (8, 'Stefano', 'Camisaca', 'GM Activation', 2), + (9, 'Andrea', 'Ghibaudi', 'MD NAM', 2); \ No newline at end of file diff --git a/create_expenses.sql b/create_expenses.sql index e69de29..0d2f564 100644 --- a/create_expenses.sql +++ b/create_expenses.sql @@ -0,0 +1,17 @@ +USE memory.default; + +CREATE TABLE IF NOT EXISTS EXPENSE ( + employee_id TINYINT, + unit_price DECIMAL(8, 2), + quantity TINYINT +); + +-- load the initial data from receipts from last night +INSERT INTO EXPENSE VALUES + (3, 6.50, 14), + (3, 11.00, 20), + (3, 22.00, 18), + (3, 13.00, 75), + (9, 300.00, 1), + (4, 40.00, 9), + (2, 17.50, 4); diff --git a/create_invoices.sql b/create_invoices.sql index e69de29..6c649e3 100644 --- a/create_invoices.sql +++ b/create_invoices.sql @@ -0,0 +1,30 @@ +USE memory.default; + +CREATE TABLE IF NOT EXISTS SUPPLIER ( + supplier_id TINYINT, + name VARCHAR +); + +CREATE TABLE IF NOT EXISTS INVOICE ( + supplier_id TINYINT, + invoice_amount DECIMAL(8, 2), + due_date DATE +); + +INSERT INTO SUPPLIER VALUES + (1, 'Catering Plus'), + (2, 'Dave''s Discos'), + (3, 'Entertainment tonight'), + (4, 'Ice Ice Baby'), + (5, 'Party Animals'); + +-- I used current date to figure out the due date, in the real world we would probably +-- never do something like this because if the data gets reloaded at some point, the +-- whole payment scheduled would turn to mess +INSERT INTO INVOICE VALUES + (5, 6000.00, LAST_DAY_OF_MONTH(DATE_ADD('month', 3, CURRENT_DATE))), + (1, 2000.00, LAST_DAY_OF_MONTH(DATE_ADD('month', 2, CURRENT_DATE))), + (1, 1500.00, LAST_DAY_OF_MONTH(DATE_ADD('month', 3, CURRENT_DATE))), + (2, 500.00, LAST_DAY_OF_MONTH(DATE_ADD('month', 1, CURRENT_DATE))), + (3, 6000.00, LAST_DAY_OF_MONTH(DATE_ADD('month', 3, CURRENT_DATE))), + (4, 4000.00, LAST_DAY_OF_MONTH(DATE_ADD('month', 6, CURRENT_DATE))); diff --git a/find_manager_cycles.sql b/find_manager_cycles.sql index e69de29..91c3eed 100644 --- a/find_manager_cycles.sql +++ b/find_manager_cycles.sql @@ -0,0 +1,32 @@ +USE memory.default + +WITH RECURSIVE employee_manager ( + employee_id + ,manager_id + ,loop + ) +AS ( + -- base query, selects all employees + SELECT employee_id + ,manager_id + ,ARRAY [employee_id] AS loop + FROM EMPLOYEE + + UNION ALL + + -- recursive query, continuously joins to the base query + SELECT em.employee_id + ,e.manager_id + ,em.loop || e.employee_id + FROM employee_manager em + INNER JOIN EMPLOYEE e ON em.manager_id = e.employee_id + WHERE NOT CONTAINS ( + em.loop + ,e.employee_id + ) -- breaks the recursion, makes sure that the employee is not already in the loop + ) +-- final result, selects only those employees who are a part of the cycle +SELECT employee_id + ,loop +FROM employee_manager +WHERE manager_id = employee_id; diff --git a/generate_supplier_payment_plans.sql b/generate_supplier_payment_plans.sql index e69de29..2d00b84 100644 --- a/generate_supplier_payment_plans.sql +++ b/generate_supplier_payment_plans.sql @@ -0,0 +1,70 @@ +USE memory.default + +------------------------------ +/* +This SQL file displays payment plans for each of the suppliers. It consists of three +CTEs. +supplier_invoice finds the latest due date per supplier and calculates the sum of all +invoices for each of them. +monthly_payment_amount calculates number of monthly payments remaining based off the +latest due date. It also calculates monthly payment amount and the last month amount +(monthly payment plus the remainder). +balance generates a sequence (extra rows) for each of the months we have until due +date to cover our expenses. It also displays balance left for each month. + +The query was formatted using https://poorsql.com/ formatter. +*/ +------------------------------ + +WITH supplier_invoice +AS ( + SELECT s.supplier_id + ,s.name AS supplier_name + ,SUM(i.invoice_amount) AS total_invoice_amount + ,MAX(i.due_date) latest_due_date + FROM INVOICE i + LEFT JOIN SUPPLIER s ON i.supplier_id = s.supplier_id + GROUP BY s.supplier_id + ,s.name + ), +monthly_payment_amount +AS ( + SELECT supplier_id + ,supplier_name + ,total_invoice_amount + ,latest_due_date + ,DATE_DIFF('month', CURRENT_DATE, latest_due_date) + 1 AS number_of_payments -- added +1 because we want to start payments at the end of the current month, this creates that extra month for payment + ,FLOOR(total_invoice_amount / (DATE_DIFF('month', CURRENT_DATE, latest_due_date) + 1)) AS monthly_amount -- total amount divided by number of months to pay rounded to nearest integer + ,total_invoice_amount % (DATE_DIFF('month', CURRENT_DATE, latest_due_date) + 1) AS last_month_remainder -- remainder from the monthly amount, to be added for the last month of payment + FROM supplier_invoice + ), +balance +AS ( + SELECT supplier_id + ,supplier_name + ,CASE + WHEN seq < number_of_payments - 1 + THEN monthly_amount + ELSE last_month_remainder + monthly_amount + END AS payment_amount -- these are monthly payments which are all the same except last one that includes the remainder + ,seq + ,total_invoice_amount - sum(CASE + WHEN seq < number_of_payments - 1 + THEN monthly_amount + ELSE last_month_remainder + monthly_amount + END) OVER ( + PARTITION BY supplier_id ORDER BY seq + ) AS balance_outstanding -- calculates the balance remaining + , + LAST_DAY_OF_MONTH(date_add('month', seq, CURRENT_DATE)) AS payment_date + FROM monthly_payment_amount + CROSS JOIN UNNEST(sequence(0, number_of_payments - 1)) AS t(seq) -- generating a sequence of months for each supplier based on the latest due date + ) +SELECT supplier_id + ,supplier_name + ,payment_amount + ,balance_outstanding + ,payment_date +FROM balance +ORDER BY supplier_id + ,payment_date;