sbms4d · cheeryNinja · Dec 16, 2024 · Dec 16, 2024
diff --git a/README.md b/README.md
@@ -142,3 +142,41 @@ code to github and take the rest of the afternoon off to ~~recover~~ relax!
 1. Upload all of your code to your forked github repo in a new branch, and create a pull request with your changes into 
 the main branch.
 2. Share your branch name with your recruiting contact, who will be in touch regarding the results of your test.
+
+#
+
+
+
+## Sexi-silverbullet test solution
+
+### How to Run the SQL Tests Using Docker and Trino
+
+#### Step 1: Clone the Repository
+First, clone the repository to your local machine:
+
+https://github.com/cheeryNinja/wasb_sql_test.git   
+cd wasb_sql_test
+
+#### Step 2: Run the Docker Container with Mounted Volume
+To mount the repository folder as a Docker volume, run the appropriate command based on your operating system.
+
+For Windows (Command Prompt):
+docker run -v "%cd%":/mnt/data --name sexi-silverbullet -d trinodb/trino
+
+For Linux/macOS:
+docker run -v "$(pwd)":/mnt/data --name sexi-silverbullet -d trinodb/trino
+
+#### Step 3: Execute SQL Files to Create Data Structures
+Run the following commands to create the necessary tables and populate data:
+
+docker exec -it sexi-silverbullet trino --file /mnt/data/create_employees.sql   
+docker exec -it sexi-silverbullet trino --file /mnt/data/create_expenses.sql   
+docker exec -it sexi-silverbullet trino --file /mnt/data/create_invoices.sql   
+
+#### Step 4: Execute SQL Files to Verify Test Results
+To run verification queries and validate the data:
+
+docker exec -it sexi-silverbullet trino --file /mnt/data/find_manager_cycles.sql   
+docker exec -it sexi-silverbullet trino --file /mnt/data/calculate_largest_expensors.sql  
+docker exec -it sexi-silverbullet trino --file /mnt/data/generate_supplier_payment_plans.sql  
+
diff --git a/calculate_largest_expensors.sql b/calculate_largest_expensors.sql
@@ -0,0 +1,30 @@
+-- Query to calculate the largest expensors who expensed more than 1000
+-- of goods or services in SExI
+--------------------------------------------------------------------------------------
+-- Result:
+--   - employee_id - A unique identifier of employee
+--   - employee_name - Employee's full name
+--   - manager_id - A unique identifier of manager
+--   - manager_name - Manager's full name
+--   - total_expensed_amount - Total expensed amount 
+
+USE memory.default;
+
+SELECT 
+    e.employee_id, 
+    CONCAT(e.first_name, ' ', e.last_name) AS employee_name, 
+    e.manager_id, 
+    CONCAT(m.first_name, ' ', m.last_name) AS manager_name, 
+    SUM(exp.unit_price * exp.quantity) AS total_expensed_amount -- Total expensed amount
+FROM 
+    memory.default.EMPLOYEE e
+LEFT JOIN 
+    memory.default.EXPENSE exp ON e.employee_id = exp.employee_id
+LEFT JOIN 
+    memory.default.EMPLOYEE m ON e.manager_id = m.employee_id
+GROUP BY 
+    e.employee_id, e.first_name, e.last_name, e.manager_id, m.first_name, m.last_name
+HAVING 
+    SUM(exp.unit_price * exp.quantity) > 1000 -- Include only employees who expensed more than 1000
+ORDER BY 
+    total_expensed_amount DESC;
diff --git a/create_employees.sql b/create_employees.sql
@@ -0,0 +1,39 @@
+-- Create a new EMPLOYEE table in the memory.default schema
+--------------------------------------------------------------------------------------
+-- The table has three columns:
+--   - employee_id: A unique identifier for each employee (TINYINT)
+--   - first_name: The first name of the employee (VARCHAR)
+--   - last_name: The last name of the employee (VARCHAR)
+--   - job_title: The job title of the employee (VARCHAR)
+--   - manager_id: The ID of the manager to whom the employee reports (TINYINT)
+
+
+DROP TABLE IF EXISTS memory.default.EMPLOYEE;
+
+CREATE TABLE IF NOT EXISTS memory.default.EMPLOYEE (
+    employee_id TINYINT,
+    first_name VARCHAR,
+    last_name VARCHAR,
+    job_title VARCHAR,
+    manager_id TINYINT
+);
+
+
+
+-- Insert data into the EMPLOYEE table
+-- Data from hr/employee_index.csv
+--------------------------------------------------------------------------------------
+
+INSERT INTO memory.default.EMPLOYEE (employee_id, first_name, last_name, job_title, manager_id) 
+VALUES
+	(1, 'Ian', 'James', 'CEO', 4),
+	(2, 'Umberto', 'Torrielli', 'CSO', 1),
+	(3, 'Alex', 'Jacobson', 'MD EMEA', 2),
+	(4, 'Darren', 'Poynton', 'CFO', 2),
+	(5, 'Tim', 'Beard', 'MD APAC', 2),
+	(6, 'Gemma', 'Dodd', 'COS', 1),
+	(7, 'Lisa', 'Platten', 'CHR', 6),
+	(8, 'Stefano', 'Camisaca', 'GM Activation', 2),
+	(9, 'Andrea', 'Ghibaudi', 'MD NAM', 2);
+
+
diff --git a/create_expenses.sql b/create_expenses.sql
@@ -0,0 +1,30 @@
+-- Create a new EXPENSE table in the memory.default schema
+--------------------------------------------------------------------------------------
+-- The table has three columns:
+--   - employee_id: The ID of the employee associated with the expense (TINYINT)
+--   - unit_price: The price per unit of the item or service (DECIMAL with 2 decimal places)
+--   - quantity: The number of units purchased or expensed (TINYINT)
+
+DROP TABLE IF EXISTS memory.default.EXPENSE;
+
+CREATE TABLE IF NOT EXISTS memory.default.EXPENSE (
+    employee_id TINYINT,
+    unit_price DECIMAL(8,2),
+    quantity TINYINT
+);
+
+
+-- Insert data into the EXPENSE table
+-- Data from finance/receipts_from_last_night/*
+--------------------------------------------------------------------------------------
+
+INSERT INTO memory.default.EXPENSE (employee_id, unit_price, quantity)
+VALUES
+	(3, 6.50, 14),
+	(3, 11.00, 20),
+	(3, 22.00, 18),
+	(3, 13.00, 75),
+	(9, 300.00, 1),
+	(4, 40.00, 9),
+	(2, 17.50, 4);
+
diff --git a/create_invoices.sql b/create_invoices.sql
@@ -0,0 +1,61 @@
+-- Create the SUPPLIER table in the memory.default schema
+--------------------------------------------------------------------------------------
+-- The table has two columns:
+--   - supplier_id: A unique identifier for each supplier (TINYINT)
+--   - name: The name of the supplier (VARCHAR)
+
+DROP TABLE IF EXISTS memory.default.SUPPLIER;
+
+
+CREATE TABLE IF NOT EXISTS memory.default.SUPPLIER (
+    supplier_id TINYINT NOT NULL,
+    name VARCHAR NOT NULL
+);
+
+
+-- Insert data into the SUPPLIER table
+-- Data from finance/invoices_due/*
+--------------------------------------------------------------------------------------
+
+INSERT INTO memory.default.SUPPLIER (supplier_id, name)
+VALUES
+	(1, 'Catering Plus'),
+	(2, 'Dave''s Discos'),
+	(3, 'Entertainment tonight'),
+	(4, 'Party Animals');
+
+
+-- Create the INVOICE table in the memory.default schema
+--------------------------------------------------------------------------------------
+-- The table has three columns:
+--   - supplier_id: The ID of the supplier associated with the invoice (TINYINT)
+--   - invoice_amount: The amount of the invoice (DECIMAL with 2 decimal places)
+--   - due_date: The due date of the invoice, calculated as last day of any given month (DATE)
+
+
+DROP TABLE IF EXISTS memory.default.INVOICE;
+
+CREATE TABLE IF NOT EXISTS memory.default.INVOICE (
+    supplier_id TINYINT,
+    invoice_amount DECIMAL(8,2),
+    due_date DATE
+);
+
+
+-- Insert data into the INVOICE table
+-- Data from finance/invoices_due/*
+--------------------------------------------------------------------------------------
+
+INSERT INTO memory.default.INVOICE (supplier_id, invoice_amount, due_date)
+VALUES
+	(4, 6000.00, last_day_of_month(date_add('month', 3, current_date))),
+	(1, 2000.00, last_day_of_month(date_add('month', 2, current_date))),
+	(1, 1500.00, last_day_of_month(date_add('month', 3, current_date))),
+	(2, 500.00, last_day_of_month(date_add('month', 1, current_date))),
+	(3, 6000.00, last_day_of_month(date_add('month', 3, current_date)));
+
+
+
+
+
+
diff --git a/find_manager_cycles.sql b/find_manager_cycles.sql
@@ -0,0 +1,38 @@
+-- Query to check for cycles of employees who approve each others expenses in SExI
+--------------------------------------------------------------------------------------
+-- Result:
+--   - employee_id - A unique identifier for each employee
+--   - cycle_path - Cycle that represents the expences approval chain
+
+USE memory.default;
+
+WITH RECURSIVE EmployeeHierarchy (emp_id, mgr_id, hierarchy_path) AS (
+    -- Start with employees and their direct managers
+    SELECT
+        employee_id AS emp_id,
+        manager_id AS mgr_id,
+        CAST(employee_id AS VARCHAR) AS hierarchy_path
+    FROM
+        memory.default.EMPLOYEE
+    WHERE manager_id IS NOT NULL
+
+    UNION ALL
+
+    -- Traverse the hierarchy to find indirect managers
+    SELECT
+        eh.emp_id,
+        e.manager_id,
+        CONCAT(eh.hierarchy_path, ' , ', CAST(e.employee_id AS VARCHAR)) AS hierarchy_path
+    FROM
+        EmployeeHierarchy eh
+        INNER JOIN memory.default.EMPLOYEE e ON eh.mgr_id = e.employee_id
+    WHERE POSITION(CAST(e.employee_id AS VARCHAR) IN eh.hierarchy_path) = 0 -- Avoid revisiting employees
+)
+-- Identify cycles in the hierarchy
+SELECT
+    emp_id AS employee_id,
+    hierarchy_path AS cycle_path
+FROM
+    EmployeeHierarchy
+WHERE POSITION(CAST(mgr_id AS VARCHAR) IN hierarchy_path) > 0 -- Check if manager is already in the path
+ORDER  BY employee_id;
diff --git a/generate_supplier_payment_plans.sql b/generate_supplier_payment_plans.sql
@@ -0,0 +1,53 @@
+-- Query to generate supplier payment plans 
+--------------------------------------------------------------------------------------
+-- Result:
+--   - supplier - the `SUPPLIER.name` of the `SUPPLIER.supplier_id`.
+--   - payment_amount - the sum of all uniform monthly payments to fully pay the `SUPPLIER` for any `INVOICE` 
+--                      before the `INVOICE.due_date`. If a supplier has multiple invoices, the aggregate monthly payments may be uneven. 
+--   - balance_outstanding - total balance outstanding across ALL `INVOICE`s for the `SUPPLIER.supplier_id`
+--   - payment_date - the last day of the month for any payment for any invoice
+
+
+USE memory.default;
+
+
+WITH PaymentSchedule AS (
+    -- Calculate invoice payments and distribute them across months until the due date
+    SELECT
+        inv.supplier_id,
+        inv.invoice_amount / DATE_DIFF('month', CURRENT_DATE, inv.due_date) AS monthly_invoice,
+        last_day_of_month(DATE_ADD('month', offset - 1, CURRENT_DATE)) AS payment_date
+    FROM memory.default.INVOICE inv,
+         UNNEST(SEQUENCE(1, DATE_DIFF('month', CURRENT_DATE, inv.due_date))) AS t(offset) -- Alias the unnest result
+),
+MonthlyPayments AS (
+    -- Aggregate monthly payments for each supplier
+    SELECT
+        ps.supplier_id,
+        sup.name AS supplier_name,
+        ps.payment_date,
+        SUM(ps.monthly_invoice) AS total_monthly_payment,
+        SUM(SUM(ps.monthly_invoice)) OVER (PARTITION BY ps.supplier_id 
+                                          ORDER BY ps.payment_date 
+                                          ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
+         ) AS cumulative_payments,
+        GREATEST(
+            SUM(SUM(ps.monthly_invoice)) OVER (PARTITION BY ps.supplier_id) 
+            - SUM(SUM(ps.monthly_invoice)) OVER (PARTITION BY ps.supplier_id 
+                                                 ORDER BY ps.payment_date 
+                                                 ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW),
+            0
+        ) AS balance_outstanding
+    FROM PaymentSchedule ps
+    JOIN memory.default.SUPPLIER sup ON ps.supplier_id = sup.supplier_id
+    GROUP BY ps.supplier_id, ps.payment_date, sup.name
+)
+-- Payment schedule
+SELECT
+    supplier_id,
+    supplier_name,
+    total_monthly_payment as payment_amount,
+    balance_outstanding,
+    payment_date
+FROM MonthlyPayments
+ORDER BY supplier_id, payment_date;