
Commit 84ddfcd

Added Employees Log and added pyspark solution to Final destination
1 parent d0948bb commit 84ddfcd

File tree

5 files changed: +323, -9 lines changed


Employees Log/READme.md

Lines changed: 80 additions & 0 deletions
@@ -0,0 +1,80 @@
# Employees Log
![Star Badge](https://img.shields.io/static/v1?label=%F0%9F%8C%9F&message=If%20Useful&style=style=flat&color=BC4E99)
[![View Main Folder](https://img.shields.io/badge/View-Main_Folder-971901?)](https://github.com/thecoddiwompler/SQL-Practice-Questions/tree/main)
[![View Repositories](https://img.shields.io/badge/View-My_Repositories-blue?logo=GitHub)](https://github.com/thecoddiwompler?tab=repositories)
[![View My Profile](https://img.shields.io/badge/View-My_Profile-green?logo=GitHub)](https://github.com/thecoddiwompler)

---

## 🛠️ Problem Statement

<b>Table Name : employee_log</b>
<br>

| Column Name | Type    |
| ----------- | ------- |
| emp_id      | INT     |
| log_date    | DATE    |
| flag        | CHAR(1) |

The table contains emp_id, log_date, and a flag ('Y' or 'N') indicating whether the employee logged in on the given log_date.

Write a SQL query to find the emp_id, the number of consecutive days logged in, and the start_date and end_date of each streak, for each employee.
Report only streaks of at least two consecutive login days.
<br>
<br>
<b>The query result format is shown in the following example:</b>
<br>
<br>
<details>
<summary>
Input
</summary>
<br>

<b>Table Name : employee_log</b>

| emp_id | log_date   | flag |
|--------|------------|------|
| 101    | 2024-01-02 | N    |
| 101    | 2024-01-03 | Y    |
| 101    | 2024-01-04 | N    |
| 101    | 2024-01-07 | Y    |
| 102    | 2024-01-01 | N    |
| 102    | 2024-01-02 | Y    |
| 102    | 2024-01-03 | Y    |
| 102    | 2024-01-04 | N    |
| 102    | 2024-01-05 | Y    |
| 102    | 2024-01-06 | Y    |
| 102    | 2024-01-07 | Y    |
| 103    | 2024-01-01 | N    |
| 103    | 2024-01-04 | N    |
| 103    | 2024-01-05 | Y    |
| 103    | 2024-01-06 | Y    |
| 103    | 2024-01-07 | N    |

<br/>

</details>

<details>
<summary>
Output
</summary>
<br>

| emp_id | streak_start | streak_end | streak_length |
|--------|--------------|------------|---------------|
| 102    | 2024-01-05   | 2024-01-07 | 3             |
| 102    | 2024-01-02   | 2024-01-03 | 2             |
| 103    | 2024-01-05   | 2024-01-06 | 2             |

</details>

---
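
For context, the streak detection asked for here is commonly solved with the gaps-and-islands pattern. The sketch below is an illustrative alternative, not the solution.sql added in this commit, and it assumes PostgreSQL-style date arithmetic (DATE minus INTEGER yields a DATE).

-- Illustrative gaps-and-islands sketch (assumes PostgreSQL date arithmetic);
-- the commit's own solution is in Employees Log/solution.sql below.
WITH logged_in AS (
    SELECT
        emp_id,
        log_date,
        ROW_NUMBER() OVER (PARTITION BY emp_id ORDER BY log_date) AS rn
    FROM employee_log
    WHERE flag = 'Y'
)
SELECT
    emp_id,
    MIN(log_date) AS streak_start,
    MAX(log_date) AS streak_end,
    COUNT(*)      AS streak_length
FROM logged_in
GROUP BY emp_id, log_date - rn   -- constant within a run of consecutive dates
HAVING COUNT(*) >= 2
ORDER BY emp_id, streak_start;

On the sample data this yields the three streaks shown in the Output table above.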

Employees Log/schema.sql

Lines changed: 29 additions & 0 deletions
@@ -0,0 +1,29 @@
-- Create Table
CREATE TABLE employee_log (
    emp_id INT,
    log_date DATE,
    flag CHAR(1)
);

-- Insert Data
INSERT INTO
    employee_log (emp_id, log_date, flag)
VALUES
    (101, '2024-01-02', 'N'),
    (101, '2024-01-03', 'Y'),
    (101, '2024-01-04', 'N'),
    (101, '2024-01-07', 'Y'),
    (102, '2024-01-01', 'N'),
    (102, '2024-01-02', 'Y'),
    (102, '2024-01-03', 'Y'),
    (102, '2024-01-04', 'N'),
    (102, '2024-01-05', 'Y'),
    (102, '2024-01-06', 'Y'),
    (102, '2024-01-07', 'Y'),
    (103, '2024-01-01', 'N'),
    (103, '2024-01-04', 'N'),
    (103, '2024-01-05', 'Y'),
    (103, '2024-01-06', 'Y'),
    (103, '2024-01-07', 'N');

COMMIT;
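
As a quick sanity check after loading (not part of the commit), the per-employee row counts can be compared against the Input table in the README:

-- Optional verification query (not in the commit)
SELECT emp_id, COUNT(*) AS rows_loaded
FROM employee_log
GROUP BY emp_id
ORDER BY emp_id;
-- Expected from the sample data: 101 -> 4, 102 -> 7, 103 -> 5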

Employees Log/solution.sql

Lines changed: 61 additions & 0 deletions
@@ -0,0 +1,61 @@
-- init: for each login record, look ahead to the next day's flag so that the
-- last 'Y' of every streak (lead_flag = 'N' or NULL) can be identified.
WITH init AS (
    SELECT
        emp_id,
        log_date,
        flag,
        LEAD(flag) OVER (
            PARTITION BY emp_id
            ORDER BY
                log_date
        ) lead_flag
    FROM
        employee_log
),
-- consecutive_days_tracker: join every streak-ending 'Y' row (a) to all of that
-- employee's earlier-or-equal 'Y' rows (b); rnk counts the b rows backwards from a.
-- While the dates remain consecutive, date_diff + 1 equals rnk.
consecutive_days_tracker AS (
    SELECT
        a.emp_id,
        a.log_date,
        b.log_date prev_log_date,
        a.log_date - b.log_date date_diff,
        ROW_NUMBER() OVER (
            PARTITION BY a.emp_id,
            a.log_date
            ORDER BY
                b.log_date DESC
        ) rnk
    FROM
        init a
        INNER JOIN init b ON a.emp_id = b.emp_id
        AND a.log_date >= b.log_date
        AND a.flag = 'Y'
        AND b.flag = 'Y'
        AND (
            a.lead_flag = 'N'
            OR a.lead_flag IS NULL
        )
),
-- solution: the streak length is the largest rnk for which the dates are still
-- consecutive (date_diff + 1 = rnk). For emp_id 102 ending 2024-01-07, rnk 1..3
-- (2024-01-07, 2024-01-06, 2024-01-05) satisfy the condition but rnk 4
-- (2024-01-03) does not, so the streak length is 3.
solution AS (
    SELECT
        emp_id,
        log_date,
        MAX(rnk) consecutive_days
    FROM
        consecutive_days_tracker
    WHERE
        date_diff + 1 = rnk
    GROUP BY
        emp_id,
        log_date
)
-- Map each streak end back to its start date and keep streaks of 2+ days.
SELECT
    a.emp_id,
    b.prev_log_date streak_start,
    a.log_date streak_end,
    a.consecutive_days streak_length
FROM
    solution a
    INNER JOIN consecutive_days_tracker b ON a.emp_id = b.emp_id
    AND a.consecutive_days = b.rnk
    AND a.log_date = b.log_date
WHERE
    a.consecutive_days > 1
Lines changed: 143 additions & 0 deletions
@@ -0,0 +1,143 @@
{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Create Dataframe"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Setting default log level to \"WARN\".\n",
      "To adjust logging level use sc.setLogLevel(newLevel). For SparkR, use setLogLevel(newLevel).\n",
      "24/01/15 17:24:02 WARN NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable\n",
      " \r"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "+-------+---------+---------+-----------+\n",
      "|cust_id|flight_id| origin|destination|\n",
      "+-------+---------+---------+-----------+\n",
      "| 1| Flight1| Delhi| Hyderabad|\n",
      "| 1| Flight2|Hyderabad| Kochi|\n",
      "| 1| Flight3| Kochi| Mangalore|\n",
      "| 2| Flight1| Mumbai| Ayodhya|\n",
      "| 2| Flight2| Ayodhya| Gorakhpur|\n",
      "+-------+---------+---------+-----------+\n",
      "\n"
     ]
    }
   ],
   "source": [
    "from pyspark.sql import SparkSession\n",
    "spark = SparkSession.builder.getOrCreate()\n",
    "\n",
    "flights_data = [(1,'Flight1' , 'Delhi' , 'Hyderabad'),\n",
    " (1,'Flight2' , 'Hyderabad' , 'Kochi'),\n",
    " (1,'Flight3' , 'Kochi' , 'Mangalore'),\n",
    " (2,'Flight1' , 'Mumbai' , 'Ayodhya'),\n",
    " (2,'Flight2' , 'Ayodhya' , 'Gorakhpur')\n",
    " ]\n",
    "\n",
    "_schema = \"cust_id int, flight_id string , origin string , destination string\"\n",
    "\n",
    "df_flight = spark.createDataFrame(data = flights_data , schema= _schema)\n",
    "df_flight.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "from pyspark.sql.functions import col\n",
    "\n",
    "df_final_stop = df_flight.alias(\"original_df_flight\").join(df_flight.alias(\"new_df_flight\"), \n",
    " [col(\"original_df_flight.cust_id\") == col(\"new_df_flight.cust_id\"), \\\n",
    " col(\"original_df_flight.destination\") == col(\"new_df_flight.origin\")], \"left\") \\\n",
    " .select(col(\"original_df_flight.cust_id\"),col(\"original_df_flight.destination\"),\n",
    " col(\"new_df_flight.destination\").alias(\"next_stop\")) \\\n",
    " .where(\"next_stop is NULL\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "df_origin = df_flight.alias(\"original_df_flight\").join(df_flight.alias(\"new_df_flight\"), \n",
    " [col(\"original_df_flight.cust_id\") == col(\"new_df_flight.cust_id\"), \\\n",
    " col(\"original_df_flight.origin\") == col(\"new_df_flight.destination\")], \"left\") \\\n",
    " .select(col(\"original_df_flight.cust_id\"),col(\"original_df_flight.origin\"),\n",
    " col(\"new_df_flight.origin\").alias(\"previous_stop\")) \\\n",
    " .where(\"previous_stop is NULL\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " \r"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "+-------+------+-----------+\n",
      "|cust_id|origin|destination|\n",
      "+-------+------+-----------+\n",
      "| 1| Delhi| Mangalore|\n",
      "| 2|Mumbai| Gorakhpur|\n",
      "+-------+------+-----------+\n",
      "\n"
     ]
    }
   ],
   "source": [
    "df_final = df_final_stop.alias(\"destination\") \\\n",
    " .join(df_origin.alias(\"origin\"), df_final_stop.cust_id == df_origin.cust_id, \"inner\") \\\n",
    " .select(col(\"origin.cust_id\"),col(\"origin.origin\"),col(\"destination.destination\")).show()"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
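
For readers who prefer the SQL form used elsewhere in this repo, a minimal sketch of the notebook's final-destination logic follows. The table name flights and its columns are assumed from the notebook's DataFrame, since this commit adds no schema file for it: a customer's final destination is the leg whose destination never appears as an origin for that customer, and the starting point is the leg whose origin never appears as a destination.

-- Hedged SQL sketch of the notebook's self-join approach; "flights" is an assumed table name.
WITH final_stop AS (
    SELECT f.cust_id, f.destination
    FROM flights f
    LEFT JOIN flights n
        ON n.cust_id = f.cust_id
       AND n.origin = f.destination
    WHERE n.origin IS NULL          -- no onward leg: last stop of the trip
),
first_stop AS (
    SELECT f.cust_id, f.origin
    FROM flights f
    LEFT JOIN flights p
        ON p.cust_id = f.cust_id
       AND p.destination = f.origin
    WHERE p.destination IS NULL     -- no inbound leg: start of the trip
)
SELECT s.cust_id, s.origin, d.destination
FROM first_stop s
INNER JOIN final_stop d ON d.cust_id = s.cust_id;

On the notebook's sample data this returns (1, Delhi, Mangalore) and (2, Mumbai, Gorakhpur), matching the df_final output above.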

iqsql.md

Lines changed: 10 additions & 9 deletions
@@ -2,12 +2,13 @@
 2. Department Top 3 Salary
 3. Employees Check-in Details
 4. Employees Hiring [Difficult]
-5. Final Destination
-6. Highest-Grossing Items
-7. Increasing Sales Revenue
-8. Last Person to Fit in the Bus
-9. Manager with at least 5 direct reportees
-10. Mismatched IDs
-11. Odd and Even Measurements
-12. Onboarded Cities
-13. Qualifying Criteria
+5. Employees Log [Extra Difficult]
+6. Final Destination
+7. Highest-Grossing Items
+8. Increasing Sales Revenue
+9. Last Person to Fit in the Bus
+10. Manager with at least 5 direct reportees
+11. Mismatched IDs
+12. Odd and Even Measurements
+13. Onboarded Cities
+14. Qualifying Criteria
