-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtransformation_functions.py
74 lines (55 loc) · 2.17 KB
/
transformation_functions.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
import pandas as pd
from models import loaded_table_dict as table_dict, TableObj
import nlp_helpers
# Join strategies a user answer is matched against in prompt_join_type();
# the chosen value is forwarded to pd.merge as its ``how`` argument.
join_types = ['left', 'right', 'outer', 'inner']
def prompt_join_type():
    """Ask the user which join type to use, repeating until one is recognised.

    The free-text answer read from stdin is matched against the module-level
    ``join_types`` list through
    ``nlp_helpers.ProcessLanguageTokens.get_matches`` (threshold 90).

    Returns:
        The first join type matched in the user's answer.
    """
    while True:
        join_type_query = input('Which type of join left, right, outer, inner\n-->')
        join_type_query_tokens = nlp_helpers.ProcessLanguageTokens(query=join_type_query)
        matches = join_type_query_tokens.get_matches(join_types, threshold=90)

        # Only take the first join type found in the user query.  The lookup
        # is guarded because indexing an empty result raises IndexError, which
        # previously made the "ask again" branch unreachable.
        # NOTE(review): assumes get_matches returns an empty sequence when
        # nothing matches -- confirm against nlp_helpers.
        try:
            join_type = matches[0]
        except IndexError:
            join_type = None

        if join_type:
            return join_type

        # If join type not found then ask again (loop instead of recursing so
        # repeated bad answers cannot exhaust the call stack).
        print('Join type not found! Try again')
def prompt_join_keys(left_or_right, table_column_list):
    """Prompt for the join key of one side and match it to that table's columns.

    Args:
        left_or_right: Label inserted into the prompt text (the caller in this
            file passes 'Left' or 'Right').
        table_column_list: Candidate column names the answer is matched against.

    Returns:
        Whatever ``get_matches`` yields for the answer at threshold 91; it is
        returned unmodified.
    """
    answer = input('Join Key for the ' + left_or_right + ' Table?\n-->')
    # Match the typed answer against the candidate columns in one expression.
    return nlp_helpers.ProcessLanguageTokens(answer).get_matches(
        table_column_list, threshold=91
    )
def join(l_table, r_table, type=None, left_on=None, right_on=None):
    """Merge two tables with ``pd.merge`` and wrap the result in a TableObj.

    Any of ``type``, ``left_on`` and ``right_on`` that is missing (falsy) is
    requested interactively from the user, in that order.

    Args:
        l_table: Left table wrapper; ``l_table.table`` is merged and
            ``l_table.columns`` is offered as candidates when prompting.
        r_table: Right table wrapper, used the same way as ``l_table``.
        type: Join type forwarded to ``pd.merge`` as ``how``.  The name
            shadows the builtin but is kept because it is part of the
            public signature.
        left_on: Key column(s) of the left table.
        right_on: Key column(s) of the right table.

    Returns:
        A ``TableObj`` built from the merged frame and the name 'tx_table'.

    NOTE(review): ``union`` in this file reads columns from
    ``.table.columns`` while this function reads ``.columns`` directly on the
    wrapper -- confirm TableObj exposes that attribute.
    """
    how = type or prompt_join_type()
    left_keys = left_on or prompt_join_keys('Left', l_table.columns)
    right_keys = right_on or prompt_join_keys('Right', r_table.columns)

    # If the left key is empty then join on the default (shared) columns.
    # pd.merge only falls back to the common columns when BOTH key arguments
    # are None; passing empty lists does not trigger that fallback, so both
    # are cleared here.
    if not left_keys:
        print('Left Table keys not found! table will be joined on same columns')
        left_keys = None
        right_keys = None

    joined_table = pd.merge(
        l_table.table,
        r_table.table,
        how=how,
        left_on=left_keys,
        right_on=right_keys,
    )
    return TableObj(joined_table, 'tx_table')
def union(table_a, table_b):
    """Stack the rows of ``table_b`` under ``table_a``, SQL-union style.

    The columns of ``table_b`` are relabelled positionally with the column
    names of ``table_a`` so that ``pd.concat`` lines them up.  Rows are not
    de-duplicated, and the result gets a fresh 0..n-1 index
    (``ignore_index=True``).

    Args:
        table_a: Table wrapper whose ``.table`` supplies the first rows and
            the column names of the result.
        table_b: Table wrapper whose ``.table`` supplies the remaining rows.
            It is left unmodified.

    Returns:
        A ``TableObj`` built from the concatenated frame and the name
        'tx_table'.
    """
    # Relabel a copy: assigning to table_b.table.columns directly would
    # permanently rename the caller's table as a hidden side effect.
    aligned_b = table_b.table.copy()
    aligned_b.columns = table_a.table.columns
    return TableObj(pd.concat([table_a.table, aligned_b], ignore_index=True), 'tx_table')
def select(table, columns):
    """Print the requested columns of a table and return them as a new TableObj.

    Args:
        table: Table wrapper; the selection is taken from ``table.table``.
        columns: Column selector used to index ``table.table``.

    Returns:
        A ``TableObj`` built from the selected columns and the name 'tx_table'.
    """
    frame = table.table
    # Echo the selection to stdout before handing it back.
    print(frame[columns])
    selected = frame[columns]
    return TableObj(selected, 'tx_table')
# Dispatch table: maps a transformation name to the function in this module
# that implements it.
transformations_dict = {
'join': join,
'union': union,
'select': select
}
# --------------------- for debugging ----------------------------------
if __name__ == '__main__':
    # Manual smoke run: join the two tables registered as 'tableA' and
    # 'tableB' (prompting for type and keys) and print the result.
    print(join(table_dict.get('tableA'), table_dict.get('tableB')))