bq_copy_project.py
'''
Copy BigQuery tables to another project.

The copy preserves all DAY partitions and processes the tables sequentially.
'''
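# Note: BigQuery addresses a single DAY partition with a decorator of the
# form `table$YYYYMMDD`; the per-partition copy and load are assumed to be
# handled inside bq_lib by iterating over those decorators.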
from google.cloud import bigquery
from bq_lib import *
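# bq_lib is a project-local helper module (not shown here). As a rough,
# hypothetical sketch of the predicate combinators used below -- the real
# implementations may differ:
#
#   def make_is_in(names):
#       return lambda name: name in names
#
#   def make_is_not(pred):
#       return lambda name: not pred(name)
#
#   def make_filter_tables(pred):
#       return lambda tables: [t for t in tables if pred(t.table_id)]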
def src_configure(settings):
    client = bigquery.Client.from_service_account_json(settings['gcp_cfg'])
    dataset = client.dataset(settings['dataset'])
    # either copy only the whitelisted tables, or everything not ignored
    if settings['only_use']:
        f_filter = make_is_in(settings['only_use'])
    else:
        f_filter = make_is_not(make_is_in(settings['ignore']))
    table_filter = make_filter_tables(f_filter)
    list_tables = make_list_tables(dataset, table_filter)
    # the source side is only read from, never copied into: 'copy' is a no-op
    copy = lambda: True
    insert_select = make_insert_select(client)
    make_insert_data = make_make_insert_data(insert_select)
    load = make_load(make_insert_data)
    return {'list_tables': list_tables,
            'load': load,
            'copy': copy}

def dest_configure(settings):
    client = bigquery.Client.from_service_account_json(settings['gcp_cfg'])
    dataset = client.dataset(settings['dataset'])
    if settings['only_use']:
        f_filter = make_is_in(settings['only_use'])
    else:
        f_filter = make_is_not(make_is_in(settings['ignore']))
    table_filter = make_filter_tables(f_filter)
    list_tables = make_list_tables(dataset, table_filter)
    # tables keep their names in the destination dataset
    rename = lambda x: x
    copy_table = make_copy_table(dataset, [], rename)
    copy = make_copy(copy_table)
    insert_select = make_insert_select(client)
    make_insert_data = make_make_insert_data(insert_select)
    load = make_load(make_insert_data)
    return {'list_tables': list_tables,
            'load': load,
            'copy': copy}

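# Hypothetical shape of the copy pipeline built above (assumption -- the
# actual bq_lib code is not shown): make_copy_table closes over the target
# dataset and the rename function, and make_copy maps it over a table list:
#
#   def make_copy(copy_table):
#       return lambda tables: [copy_table(t) for t in tables]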
def configure(options):
    config = {
        'dev-ostro': {
            'gcp_cfg': './etc/dev-ostro/gcp.json',
            'only_use': [],
            'ignore': ['storm_warn', 'weather_extern', 'weather_hist'],
            'project': 'zephyrus-ef4-prod-ostro',
            'dataset': 'ostro',
        },
        'ostro': {
            'gcp_cfg': './etc/prod-ostro/gcp.json',
            'only_use': [],
            'ignore': ['storm_warn', 'weather_extern', 'weather_hist'],
            'project': 'zephyrus-ef4-prod-ostro',
            'dataset': 'ostro',
        },
    }
    src_config = src_configure(config[options['SOURCE']])
    dest_config = dest_configure(config[options['DESTINATION']])
    return src_config, dest_config

def main(options):
    src, dest = configure(options)
    # src and dest are plain dicts of closures; with atom keys, as in
    # Ruby or Elixir, this could read src.list_tables() instead
    all_tables = src['list_tables']()
    if options['--copy']:
        dest_tables = dest['copy'](all_tables)
    elif options['--load']:
        dest_tables = dest['list_tables']()
        dest['load'](dest_tables, all_tables)
    elif options['--drop']:
        dest_tables = dest['list_tables']()
        drop(dest_tables)  # drop comes from bq_lib

# docopt options probably must start with a unique letter:
# --reverse and --restore do not work side by side
_usage="""
Copy BigQuery tables to another project.
Preserve all DAY partitions.
Usage:
bq_copy_project (--copy | --load | --drop) SOURCE DESTINATION
Arguments:
SOURCE name of the project
DESTINATION name of the project
Options:
-h --help show this
--copy create copies of all tables in DESTINATION
--load load the data into DESTINATION tables
--drop drop all tables in DESTINATION
"""
from docopt import docopt
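# docopt parses _usage and hands main() a plain dict, along the lines of:
# {'--copy': True, '--load': False, '--drop': False, '--help': False,
#  'SOURCE': 'dev-ostro', 'DESTINATION': 'ostro'}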
if __name__ == '__main__':
    options = docopt(_usage)
    main(options)