-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathmerge.py
131 lines (103 loc) · 4.22 KB
/
merge.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
import os
from pathlib import Path
##############################################################################
# CONFIG
##############################################################################
# Every path below is anchored to the directory that contains this script.
BASE_DIR = os.path.dirname(os.path.abspath(__file__))

# Input root (one sub-folder per scraped source) and output folder for the
# merged markdown files.
DATA_DIR = os.path.join(BASE_DIR, "scraped_docs")
OUTPUT_DIR = os.path.join(BASE_DIR, "merged_docs")

# Folder names under DATA_DIR that are merged into the "essentials" bundle.
ESSENTIALS = [
    # Docs
    "developers_flow_com",
    # GitHub Repos
    "github_com_onflow_cadence_lang_org",
    "github_com_onflow_flips",
    "github_com_onflow_flow_core_contracts",
    "github_com_onflow_flow_ft",
    "github_com_onflow_flow_nft",
    "github_com_onflow_nft_storefront",
    "github_com_onflow_flow_evm_bridge",
    "github_com_dapperlabs_nba_smart_contracts",
    "github_com_dapperlabs_nfl_smart_contracts",
    "github_com_outblock_frw_scripts",
]

# Folder names under DATA_DIR that are merged into the "cadence docs" bundle.
CADENCE_DOCS = [
    # GitHub Repos
    "github_com_onflow_cadence_lang_org",
    "github_com_onflow_flow_core_contracts",
    "github_com_onflow_flow_ft",
    "github_com_onflow_flow_nft",
    "github_com_onflow_nft_storefront",
    "github_com_onflow_flow_evm_bridge",
    "github_com_dapperlabs_nba_smart_contracts",
    "github_com_dapperlabs_nfl_smart_contracts",
    "github_com_outblock_frw_scripts",
]
##############################################################################
# UTILS
##############################################################################
def ensure_dir_exists(path: str) -> None:
    """Create the directory ``path``, including missing parents, if needed.

    Args:
        path: Directory to create. A directory that already exists is left
            untouched.

    Raises:
        FileExistsError: If ``path`` exists but is not a directory.
    """
    # exist_ok=True makes makedirs itself tolerate an existing directory, so
    # there is no gap between "check" and "create" in which another process
    # could create the directory and make this call fail.
    os.makedirs(path, exist_ok=True)
def merge_md_files(src_path: str):
    """Merge all .md files under src_path into one string.

    The tree rooted at ``src_path`` is walked recursively. Every file whose
    name ends in ``.md`` is read as UTF-8 and followed by a fixed divider so
    that file boundaries remain visible in the merged text.

    Directories and files are visited in sorted name order so that repeated
    runs over the same tree produce the same output; ``os.walk`` on its own
    yields entries in arbitrary order.

    Args:
        src_path: Root directory to search. A path that does not exist
            produces an empty string, because ``os.walk`` ignores listing
            errors by default.

    Returns:
        The contents of all matching files, each followed by the divider,
        or ``''`` when no .md file is found.
    """
    divider = "\n\n\n\n\n---\n\n------------ FILE_DIVIDER ------------\n\n---\n\n\n\n\n"
    merged_content = []
    for root, dirnames, files in os.walk(src_path):
        # Sorting the list in place controls the order in which the top-down
        # walk descends into sub-directories.
        dirnames.sort()
        for file in sorted(files):
            if not file.endswith('.md'):
                continue
            file_path = os.path.join(root, file)
            with open(file_path, 'r', encoding='utf-8') as f:
                merged_content.append(f.read())
            merged_content.append(divider)
    return ''.join(merged_content)
# Merge all docs
def merge_all_md_files(output_path: str):
    """Merge all .md files in DATA_DIR into one file.

    Args:
        output_path: File to write the merged markdown to. It is created or
            overwritten, and written as UTF-8.
    """
    # merge_md_files already walks its argument recursively, so one call on
    # DATA_DIR covers the whole tree. Walking DATA_DIR here as well and
    # merging every directory found would emit each nested file once per
    # ancestor directory and would miss .md files sitting directly in
    # DATA_DIR.
    merged_content = merge_md_files(DATA_DIR)
    with open(output_path, 'w', encoding='utf-8') as f:
        f.write(merged_content)
def merge_essentials_md_files(output_path: str):
    """Write the merged .md content of the ESSENTIALS folders to output_path.

    Each name in ESSENTIALS is looked up as a folder under DATA_DIR; names
    whose path does not exist are skipped. The merged text is written as
    UTF-8, replacing any existing file at ``output_path``.
    """
    candidate_dirs = (os.path.join(DATA_DIR, name) for name in ESSENTIALS)
    sections = [
        merge_md_files(folder)
        for folder in candidate_dirs
        if os.path.exists(folder)
    ]
    with open(output_path, 'w', encoding='utf-8') as out:
        out.write(''.join(sections))
def merge_cadence_docs_md_files(output_path: str):
    """Write the merged .md content of the CADENCE_DOCS folders to output_path.

    Each name in CADENCE_DOCS is looked up as a folder under DATA_DIR; names
    whose path does not exist are skipped. The merged text is written as
    UTF-8, replacing any existing file at ``output_path``.
    """
    chunks = []
    for folder_name in CADENCE_DOCS:
        folder = os.path.join(DATA_DIR, folder_name)
        if os.path.exists(folder):
            chunks.append(merge_md_files(folder))
    combined = ''.join(chunks)
    with open(output_path, 'w', encoding='utf-8') as out_file:
        out_file.write(combined)
##############################################################################
# MAIN
##############################################################################
def main():
    """Build the three merged markdown bundles inside OUTPUT_DIR.

    The essentials bundle is written first, then the cadence docs bundle,
    then the bundle of all docs; a confirmation line is printed after each.
    """
    ensure_dir_exists(OUTPUT_DIR)

    # (label used in the log line, output file name, function that writes it)
    jobs = (
        ('essentials', 'essentials_merged.md', merge_essentials_md_files),
        ('cadence docs', 'cadence_docs_merged.md', merge_cadence_docs_md_files),
        ('all docs', 'all_merged.md', merge_all_md_files),
    )
    for label, filename, merge in jobs:
        target = os.path.join(OUTPUT_DIR, filename)
        merge(target)
        print(f"Merged {label} to: {target}")


# Run the merge only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()