Skip to content

Commit 4316d71

Browse files
committed
feat: add Identity.ModPath to identify build unit (ex: go module) (cloudwego#42)
* refactor: add `Identity.RepoPath` to locate repo * do not separate internal/external * feat: support multi modules in one repo * fix * rust tmp * tmp * binary * server interface * rename 'Module' to 'ModulePath'
1 parent 8a19aab commit 4316d71

13 files changed

Lines changed: 924 additions & 531 deletions

File tree

go_ast

5.87 MB
Binary file not shown.

src/compress/compress.rs

Lines changed: 91 additions & 89 deletions
Original file line numberDiff line numberDiff line change
@@ -1,20 +1,14 @@
1-
use std::clone;
21
use std::collections::HashMap;
32
use std::error::Error;
4-
use std::hash::Hash;
53
use std::ops::Add;
64

75
use async_recursion::async_recursion;
86
// Add these imports at the beginning of your file
9-
use serde::{Deserialize, Serialize};
107

11-
use llm::ollama::ollama_compress;
128
use types::types::{
139
CalledType, Identity, KeyValueType, Repository, ToCompressFunc, ToCompressType,
1410
};
1511

16-
use crate::compress::compress;
17-
use crate::compress::llm;
1812
use crate::compress::llm::coze::coze_compress;
1913
use crate::compress::types;
2014
use crate::storage::cache::get_cache;
@@ -29,21 +23,17 @@ pub async fn compress_all(repo: &mut Repository) {
2923
let mut to_compress_func = Vec::new();
3024
let mut to_compress_type = Vec::new();
3125

32-
for (_, pkg) in &repo.packages {
33-
for (_, func) in &pkg.functions {
34-
let id = Identity {
35-
pkg_path: func.pkg_path.clone(),
36-
name: func.name.clone(),
37-
};
38-
to_compress_func.push(id)
39-
}
26+
for (_, _mod) in &repo.modules {
27+
for (_, pkg) in &_mod.packages {
28+
for (_, func) in &pkg.functions {
29+
let id = func.id();
30+
to_compress_func.push(id)
31+
}
4032

41-
for (_, _type) in &pkg.types {
42-
let id = Identity {
43-
pkg_path: _type.pkg_path.clone(),
44-
name: _type.name.clone(),
45-
};
46-
to_compress_type.push(id)
33+
for (_, _type) in &pkg.types {
34+
let id = _type.id();
35+
to_compress_type.push(id)
36+
}
4737
}
4838
}
4939

@@ -57,24 +47,32 @@ pub async fn compress_all(repo: &mut Repository) {
5747
cascade_compress_struct(&id, repo, &mut m).await;
5848
}
5949

60-
for (id, pkg) in repo.packages.clone().iter() {
61-
if pkg.compress_data.is_none() {
62-
compress_package(id, repo).await;
63-
} else {
64-
println!("package {} is already compressed, skip it.", id);
50+
for (mname, _mod) in &repo.clone().modules {
51+
for (id, pkg) in &_mod.packages {
52+
if pkg.compress_data.is_none() {
53+
compress_package(&id, mname, repo).await;
54+
} else {
55+
println!("package {} is already compressed, skip it.", id);
56+
}
6557
}
6658
}
6759
}
6860

69-
pub async fn compress_package(id: &str, repo: &mut Repository) {
61+
pub async fn compress_package(id: &str, module: &str, repo: &mut Repository) {
7062
println!("start to compress package: {}", id);
71-
let source = repo.packages.get(id).unwrap().export_api().to_string();
72-
let compress_data = llm_compress_package(source.as_str()).await;
63+
let pkg = repo
64+
.modules
65+
.get_mut(module)
66+
.unwrap()
67+
.packages
68+
.get_mut(id)
69+
.unwrap();
70+
71+
let compress_data = llm_compress_package(pkg.export_api().as_str()).await;
7372
if compress_data.is_none() {
7473
return;
7574
}
7675
let compress_data = compress_data.unwrap();
77-
let pkg = repo.packages.get_mut(id).unwrap();
7876
pkg.compress_data = Some(compress_data);
7977
repo.save_to_cache();
8078
println!("finish to compress package: {}", id);
@@ -90,6 +88,9 @@ pub async fn cascade_compress_function(
9088

9189
{
9290
let func_opt = repo
91+
.modules
92+
.get(&id.mod_path)
93+
.unwrap()
9394
.packages
9495
.get(id.pkg_path.as_str())
9596
.unwrap()
@@ -108,7 +109,7 @@ pub async fn cascade_compress_function(
108109
}
109110

110111
// Start to compress internal function calls
111-
if let Some(calls) = &func_opt.internal_function_calls {
112+
if let Some(calls) = &func_opt.function_calls {
112113
for (_, f) in calls {
113114
if f.name == id.name && f.pkg_path == id.pkg_path {
114115
println!("find a recursive function: {}", f.name);
@@ -119,17 +120,14 @@ pub async fn cascade_compress_function(
119120
println!("find a calling cycle: {}", compress_key);
120121
continue;
121122
}
122-
let id = Identity {
123-
pkg_path: f.pkg_path.clone(),
124-
name: f.name.clone(),
125-
};
123+
let id = f.clone();
126124
m.insert(compress_key, true);
127125
to_compress.push(id);
128126
}
129127
}
130128

131129
// Start to compress internal method_calls
132-
if let Some(calls) = &func_opt.internal_method_calls {
130+
if let Some(calls) = &func_opt.method_calls {
133131
for (_, f) in calls {
134132
if f.name == id.name && f.pkg_path == id.pkg_path {
135133
println!("find a recursive method: {}", f.name);
@@ -140,10 +138,7 @@ pub async fn cascade_compress_function(
140138
println!("find a calling cycle: {}", compress_key);
141139
continue;
142140
}
143-
let id = Identity {
144-
pkg_path: f.pkg_path.clone(),
145-
name: f.name.clone(),
146-
};
141+
let id = f.clone();
147142
m.insert(compress_key, true);
148143
to_compress.push(id);
149144
}
@@ -158,6 +153,9 @@ pub async fn cascade_compress_function(
158153
let mut map = HashMap::new();
159154
let content = {
160155
let func_opt = repo
156+
.modules
157+
.get(&id.mod_path)
158+
.unwrap()
161159
.packages
162160
.get(id.pkg_path.as_str())
163161
.unwrap()
@@ -166,14 +164,22 @@ pub async fn cascade_compress_function(
166164
.unwrap();
167165

168166
// Add the compress data of internal function calls
169-
if let Some(calls) = &func_opt.internal_function_calls {
167+
if let Some(calls) = &func_opt.function_calls {
170168
for (k, f) in calls {
169+
// TODO: compress external symbol too
170+
if !repo.contains(f) {
171+
continue;
172+
}
173+
171174
if f.name == id.name && f.pkg_path == id.pkg_path {
172175
println!("find a recursive function: {}", f.name);
173176
continue;
174177
}
175178

176179
let sub_function = repo
180+
.modules
181+
.get(&f.mod_path)
182+
.unwrap()
177183
.packages
178184
.get(f.pkg_path.as_str())
179185
.unwrap()
@@ -196,14 +202,22 @@ pub async fn cascade_compress_function(
196202
}
197203

198204
// Add the compress data of internal method calls
199-
if let Some(calls) = &func_opt.internal_method_calls {
205+
if let Some(calls) = &func_opt.method_calls {
200206
for (k, f) in calls {
207+
// TODO: compress external symbol too
208+
if !repo.contains(f) {
209+
continue;
210+
}
211+
201212
if f.name == id.name && f.pkg_path == id.pkg_path {
202213
println!("find a recursive method: {}", f.name);
203214
continue;
204215
}
205216

206217
let sub_function = repo
218+
.modules
219+
.get(&f.mod_path)
220+
.unwrap()
207221
.packages
208222
.get(f.pkg_path.as_str())
209223
.unwrap()
@@ -221,42 +235,6 @@ pub async fn cascade_compress_function(
221235
}
222236
}
223237

224-
// Add the compress data of third party functions/methods
225-
let mut cache = get_cache();
226-
if let Some(calls) = &func_opt.third_party_function_calls {
227-
for (k, f) in calls {
228-
if let Some(repo) = load_repo(&mut cache, &pkg_name_to_repo_name(&f.pkg_path)) {
229-
if let Some(f) = repo.get_func(f) {
230-
if let Some(compress_data) = f.compress_data.clone() {
231-
map.insert(k.clone(), compress_data.clone());
232-
}
233-
} else {
234-
eprintln!("do not find {} in repo: {}", &f.name, &f.pkg_path);
235-
}
236-
} else {
237-
println!("meet a third party repo which we haven't compressed before.")
238-
}
239-
}
240-
}
241-
if let Some(calls) = &func_opt.third_party_method_calls {
242-
for (k, f) in calls {
243-
if let Some(repo) = load_repo(&mut cache, &pkg_name_to_repo_name(&f.pkg_path)) {
244-
if let Some(f) = repo.get_func(f) {
245-
if let Some(compress_data) = f.compress_data.clone() {
246-
map.insert(k.clone(), compress_data.clone());
247-
}
248-
} else {
249-
eprintln!("do not find {} in repo: {}", &f.name, &f.pkg_path);
250-
}
251-
} else {
252-
println!(
253-
"meet a third party repo which we haven't compressed before: {}",
254-
&f.pkg_path
255-
);
256-
}
257-
}
258-
}
259-
260238
println!("start to compress function: {}", func_opt.name);
261239
if func_opt.content.is_empty() {
262240
println!("content is empty skip it");
@@ -267,6 +245,9 @@ pub async fn cascade_compress_function(
267245
};
268246

269247
let func_opt = repo
248+
.modules
249+
.get_mut(&id.mod_path)
250+
.unwrap()
270251
.packages
271252
.get_mut(id.pkg_path.as_str())
272253
.unwrap()
@@ -314,6 +295,9 @@ pub async fn cascade_compress_struct(
314295

315296
{
316297
let struct_opt = repo
298+
.modules
299+
.get(&id.mod_path)
300+
.unwrap()
317301
.packages
318302
.get(id.pkg_path.as_str())
319303
.unwrap()
@@ -334,7 +318,8 @@ pub async fn cascade_compress_struct(
334318
// Start to compress sub struct
335319
if let Some(sub) = &stru.sub_struct {
336320
for (_, f) in sub {
337-
if !f.pkg_path.starts_with(&repo.mod_name) {
321+
// TODO: compress external symbol too
322+
if !repo.contains(f) {
338323
continue;
339324
}
340325

@@ -344,10 +329,7 @@ pub async fn cascade_compress_struct(
344329
continue;
345330
}
346331

347-
let id = Identity {
348-
pkg_path: f.pkg_path.clone(),
349-
name: f.name.clone(),
350-
};
332+
let id = f.clone();
351333
to_compress.push(id);
352334
m.insert(compress_key, true);
353335
}
@@ -356,7 +338,8 @@ pub async fn cascade_compress_struct(
356338
// Start to compress inline struct
357339
if let Some(inline) = &stru.inline_struct {
358340
for (_, f) in inline {
359-
if !f.pkg_path.starts_with(&repo.mod_name) {
341+
// TODO: compress external symbol too
342+
if !repo.contains(f) {
360343
continue;
361344
}
362345

@@ -366,10 +349,7 @@ pub async fn cascade_compress_struct(
366349
continue;
367350
}
368351

369-
let id = Identity {
370-
pkg_path: f.pkg_path.clone(),
371-
name: f.name.clone(),
372-
};
352+
let id = f.clone();
373353
to_compress.push(id);
374354
m.insert(compress_key, true);
375355
}
@@ -386,6 +366,9 @@ pub async fn cascade_compress_struct(
386366
let mut method_map = HashMap::new();
387367
let content = {
388368
let _type = repo
369+
.modules
370+
.get(&id.mod_path)
371+
.unwrap()
389372
.packages
390373
.get(id.pkg_path.as_str())
391374
.unwrap()
@@ -399,7 +382,12 @@ pub async fn cascade_compress_struct(
399382
// Add the compress data of sub struct
400383
if let Some(subs) = &_type.sub_struct {
401384
for (k, f) in subs {
402-
let pkg = repo.packages.get(f.pkg_path.as_str());
385+
let pkg = repo
386+
.modules
387+
.get(&f.mod_path)
388+
.unwrap()
389+
.packages
390+
.get(f.pkg_path.as_str());
403391
if pkg.is_none() {
404392
// try to load third party struct
405393
if let Some(repo) = load_repo(&mut cache, &pkg_name_to_repo_name(&f.pkg_path)) {
@@ -430,7 +418,12 @@ pub async fn cascade_compress_struct(
430418
// Add the compress data of inline struct
431419
if let Some(inlines) = &_type.inline_struct {
432420
for (k, f) in inlines {
433-
let pkg = repo.packages.get(f.pkg_path.as_str());
421+
let pkg = repo
422+
.modules
423+
.get(&f.mod_path)
424+
.unwrap()
425+
.packages
426+
.get(f.pkg_path.as_str());
434427
if pkg.is_none() {
435428
// try to load third party struct
436429
if let Some(repo) = load_repo(&mut cache, &pkg_name_to_repo_name(&f.pkg_path)) {
@@ -446,6 +439,9 @@ pub async fn cascade_compress_struct(
446439
continue;
447440
}
448441
let inline = repo
442+
.modules
443+
.get(&f.mod_path)
444+
.unwrap()
449445
.packages
450446
.get(f.pkg_path.as_str())
451447
.unwrap()
@@ -467,6 +463,9 @@ pub async fn cascade_compress_struct(
467463
if let Some(methods) = &_type.methods {
468464
for (k, f) in methods {
469465
let func = repo
466+
.modules
467+
.get(&f.mod_path)
468+
.unwrap()
470469
.packages
471470
.get(f.pkg_path.as_str())
472471
.unwrap()
@@ -491,6 +490,9 @@ pub async fn cascade_compress_struct(
491490
};
492491

493492
let mut type_opt = repo
493+
.modules
494+
.get_mut(&id.mod_path)
495+
.unwrap()
494496
.packages
495497
.get_mut(id.pkg_path.as_str())
496498
.unwrap()

0 commit comments

Comments
 (0)