Skip to content

Commit 6a0fda0

Browse files
committed
trying to implement the root dir check
1 parent dc8cce8 commit 6a0fda0

6 files changed

Lines changed: 70 additions & 94 deletions

File tree

src/core/ast_parser/ast_parser.rs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@ impl AstParser {
3232
}
3333
}
3434

35-
pub fn parse(&self, path: &Path) {
35+
pub fn parse(&self, path: &Path, root_dirs: &Vec<String>) {
3636
let mut variables: HashMap<String, Vec<String>> = HashMap::new();
3737
let code = fs::read_to_string(path).expect("Failed to read file");
3838
let language: Language = tree_sitter_python::LANGUAGE.into();
@@ -45,6 +45,7 @@ impl AstParser {
4545
&code,
4646
&query,
4747
root_node,
48+
root_dirs,
4849
);
4950
let imports = result.0;
5051
let defines = result.1;
Lines changed: 13 additions & 71 deletions
Original file line numberDiff line numberDiff line change
@@ -1,49 +1,12 @@
1-
use std::process::Command;
2-
use std::path::Path;
3-
41
/// Classifies Python imports as project-level (located under a known root directory) or external.
52
pub struct ImportClassifier {
6-
/// List of standard library module names.
7-
stdlib_modules: Vec<String>
83
}
94

105
impl ImportClassifier {
116
/// Creates a new, stateless `ImportClassifier`; eligibility is decided
127
/// from the root directories passed to `is_eligible`.
138
pub fn new() -> Self {
14-
let stdlib_modules: Vec<String> = [
15-
"__phello__", "_pyrepl", "asyncio", "collections", "compression",
16-
"concurrent", "ctypes", "curses", "dbm", "email", "encodings",
17-
"ensurepip", "html", "http", "idlelib", "importlib", "json",
18-
"logging", "multiprocessing", "pathlib", "profiling", "pydoc_data",
19-
"re", "site-packages", "sqlite3", "string", "sysconfig", "test",
20-
"tkinter", "tomllib", "turtledemo", "unittest", "urllib", "venv",
21-
"wsgiref", "xml", "xmlrpc", "zipfile", "zoneinfo", "__future__", "__hello__",
22-
"_aix_support", "_android_support", "_apple_support", "_ast_unparse",
23-
"_collections_abc", "_colorize", "_compat_pickle", "_ios_support", "_markupbase",
24-
"_opcode_metadata", "_osx_support", "_py_abc", "_py_warnings", "_pydatetime",
25-
"_pydecimal", "_pyio", "_pylong", "_sitebuiltins", "_strptime", "_threading_local",
26-
"_weakrefset", "abc", "annotationlib", "antigravity", "argparse", "ast", "base64",
27-
"bdb", "bisect", "bz2", "cProfile", "calendar", "cmd", "code", "codecs",
28-
"codeop", "colorsys", "compileall", "configparser", "contextlib", "contextvars",
29-
"copy", "copyreg", "csv", "dataclasses", "datetime", "decimal", "difflib",
30-
"dis", "doctest", "enum", "filecmp", "fileinput", "fnmatch", "fractions",
31-
"ftplib", "functools", "genericpath", "getopt", "getpass", "gettext", "glob",
32-
"graphlib", "gzip", "hashlib", "heapq", "hmac", "imaplib", "inspect", "io",
33-
"ipaddress", "keyword", "linecache", "locale", "lzma", "mailbox", "mimetypes",
34-
"modulefinder", "netrc", "ntpath", "nturl2path", "numbers", "opcode", "operator",
35-
"optparse", "os", "pdb", "pickle", "pickletools", "pkgutil", "platform", "plistlib",
36-
"poplib", "posixpath", "pprint", "profile", "pstats", "pty", "py_compile", "pyclbr",
37-
"pydoc", "queue", "quopri", "random", "reprlib", "rlcompleter", "runpy", "sched",
38-
"secrets", "selectors", "shelve", "shlex", "shutil", "signal", "site", "smtplib",
39-
"socket", "socketserver", "ssl", "stat", "statistics", "stringprep", "struct",
40-
"subprocess", "symtable", "tabnanny", "tarfile", "tempfile", "textwrap", "this",
41-
"threading", "timeit", "token", "tokenize", "trace", "traceback", "tracemalloc",
42-
"tty", "turtle", "types", "typing", "uuid", "warnings", "wave", "weakref",
43-
"webbrowser", "zipapp", "zipimport"
44-
].iter().map(|s| s.to_string()).collect();
45-
46-
Self {stdlib_modules}
9+
Self {}
4710
}
4811

4912
/// Returns `true` if the import is project level.
@@ -59,38 +22,17 @@ impl ImportClassifier {
5922
/// assert_eq!(classifier.is_eligible(&"infra.db.modules".to_string(), &vec!["infra".to_string()]), true);
6023
/// assert_eq!(classifier.is_eligible(&"os".to_string(), &vec!["infra".to_string()]), false);
6124
/// ```
62-
pub fn is_eligible(&self, import: &String) -> bool {
63-
// Checks is import from standard
64-
// library or not
65-
true
66-
// if self.stdlib_modules.contains(import) {
67-
// return false;
68-
// } else if self.is_third_party(import) {
69-
// return false;
70-
// } else {
71-
// return true;
72-
// }
73-
}
25+
pub fn is_eligible(
26+
&self,
27+
import: &String,
28+
root_dirs: &Vec<String>,
29+
) -> bool {
7430

75-
// fn is_third_party(&self, import: &str) -> bool {
76-
// let site_packages = self.get_site_packages();
77-
// for sp in site_packages {
78-
// let path = Path::new(&sp).join(import);
79-
// println!("{}", path.to_string_lossy());
80-
// if path.exists() {
81-
// return true;
82-
// }
83-
// }
84-
// false
85-
// }
86-
87-
// fn get_site_packages(&self) -> Vec<String> {
88-
// let output = Command::new("python3")
89-
// .args(&["-c", "import site; print('\\n'.join(site.getsitepackages()))"])
90-
// .output()
91-
// .expect("Failed to run python3");
92-
93-
// let stdout = String::from_utf8_lossy(&output.stdout);
94-
// stdout.lines().map(|s| s.to_string()).collect()
95-
// }
31+
for dir in root_dirs {
32+
if import.starts_with(dir) {
33+
return true;
34+
}
35+
}
36+
return false;
37+
}
9638
}

src/core/ast_parser/tree_analyzer.rs

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,3 @@
1-
use std::fmt::format;
2-
31
use tree_sitter::{Query, Node, QueryCursor, StreamingIterator};
42

53
use crate::core::ast_parser::import_classifier::ImportClassifier;
@@ -23,6 +21,7 @@ impl TreeAnalyzer {
2321
code: &str,
2422
query: &Query,
2523
root_node: Node,
24+
root_dirs: &Vec<String>,
2625
) -> (Vec<String>, Vec<String>, Vec<String>) {
2726

2827
let mut imports: Vec<String> = vec![];
@@ -49,7 +48,7 @@ impl TreeAnalyzer {
4948
}
5049
"import" => {
5150
let text = self.dot_name.get(&code, cap.node);
52-
let is_eligible: bool = self.import_classifier.is_eligible(&text);
51+
let is_eligible: bool = self.import_classifier.is_eligible(&text, root_dirs);
5352
if is_eligible {
5453
imports.push(text);
5554
}
@@ -58,6 +57,7 @@ impl TreeAnalyzer {
5857
let text = self.dot_name.get(&code, cap.node);
5958
if !text.starts_with("__") & !text.ends_with("__") {
6059
let mut parent = cap.node.parent();
60+
let mut found_class = false;
6161
while let Some(p) = parent {
6262
if p.kind() == "class_definition" {
6363
if let Some(name_node) = p.child_by_field_name("name") {
@@ -66,17 +66,21 @@ impl TreeAnalyzer {
6666
.to_string();
6767
let class_method = format!("{class}.{text}");
6868
defines.push(class_method);
69+
found_class = true;
6970
}
7071
}
7172
parent = p.parent();
7273
}
74+
if !found_class {
75+
defines.push(text);
76+
}
7377
}
7478
}
7579
_ => {}
7680
}
7781
}
7882
if let Some(m) = module {
79-
let is_eligible: bool = self.import_classifier.is_eligible(&m);
83+
let is_eligible: bool = self.import_classifier.is_eligible(&m, root_dirs);
8084
if is_eligible {
8185
for n in names {
8286
let full_import = format!("{}.{}", m, n);

src/core/file_walker/file_walker.rs

Lines changed: 40 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
use std::fs;
2+
use std::fs::DirEntry;
23
use std::path::Path;
34

45
use regex::Regex;
@@ -22,34 +23,58 @@ impl FileWalker {
2223
}
2324

2425
pub fn run(&self, path: &Path) {
25-
self.walk(path);
26-
self.file_hasher.write_to_file();
26+
let root_dirs = self.get_root_dirs(path);
27+
28+
self.walk(path, &root_dirs);
2729
self.graph_creator.write_to_file();
2830
}
2931

30-
pub fn walk(&self, path: &Path) {
31-
let entries: Vec<_> = fs::read_dir(path)
32-
.unwrap()
33-
.map(|e| e.unwrap())
34-
.collect();
35-
32+
pub fn walk(&self, path: &Path, root_dirs: &Vec<String>) {
33+
let entries = self.get_entries(path);
3634
entries.par_iter().for_each(|entry| {
3735
if entry.file_type().unwrap().is_dir() {
38-
self.walk(&entry.path());
36+
self.walk(&entry.path(), root_dirs);
3937
} else if self.is_eligible(entry.path().to_str().unwrap()) {
4038
if !entry.path().to_str().unwrap().contains("test") {
4139
self.file_hasher.hash(&entry.path());
4240
}
43-
self.graph_creator.create_graph(&entry.path());
41+
self.graph_creator.create_graph(&entry.path(), &root_dirs);
4442
}
4543
});
4644
}
4745

46+
fn get_entries(&self, path: &Path) -> Vec<DirEntry> {
47+
let entries: Vec<_> = fs::read_dir(path)
48+
.unwrap()
49+
.map(|e| e.unwrap())
50+
.collect();
51+
52+
return entries
53+
}
54+
4855
fn is_eligible(&self, path: &str) -> bool {
49-
let ignore_regex = Regex::new(r"(^|/)(__+).*\.py$").unwrap();
50-
return path.ends_with(".py")
51-
&& !path.contains("venv")
52-
&& !path.contains("test")
53-
&& !ignore_regex.is_match(path)
56+
let ignore_regex = Regex::new(
57+
r"^(?!.*venv)(?!.*test)(?!.*(?:^|/)(__+).+\.py$).+\.py$"
58+
).unwrap();
59+
return !ignore_regex.is_match(path)
60+
}
61+
62+
fn is_eligible_dir(&self, path: &str) -> bool {
63+
let dir_regex = Regex::new(r"^(?!.*cache)(?!__.*__)(?!\.venv).*").unwrap();
64+
return dir_regex.is_match(path);
65+
}
66+
67+
fn get_root_dirs(&self, path: &Path) -> Vec<String> {
68+
let mut root_dirs = vec![];
69+
let entries = self.get_entries(path);
70+
for root_entry in entries {
71+
if root_entry.path().is_dir() {
72+
if self.is_eligible_dir(root_entry.path().to_str().unwrap()) {
73+
println!("{}", path.display().to_string());
74+
root_dirs.push(path.display().to_string());
75+
}
76+
}
77+
}
78+
return root_dirs
5479
}
5580
}

src/core/graph_creator/graph_creator.rs

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,8 +13,12 @@ impl GraphCreator {
1313
Self {ast_data: HashMap::new(), ast_parser: AstParser::new()}
1414
}
1515

16-
pub fn create_graph(&self, path: &Path) {
17-
self.ast_parser.parse(path);
16+
pub fn create_graph(
17+
&self,
18+
path: &Path,
19+
root_dirs: &Vec<String>,
20+
) {
21+
self.ast_parser.parse(path, root_dirs);
1822
}
1923

2024
pub fn write_to_file(&self) {

src/core/project_intializer/project_initializer.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,7 @@ impl ProjectInitializer {
4949
/// Returns an `io::Error` if configuration creation fails.
5050
pub fn init(&mut self) -> io::Result<()> {
5151
self.config_creator.create_config()?;
52-
self.file_walker.run(Path::new("./"));
52+
self.file_walker.run(Path::new("."));
5353
Ok(())
5454
}
5555
}

0 commit comments

Comments
 (0)