Skip to content

Commit 36f47bf

Browse files
John Van Schultz and meta-codesync[bot]
authored and committed
Refactor lib.rs bundle_typeshed function and add support for stub package paths
Summary: [Pyrefly] Refactor lib.rs bundle_typeshed function and add support for stub package paths. Reviewed By: kinto0. Differential Revision: D84909554. fbshipit-source-id: 6aa186f67bc35c73debe1af1c3b2ac00a37dafa3
1 parent 158a054 commit 36f47bf

File tree

1 file changed

+183
-4
lines changed
  • crates/pyrefly_bundled/src

1 file changed

+183
-4
lines changed

crates/pyrefly_bundled/src/lib.rs

Lines changed: 183 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -16,40 +16,219 @@ use zstd::stream::read::Decoder;
1616

1717
/// Raw bytes of `typeshed.tar.zst`, embedded at compile time from the build's `OUT_DIR`.
///
/// The extraction code in this file reads it back through `Decoder` and `Archive`.
// NOTE(review): the archive is presumably written by this crate's build script — not visible here.
const BUNDLED_TYPESHED_BYTES: &[u8] = include_bytes!(concat!(env!("OUT_DIR"), "/typeshed.tar.zst"));
1818

19-
pub fn bundled_typeshed() -> anyhow::Result<SmallMap<PathBuf, String>> {
19+
/// Selects which top-level directory of the bundled typeshed archive is extracted.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum PathFilter {
    /// Filter for stdlib files (typeshed/stdlib/...)
    Stdlib,
    /// Filter for third-party stubs (typeshed/stubs/package-name/...)
    ThirdPartyStubs,
}

impl PathFilter {
    /// Returns the first path component an archive entry must have to pass this filter.
    ///
    /// The names are string literals, so the result is `'static` rather than tied to `self`.
    fn expected_first_component(&self) -> &'static str {
        match self {
            PathFilter::Stdlib => "stdlib",
            PathFilter::ThirdPartyStubs => "stubs",
        }
    }

    /// Returns `true` when the component right after the first one is a per-package
    /// directory rather than part of the module path.
    // Nothing in the visible code calls this yet, hence the lint allowance.
    #[allow(dead_code)]
    fn should_skip_next_component(&self) -> bool {
        match self {
            PathFilter::Stdlib => false,
            PathFilter::ThirdPartyStubs => true, // Skip package directory
        }
    }
}
42+
43+
fn extract_pyi_files_from_archive(filter: PathFilter) -> anyhow::Result<SmallMap<PathBuf, String>> {
2044
let decoder = Decoder::new(BUNDLED_TYPESHED_BYTES)?;
2145
let mut archive = Archive::new(decoder);
2246
let entries = archive
2347
.entries()
2448
.context("Cannot query all entries in typeshed archive")?;
2549

2650
let mut items = SmallMap::new();
51+
2752
for maybe_entry in entries {
2853
let mut entry = maybe_entry.context("Cannot read individual entry in typeshed archive")?;
54+
2955
if entry.header().entry_type().is_dir() {
30-
// Skip directories
3156
continue;
3257
}
58+
3359
let relative_path_context = entry
3460
.path()
3561
.context("Cannot extract path from archive entry")?;
62+
3663
let mut relative_path_components = relative_path_context.components();
64+
3765
let first_component = relative_path_components.next();
38-
if first_component.is_none_or(|component| component.as_os_str() != "stdlib") {
39-
// We bundle only the stdlib/ portion of typeshed.
66+
if first_component
67+
.is_none_or(|component| component.as_os_str() != filter.expected_first_component())
68+
{
4069
continue;
4170
}
71+
4272
let relative_path = relative_path_components.collect::<PathBuf>();
4373
if relative_path.extension().is_none_or(|ext| ext != "pyi") {
4474
// typeshed/stdlib/ contains non-.pyi files like VERSIONS that we don't care about.
4575
continue;
4676
}
77+
4778
let size = entry.size();
4879
let mut contents = String::with_capacity(size as usize);
4980
entry
5081
.read_to_string(&mut contents)
5182
.context("Cannot read content of archive entry")?;
5283
items.entry(relative_path).or_insert(contents);
5384
}
85+
5486
Ok(items)
5587
}
88+
89+
pub fn bundled_typeshed() -> anyhow::Result<SmallMap<PathBuf, String>> {
90+
extract_pyi_files_from_archive(PathFilter::Stdlib)
91+
}
92+
93+
#[allow(dead_code)]
94+
pub fn bundled_third_party_stubs() -> anyhow::Result<SmallMap<PathBuf, String>> {
95+
extract_pyi_files_from_archive(PathFilter::ThirdPartyStubs)
96+
}
97+
98+
#[cfg(test)]
mod tests {
    use super::*;

    /// The stdlib extraction succeeds, is non-empty, and yields only `.pyi` paths.
    #[test]
    fn test_bundled_typeshed_returns_stdlib_files() {
        let files = bundled_typeshed().expect("bundled_typeshed should succeed");
        assert!(!files.is_empty(), "Should contain stdlib .pyi files");

        // Verify all returned paths are .pyi files
        for (path, _) in files.iter() {
            assert_eq!(
                path.extension().and_then(|ext| ext.to_str()),
                Some("pyi"),
                "All files should have .pyi extension, found: {:?}",
                path
            );
        }
    }

    /// The leading `stdlib` directory is stripped from every returned path.
    #[test]
    fn test_bundled_typeshed_paths_are_relative() {
        let result = bundled_typeshed().unwrap();

        // Verify paths don't start with "stdlib" (it should be stripped)
        for (path, _) in result.iter() {
            let first_component = path.components().next();
            assert_ne!(
                first_component.and_then(|c| c.as_os_str().to_str()),
                Some("stdlib"),
                "Path should not start with 'stdlib', found: {:?}",
                path
            );
        }
    }

    /// At least one well-known stdlib module is present.
    #[test]
    fn test_bundled_typeshed_contains_common_modules() {
        let files = bundled_typeshed().unwrap();

        // Match whole paths (`<name>.pyi` or `<name>/__init__.pyi`). A substring test such as
        // `contains("os")` would also accept unrelated files like `posixpath.pyi`.
        let has_module = |name: &str| {
            let flat = format!("{name}.pyi");
            let package = format!("{name}/__init__.pyi");
            files.iter().any(|(path, _)| {
                path.as_path() == std::path::Path::new(&flat)
                    || path.as_path() == std::path::Path::new(&package)
            })
        };

        assert!(
            has_module("builtins") || has_module("sys") || has_module("os"),
            "Should contain at least one common stdlib module (builtins, sys, or os)"
        );
    }

    /// At least one extracted file has non-empty content.
    #[test]
    fn test_bundled_typeshed_file_contents_not_empty() {
        let files = bundled_typeshed().unwrap();

        let non_empty_count = files
            .iter()
            .filter(|(_, content)| !content.is_empty())
            .count();
        assert!(
            non_empty_count > 0,
            "Should have at least some files with content"
        );
    }

    /// Calling the extractor directly with the stdlib filter works and yields distinct keys.
    #[test]
    fn test_extract_pyi_files_from_archive_stdlib_filter() {
        let files = extract_pyi_files_from_archive(PathFilter::Stdlib)
            .expect("Should successfully extract stdlib files");
        assert!(
            !files.is_empty(),
            "Should extract at least some stdlib files"
        );

        // Count distinct keys independently instead of comparing `len()` with itself.
        let unique_paths: std::collections::HashSet<_> =
            files.iter().map(|(path, _)| path).collect();
        assert_eq!(
            files.len(),
            unique_paths.len(),
            "Should not have duplicate paths"
        );
    }

    /// Each filter maps to the expected top-level typeshed directory.
    #[test]
    fn test_path_filter_expected_first_component() {
        assert_eq!(
            PathFilter::Stdlib.expected_first_component(),
            "stdlib",
            "Stdlib filter should expect 'stdlib' component"
        );
        assert_eq!(
            PathFilter::ThirdPartyStubs.expected_first_component(),
            "stubs",
            "ThirdPartyStubs filter should expect 'stubs' component"
        );
    }

    /// Some extracted file looks like a Python stub.
    #[test]
    fn test_bundled_typeshed_contains_valid_python_stubs() {
        let files = bundled_typeshed().unwrap();

        // Check that at least some files contain Python stub signatures
        let has_python_stub_content = files.iter().any(|(_, content)| {
            content.contains("def ") || content.contains("class ") || content.contains("import ")
        });

        assert!(
            has_python_stub_content,
            "At least some files should contain Python stub content"
        );
    }

    /// Neither the stdlib nor the third-party extraction returns a non-`.pyi` path.
    #[test]
    fn test_no_non_pyi_files_included() {
        let stdlib_files = bundled_typeshed().unwrap();
        let third_party_files = bundled_third_party_stubs().unwrap();

        for (path, _) in stdlib_files.iter().chain(third_party_files.iter()) {
            let ext = path.extension().and_then(|e| e.to_str());
            assert_eq!(
                ext,
                Some("pyi"),
                "Should only include .pyi files, found extension: {:?} in path: {:?}",
                ext,
                path
            );
        }
    }
}

0 commit comments

Comments
 (0)