@@ -16,40 +16,219 @@ use zstd::stream::read::Decoder;
1616
/// Raw bytes of the zstd-compressed typeshed tarball, embedded into the binary at compile
/// time from `$OUT_DIR/typeshed.tar.zst`.
1717const BUNDLED_TYPESHED_BYTES : & [ u8 ] = include_bytes ! ( concat!( env!( "OUT_DIR" ) , "/typeshed.tar.zst" ) ) ;
1818
19- pub fn bundled_typeshed ( ) -> anyhow:: Result < SmallMap < PathBuf , String > > {
/// Selects which top-level directory of the bundled typeshed archive is extracted.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum PathFilter {
    /// Filter for stdlib files (typeshed/stdlib/...)
    Stdlib,
    /// Filter for third-party stubs (typeshed/stubs/package-name/...)
    ThirdPartyStubs,
}

impl PathFilter {
    /// Returns the name an archive entry's first path component must have to pass this filter.
    ///
    /// The result is a string literal, so it is `'static` and does not borrow from `self`.
    fn expected_first_component(&self) -> &'static str {
        match self {
            PathFilter::Stdlib => "stdlib",
            PathFilter::ThirdPartyStubs => "stubs",
        }
    }

    /// Reports whether the path component right after the first one is a per-package
    /// directory that a caller may want to drop.
    // Nothing visible in this file calls this yet, hence the lint allowance.
    #[allow(dead_code)]
    fn should_skip_next_component(&self) -> bool {
        match self {
            PathFilter::Stdlib => false,
            PathFilter::ThirdPartyStubs => true, // Skip package directory
        }
    }
}
42+
43+ fn extract_pyi_files_from_archive ( filter : PathFilter ) -> anyhow:: Result < SmallMap < PathBuf , String > > {
2044 let decoder = Decoder :: new ( BUNDLED_TYPESHED_BYTES ) ?;
2145 let mut archive = Archive :: new ( decoder) ;
2246 let entries = archive
2347 . entries ( )
2448 . context ( "Cannot query all entries in typeshed archive" ) ?;
2549
2650 let mut items = SmallMap :: new ( ) ;
51+
2752 for maybe_entry in entries {
2853 let mut entry = maybe_entry. context ( "Cannot read individual entry in typeshed archive" ) ?;
54+
2955 if entry. header ( ) . entry_type ( ) . is_dir ( ) {
30- // Skip directories
3156 continue ;
3257 }
58+
3359 let relative_path_context = entry
3460 . path ( )
3561 . context ( "Cannot extract path from archive entry" ) ?;
62+
3663 let mut relative_path_components = relative_path_context. components ( ) ;
64+
3765 let first_component = relative_path_components. next ( ) ;
38- if first_component. is_none_or ( |component| component. as_os_str ( ) != "stdlib" ) {
39- // We bundle only the stdlib/ portion of typeshed.
66+ if first_component
67+ . is_none_or ( |component| component. as_os_str ( ) != filter. expected_first_component ( ) )
68+ {
4069 continue ;
4170 }
71+
4272 let relative_path = relative_path_components. collect :: < PathBuf > ( ) ;
4373 if relative_path. extension ( ) . is_none_or ( |ext| ext != "pyi" ) {
4474 // typeshed/stdlib/ contains non-.pyi files like VERSIONS that we don't care about.
4575 continue ;
4676 }
77+
4778 let size = entry. size ( ) ;
4879 let mut contents = String :: with_capacity ( size as usize ) ;
4980 entry
5081 . read_to_string ( & mut contents)
5182 . context ( "Cannot read content of archive entry" ) ?;
5283 items. entry ( relative_path) . or_insert ( contents) ;
5384 }
85+
5486 Ok ( items)
5587}
88+
89+ pub fn bundled_typeshed ( ) -> anyhow:: Result < SmallMap < PathBuf , String > > {
90+ extract_pyi_files_from_archive ( PathFilter :: Stdlib )
91+ }
92+
93+ #[ allow( dead_code) ]
94+ pub fn bundled_third_party_stubs ( ) -> anyhow:: Result < SmallMap < PathBuf , String > > {
95+ extract_pyi_files_from_archive ( PathFilter :: ThirdPartyStubs )
96+ }
97+
#[cfg(test)]
mod tests {
    use std::collections::HashSet;
    use std::path::Path;

    use super::*;

    /// Returns true when `files` holds the stub of top-level module `name`, laid out either
    /// as the single file `name.pyi` or as the package `name/__init__.pyi`.
    fn has_module(files: &SmallMap<PathBuf, String>, name: &str) -> bool {
        let as_file = PathBuf::from(format!("{name}.pyi"));
        let as_package = Path::new(name).join("__init__.pyi");
        files
            .iter()
            .any(|(path, _)| *path == as_file || *path == as_package)
    }

    #[test]
    fn test_bundled_typeshed_returns_stdlib_files() {
        let files = bundled_typeshed().expect("bundled_typeshed should succeed");
        assert!(!files.is_empty(), "Should contain stdlib .pyi files");

        // Verify all returned paths are .pyi files
        for (path, _) in files.iter() {
            assert_eq!(
                path.extension().and_then(|ext| ext.to_str()),
                Some("pyi"),
                "All files should have .pyi extension, found: {:?}",
                path
            );
        }
    }

    #[test]
    fn test_bundled_typeshed_paths_are_relative() {
        let files = bundled_typeshed().expect("bundled_typeshed should succeed");

        // Verify paths don't start with "stdlib" (it should be stripped)
        for (path, _) in files.iter() {
            let first_component = path.components().next();
            assert_ne!(
                first_component.and_then(|c| c.as_os_str().to_str()),
                Some("stdlib"),
                "Path should not start with 'stdlib', found: {:?}",
                path
            );
        }
    }

    #[test]
    fn test_bundled_typeshed_contains_common_modules() {
        let files = bundled_typeshed().expect("bundled_typeshed should succeed");

        // Match exact module paths: a substring test such as `contains("os")` is also
        // satisfied by unrelated files like `posix.pyi` and so verifies nothing.
        for module in ["builtins", "sys", "os"] {
            assert!(
                has_module(&files, module),
                "Should contain the stub for common stdlib module {:?}",
                module
            );
        }
    }

    #[test]
    fn test_bundled_typeshed_file_contents_not_empty() {
        let files = bundled_typeshed().expect("bundled_typeshed should succeed");

        assert!(
            files.iter().any(|(_, content)| !content.is_empty()),
            "Should have at least some files with content"
        );
    }

    #[test]
    fn test_extract_pyi_files_from_archive_stdlib_filter() {
        let files = extract_pyi_files_from_archive(PathFilter::Stdlib)
            .expect("Should successfully extract stdlib files");
        assert!(
            !files.is_empty(),
            "Should extract at least some stdlib files"
        );

        // Count distinct keys independently of the map instead of comparing its length
        // with itself.
        let unique_paths: HashSet<&PathBuf> = files.iter().map(|(path, _)| path).collect();
        assert_eq!(
            files.len(),
            unique_paths.len(),
            "Should not have duplicate paths"
        );
    }

    #[test]
    fn test_path_filter_expected_first_component() {
        assert_eq!(
            PathFilter::Stdlib.expected_first_component(),
            "stdlib",
            "Stdlib filter should expect 'stdlib' component"
        );
        assert_eq!(
            PathFilter::ThirdPartyStubs.expected_first_component(),
            "stubs",
            "ThirdPartyStubs filter should expect 'stubs' component"
        );
    }

    #[test]
    fn test_bundled_typeshed_contains_valid_python_stubs() {
        let files = bundled_typeshed().expect("bundled_typeshed should succeed");

        // Check that at least some files contain Python stub signatures
        let has_python_stub_content = files.iter().any(|(_, content)| {
            content.contains("def ") || content.contains("class ") || content.contains("import ")
        });

        assert!(
            has_python_stub_content,
            "At least some files should contain Python stub content"
        );
    }

    #[test]
    fn test_no_non_pyi_files_included() {
        let stdlib_files = bundled_typeshed().expect("bundled_typeshed should succeed");
        let third_party_files =
            bundled_third_party_stubs().expect("bundled_third_party_stubs should succeed");

        for (path, _) in stdlib_files.iter().chain(third_party_files.iter()) {
            let ext = path.extension().and_then(|e| e.to_str());
            assert_eq!(
                ext,
                Some("pyi"),
                "Should only include .pyi files, found extension: {:?} in path: {:?}",
                ext,
                path
            );
        }
    }
}
0 commit comments