Skip to content

Commit ddc6a46

Browse files
authored
Adds support for pulling packages from PyPI (#273)
This PR adds experimental support for pulling packages from the PyPI repository. Such dependencies must be annotated with the `pypi:` prefix and have no special support in the linker phase. <!-- CURSOR_SUMMARY --> --- > [!NOTE] > **Medium Risk** > Adds a new network-backed package source and resolution logic (PEP 440 parsing, metadata-driven version selection, dependency inference), which can affect install determinism and compatibility across registries. > > **Overview** > Adds *experimental* `pypi:` support end-to-end: new `Range`/`Reference` variants plus PEP 440 parsing (`PypiVersion`/`PypiSpecifierSet`) via `pep440_rs`, and serialization coverage in `Descriptor`/`Locator`. > > `zpm` now resolves `pypi:` ranges/tags by querying PyPI JSON metadata, selecting a wheel artifact, and deriving dependencies from `requires_dist` (ignoring marker-based entries), then fetches and caches the wheel bytes as a `.zip` artifact. Content flag extraction is skipped for PyPI archives. > > Acceptance tests add a mock PyPI API to the test server and new protocol tests validating byte-for-byte caching, range resolution, and `requires_dist` handling. > > <sup>Reviewed by [Cursor Bugbot](https://cursor.com/bugbot) for commit f29e955. Bugbot is set up for automated code reviews on this repo. Configure [here](https://www.cursor.com/dashboard/bugbot).</sup> <!-- /CURSOR_SUMMARY -->
1 parent cd6d95a commit ddc6a46

22 files changed

Lines changed: 1179 additions & 6 deletions

File tree

Cargo.lock

Lines changed: 19 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -82,6 +82,7 @@ reqwest = { version = "0.12.26", default-features = false, features = ["gzip", "
8282
regex = "1.10.6"
8383
ring = "0.17.14"
8484
rstest = "0.26.1"
85+
pep440_rs = "0.7.3"
8586
serde_plain = "1.0.2"
8687
serde_with = "3.9.0"
8788
serde_yaml = "0.9.34"

packages/zpm-primitives/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ edition = "2024"
55

66
[dependencies]
77
colored = { workspace = true }
8+
pep440_rs = { workspace = true }
89
regex = { workspace = true }
910
rkyv = { workspace = true, features = ["bytecheck"] }
1011
serde_plain = { workspace = true }

packages/zpm-primitives/src/descriptor.rs

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -91,6 +91,11 @@ impl Descriptor {
9191
version: params.version,
9292
}.into(),
9393

94+
Reference::PypiRegistry(params) if params.ident == self.ident && params.url.is_none() => reference::PypiShorthandReference {
95+
version: params.version,
96+
url: None,
97+
}.into(),
98+
9499
_ => reference,
95100
};
96101

@@ -259,7 +264,8 @@ impl_file_string_serialization!(Descriptor);
259264

260265
#[rstest]
261266
#[case("foo@npm:1.0.0")]
262-
#[case("foo@npm:1.0.0::parent=root@workspace:")]
267+
#[case("foo@pypi:1.0.0")]
268+
#[case("foo@npm:1.0.0::parent=root@workspace:.")]
263269
fn test_descriptor_serialization(#[case] str: &str) {
264270
assert_eq!(str, Descriptor::from_file_string(str).unwrap().to_file_string());
265271
}

packages/zpm-primitives/src/lib.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ mod descriptor_filter;
44
mod descriptor_resolution;
55
mod descriptor_semver;
66
mod descriptor;
7+
mod pypi;
78
mod range_peer;
89
mod range;
910
mod reference;
@@ -17,6 +18,7 @@ pub use descriptor_filter::*;
1718
pub use descriptor_resolution::*;
1819
pub use descriptor_semver::*;
1920
pub use descriptor::*;
21+
pub use pypi::*;
2022
pub use range_peer::*;
2123
pub use range::*;
2224
pub use reference::*;

packages/zpm-primitives/src/locator.rs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -158,7 +158,8 @@ impl_file_string_serialization!(Locator);
158158

159159
#[rstest]
160160
#[case("foo@npm:1.0.0")]
161-
#[case("foo@npm:1.0.0::parent=root@workspace:")]
161+
#[case("foo@pypi:1.0.0")]
162+
#[case("foo@npm:1.0.0::parent=root@workspace:.")]
162163
fn test_locator_serialization(#[case] str: &str) {
163164
assert_eq!(str, Locator::from_file_string(str).unwrap().to_file_string());
164165
}
Lines changed: 232 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,232 @@
1+
use std::{cmp::Ordering, str::FromStr};
2+
3+
use rkyv::Archive;
4+
use zpm_semver::VersionRc;
5+
use zpm_utils::{DataType, EcoVec, FromFileString, ToFileString, ToHumanString, impl_file_string_from_str, impl_file_string_serialization};
6+
7+
#[derive(thiserror::Error, Clone, Debug, PartialEq, Eq)]
8+
pub enum PypiError {
9+
#[error("Invalid PEP 440 version: {0}")]
10+
InvalidVersion(String),
11+
12+
#[error("Invalid PEP 440 specifier set: {0}")]
13+
InvalidSpecifier(String),
14+
15+
#[error("Cannot project PEP 440 version to semver: {0}")]
16+
InvalidSemverProjection(String),
17+
}
18+
19+
#[derive(Clone, Debug, PartialEq, Eq, Hash, PartialOrd, Ord, Archive, rkyv::Serialize, rkyv::Deserialize)]
20+
#[rkyv(derive(PartialEq, Eq, PartialOrd, Ord, Hash))]
21+
pub struct PypiVersion {
22+
raw: String,
23+
}
24+
25+
impl PypiVersion {
26+
pub fn as_str(&self) -> &str {
27+
&self.raw
28+
}
29+
30+
pub fn is_stable(&self) -> Result<bool, PypiError> {
31+
Ok(self.parse()?.is_stable())
32+
}
33+
34+
pub fn cmp_pep440(&self, other: &Self) -> Result<Ordering, PypiError> {
35+
Ok(self.parse()?.cmp(&other.parse()?))
36+
}
37+
38+
pub fn satisfies(&self, specifiers: &PypiSpecifierSet) -> Result<bool, PypiError> {
39+
specifiers.contains(self)
40+
}
41+
42+
pub fn to_lossy_semver(&self) -> Result<zpm_semver::Version, PypiError> {
43+
let parsed
44+
= self.parse()?;
45+
let release
46+
= parsed.release();
47+
48+
let to_u32
49+
= |n: Option<u64>| -> Result<u32, PypiError> {
50+
let n
51+
= n.unwrap_or(0);
52+
n.try_into().map_err(|_| PypiError::InvalidSemverProjection(self.raw.clone()))
53+
};
54+
55+
let major
56+
= to_u32(release.first().copied())?;
57+
let minor
58+
= to_u32(release.get(1).copied())?;
59+
let patch
60+
= to_u32(release.get(2).copied())?;
61+
62+
let mut prerelease_segments
63+
= Vec::new();
64+
65+
if let Some(pre) = parsed.pre() {
66+
prerelease_segments.push(pre.kind.to_string());
67+
prerelease_segments.push(pre.number.to_string());
68+
}
69+
70+
if let Some(dev) = parsed.dev() {
71+
prerelease_segments.push("dev".to_string());
72+
prerelease_segments.push(dev.to_string());
73+
}
74+
75+
if let Some(post) = parsed.post() {
76+
prerelease_segments.push("post".to_string());
77+
prerelease_segments.push(post.to_string());
78+
}
79+
80+
if !parsed.local().is_empty() {
81+
prerelease_segments.push("local".to_string());
82+
83+
for segment in parsed.local() {
84+
prerelease_segments.push(segment.to_string().to_ascii_lowercase());
85+
}
86+
}
87+
88+
let rc
89+
= if prerelease_segments.is_empty() {
90+
None
91+
} else {
92+
let rc_segments
93+
= prerelease_segments.into_iter().map(|segment| {
94+
match segment.parse::<u32>() {
95+
Ok(number) => VersionRc::Number(number),
96+
Err(_) => VersionRc::String(segment.into()),
97+
}
98+
}).collect::<Vec<_>>();
99+
100+
Some(EcoVec::from(rc_segments))
101+
};
102+
103+
Ok(zpm_semver::Version::new_from_components(major, minor, patch, rc))
104+
}
105+
106+
fn parse(&self) -> Result<pep440_rs::Version, PypiError> {
107+
pep440_rs::Version::from_str(&self.raw)
108+
.map_err(|_| PypiError::InvalidVersion(self.raw.clone()))
109+
}
110+
}
111+
112+
impl FromFileString for PypiVersion {
113+
type Error = PypiError;
114+
115+
fn from_file_string(src: &str) -> Result<Self, Self::Error> {
116+
let src
117+
= src.trim();
118+
119+
let parsed
120+
= pep440_rs::Version::from_str(src)
121+
.map_err(|_| PypiError::InvalidVersion(src.to_string()))?;
122+
123+
Ok(Self {
124+
raw: parsed.to_string(),
125+
})
126+
}
127+
}
128+
129+
impl ToFileString for PypiVersion {
    /// Returns a copy of the stored version string.
    fn to_file_string(&self) -> String {
        self.as_str().to_owned()
    }
}
134+
135+
impl ToHumanString for PypiVersion {
136+
fn to_print_string(&self) -> String {
137+
DataType::Reference.colorize(&self.raw)
138+
}
139+
}
140+
141+
impl_file_string_from_str!(PypiVersion);
142+
impl_file_string_serialization!(PypiVersion);
143+
144+
#[derive(Clone, Debug, PartialEq, Eq, Hash, PartialOrd, Ord, Archive, rkyv::Serialize, rkyv::Deserialize)]
145+
#[rkyv(derive(PartialEq, Eq, PartialOrd, Ord, Hash))]
146+
pub struct PypiSpecifierSet {
147+
raw: String,
148+
}
149+
150+
impl PypiSpecifierSet {
151+
pub fn any() -> Self {
152+
Self {
153+
raw: "*".to_string(),
154+
}
155+
}
156+
157+
pub fn is_any(&self) -> bool {
158+
self.raw == "*"
159+
}
160+
161+
pub fn as_str(&self) -> &str {
162+
&self.raw
163+
}
164+
165+
pub fn contains(&self, version: &PypiVersion) -> Result<bool, PypiError> {
166+
if self.is_any() {
167+
return Ok(true);
168+
}
169+
170+
let parsed_version
171+
= pep440_rs::Version::from_str(version.as_str())
172+
.map_err(|_| PypiError::InvalidVersion(version.as_str().to_string()))?;
173+
174+
if let Ok(specifiers) = pep440_rs::VersionSpecifiers::from_str(&self.raw) {
175+
return Ok(specifiers.contains(&parsed_version));
176+
}
177+
178+
let pinned
179+
= pep440_rs::Version::from_str(&self.raw)
180+
.map_err(|_| PypiError::InvalidSpecifier(self.raw.clone()))?;
181+
182+
Ok(parsed_version == pinned)
183+
}
184+
}
185+
186+
impl Default for PypiSpecifierSet {
187+
fn default() -> Self {
188+
Self::any()
189+
}
190+
}
191+
192+
impl FromFileString for PypiSpecifierSet {
193+
type Error = PypiError;
194+
195+
fn from_file_string(src: &str) -> Result<Self, Self::Error> {
196+
let src
197+
= src.trim();
198+
199+
if src.is_empty() || src == "*" {
200+
return Ok(Self::any());
201+
}
202+
203+
if let Ok(specifiers) = pep440_rs::VersionSpecifiers::from_str(src) {
204+
return Ok(Self {
205+
raw: specifiers.to_string(),
206+
});
207+
}
208+
209+
let version
210+
= pep440_rs::Version::from_str(src)
211+
.map_err(|_| PypiError::InvalidSpecifier(src.to_string()))?;
212+
213+
Ok(Self {
214+
raw: version.to_string(),
215+
})
216+
}
217+
}
218+
219+
impl ToFileString for PypiSpecifierSet {
    /// Returns a copy of the stored constraint string.
    fn to_file_string(&self) -> String {
        self.as_str().to_owned()
    }
}
224+
225+
impl ToHumanString for PypiSpecifierSet {
226+
fn to_print_string(&self) -> String {
227+
DataType::Range.colorize(&self.raw)
228+
}
229+
}
230+
231+
impl_file_string_from_str!(PypiSpecifierSet);
232+
impl_file_string_serialization!(PypiSpecifierSet);

0 commit comments

Comments
 (0)