Skip to content

Commit fc1102d

Browse files
committed
Support on-demand content in repair_metadata
1 parent 134afaa commit fc1102d

File tree

3 files changed

+150
-5
lines changed

3 files changed

+150
-5
lines changed

pulp_python/app/tasks/repair.py

Lines changed: 59 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,11 @@
77
from pulpcore.plugin.util import get_domain
88

99
from pulp_python.app.models import PythonPackageContent, PythonRepository
10-
from pulp_python.app.utils import artifact_to_python_content_data
10+
from pulp_python.app.utils import (
11+
artifact_to_python_content_data,
12+
fetch_json_release_metadata,
13+
parse_metadata,
14+
)
1115

1216
log = logging.getLogger(__name__)
1317

@@ -47,8 +51,16 @@ def repair_metadata(content: QuerySet[PythonPackageContent]) -> int:
4751
Returns:
4852
int: The number of packages that were repaired.
4953
"""
50-
# TODO: Add on_demand content repair
51-
immediate_content = content.filter(contentartifact__artifact__isnull=False)
54+
immediate_content = (
55+
content.filter(contentartifact__artifact__isnull=False)
56+
.distinct()
57+
.prefetch_related("_artifacts")
58+
)
59+
on_demand_content = (
60+
content.filter(contentartifact__artifact__isnull=True)
61+
.distinct()
62+
.prefetch_related("contentartifact_set__remoteartifact_set")
63+
)
5264
domain = get_domain()
5365

5466
batch = []
@@ -58,12 +70,12 @@ def repair_metadata(content: QuerySet[PythonPackageContent]) -> int:
5870
progress_report = ProgressReport(
5971
message="Repairing packages' metadata",
6072
code="repair.metadata",
61-
total=immediate_content.count(),
73+
total=content.count(),
6274
)
6375
progress_report.save()
6476
with progress_report:
6577
for package in progress_report.iter(
66-
immediate_content.prefetch_related("_artifacts").iterator(chunk_size=1000)
78+
immediate_content.iterator(chunk_size=1000)
6779
):
6880
new_data = artifact_to_python_content_data(
6981
package.filename, package._artifacts.get(), domain
@@ -82,6 +94,48 @@ def repair_metadata(content: QuerySet[PythonPackageContent]) -> int:
8294
batch = []
8395
set_of_update_fields.clear()
8496

97+
for package in progress_report.iter(
98+
on_demand_content.iterator(chunk_size=1000)
99+
):
100+
remote_artifacts = (
101+
package.contentartifact_set.get().remoteartifact_set.all()
102+
)
103+
# We expect that PythonPackageContent always has correct name and version,
104+
# and RemoteArtifact always has correct sha256
105+
json_data = fetch_json_release_metadata(
106+
package.name, package.version, remote_artifacts.get().remote
107+
)
108+
dist_data = next(
109+
(
110+
dist
111+
for ra in remote_artifacts
112+
for dist in json_data["urls"]
113+
if ra.sha256 == dist["digests"]["sha256"]
114+
),
115+
None,
116+
)
117+
if not dist_data:
118+
log.warning(
119+
_("No matching distribution for {} was found.").format(package.name)
120+
)
121+
continue
122+
123+
new_data = parse_metadata(json_data["info"], package.version, dist_data)
124+
new_data.pop("url") # belongs to RemoteArtifact, not PythonPackageContent
125+
changed = False
126+
for field, value in new_data.items():
127+
if getattr(package, field) != value:
128+
setattr(package, field, value)
129+
set_of_update_fields.add(field)
130+
changed = True
131+
if changed:
132+
batch.append(package)
133+
if len(batch) == 1000:
134+
total_repaired += len(batch)
135+
PythonPackageContent.objects.bulk_update(batch, set_of_update_fields)
136+
batch = []
137+
set_of_update_fields.clear()
138+
85139
if batch:
86140
total_repaired += len(batch)
87141
PythonPackageContent.objects.bulk_update(batch, set_of_update_fields)

pulp_python/app/utils.py

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
import pkginfo
22
import re
3+
import requests
34
import shutil
45
import tempfile
56
import json
@@ -10,6 +11,8 @@
1011
from packaging.requirements import Requirement
1112
from packaging.version import parse, InvalidVersion
1213

14+
from pulpcore.plugin.models import Remote
15+
1316

1417
PYPI_LAST_SERIAL = "X-PYPI-LAST-SERIAL"
1518
"""TODO This serial constant is temporary until Python repositories implements serials"""
@@ -189,6 +192,19 @@ def artifact_to_python_content_data(filename, artifact, domain=None):
189192
return data
190193

191194

195+
def fetch_json_release_metadata(name: str, version: str, remote: Remote) -> dict:
    """
    Fetch metadata for a specific release from a PyPI-style JSON API.

    A release can contain multiple distributions.
    See https://docs.pypi.org/api/json/#get-a-release for more details.

    Args:
        name: Project name, used verbatim in the request path.
        version: Release version, used verbatim in the request path.
        remote: Remote whose `url` is the base URL of the index to query.

    Returns:
        dict: The decoded JSON response. For PyPI it contains the "info",
            "last_serial", "urls", and "vulnerabilities" keys.

    Raises:
        requests.HTTPError: If the server responds with an error status
            (raised by `response.raise_for_status()`).
    """
    # Normalize the base so a remote URL saved without a trailing slash does not get
    # glued straight onto the path (e.g. "https://pypi.orgpypi/<name>/<version>/json").
    base_url = remote.url.rstrip("/")
    url = f"{base_url}/pypi/{name}/{version}/json"
    # NOTE(review): only `remote.url` is read here; any other settings stored on the
    # remote (credentials, proxy, TLS options, if any) are not applied to this request.
    response = requests.get(url, timeout=10)
    response.raise_for_status()
    return response.json()
206+
207+
192208
def python_content_to_json(base_path, content_query, version=None, domain=None):
193209
"""
194210
Converts a QuerySet of PythonPackageContent into the PyPi JSON format

pulp_python/tests/functional/api/test_repair.py

Lines changed: 75 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,33 @@ def _create(artifact_filename, filename, content_data):
3232
return _create
3333

3434

35+
@pytest.fixture
def create_content_remote(python_bindings):
    """
    Factory fixture that creates on-demand package content directly in the database.

    The returned `_create(filename, ra_url, ra_sha256, content_data, remote)` callable
    runs a `pulpcore-manager shell` snippet that saves a `PythonPackageContent`, a
    `ContentArtifact` with `artifact=None`, and a `RemoteArtifact` bound to `remote`,
    then reads the new content back through the bindings and returns it.
    """

    def _create(filename, ra_url, ra_sha256, content_data, remote):
        # The snippet prints the href of the created content as its only output.
        commands = (
            "from pulpcore.plugin.models import ContentArtifact, RemoteArtifact; "
            "from pulpcore.plugin.util import extract_pk, get_url; "
            "from pulp_python.app.models import PythonPackageContent, PythonRemote; "
            f"c = PythonPackageContent(filename={filename!r}, **{content_data!r}); "
            "c.save(); "
            f"ca = ContentArtifact(artifact=None, content=c, relative_path={filename!r}); "
            "ca.save(); "
            f"r = PythonRemote.objects.get(pk=extract_pk({remote.pulp_href!r})); "
            f"ra = RemoteArtifact(content_artifact=ca, remote=r, sha256={ra_sha256!r}, url={ra_url!r}); "  # noqa: E501
            "ra.save(); "
            "print(get_url(c))"
        )
        process = subprocess.run(
            ["pulpcore-manager", "shell", "-c", commands], capture_output=True
        )

        # Attach the captured stderr so a failing shell run reports the actual error
        # instead of a bare "assert 1 == 0".
        assert process.returncode == 0, process.stderr.decode()
        content_href = process.stdout.decode().strip()
        return python_bindings.ContentPackagesApi.read(content_href)

    return _create
60+
61+
3562
@pytest.fixture
3663
def move_to_repository(python_bindings, monitor_task):
3764
def _move(repo_href, content_hrefs):
@@ -124,3 +151,51 @@ def test_metadata_repair_endpoint(
124151
assert content.packagetype == "sdist"
125152
assert content.requires_python == ">=2.7,!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*"
126153
assert content.author == ""
154+
155+
156+
def test_metadata_repair_endpoint_on_demand(
    create_content_remote,
    monitor_task,
    move_to_repository,
    python_bindings,
    python_remote_factory,
    python_repo_factory,
):
    """
    Check that the `Repositories.repair_metadata` endpoint repairs package metadata
    for content that only has a RemoteArtifact.
    """
    sdist_filename = "scipy-1.1.0.tar.gz"
    packages_url = urljoin(PYTHON_FIXTURES_URL, "packages/")
    sdist_url = urljoin(packages_url, sdist_filename)
    sdist_sha256 = "878352408424dffaa695ffedf2f9f92844e116686923ed9aa8626fc30d32cfd1"

    # Name and version are correct; the remaining fields are deliberately wrong.
    wrong_metadata = {
        "author": "ME",
        "packagetype": "bdist",
        "requires_python": ">=3.8",
    }
    initial_data = {"name": "scipy", "version": "1.1.0", **wrong_metadata}

    remote = python_remote_factory(includes=["scipy"])
    repo = python_repo_factory(remote=remote)

    # The content must start out with exactly the data it was created with.
    content = create_content_remote(
        sdist_filename, sdist_url, sdist_sha256, initial_data, remote
    )
    for attr_name, stored_value in initial_data.items():
        assert getattr(content, attr_name) == stored_value
    move_to_repository(repo.pulp_href, [content.pulp_href])

    repair_task = python_bindings.RepositoriesPythonApi.repair_metadata(
        repo.pulp_href
    ).task
    monitor_task(repair_task)

    # After the repair the wrong fields are corrected; name and version are unchanged.
    repaired = python_bindings.ContentPackagesApi.read(content.pulp_href)
    expected_after_repair = {
        "author": "",
        "name": "scipy",
        "packagetype": "sdist",
        "requires_python": ">=2.7,!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*",
        "version": "1.1.0",
    }
    for attr_name, expected_value in expected_after_repair.items():
        assert getattr(repaired, attr_name) == expected_value

0 commit comments

Comments
 (0)