Skip to content

Commit 2ba5acc

Browse files
authored
Merge pull request #11143 from itamarst/2984-new-cache-lower-memory
New HTTP cache with lower memory usage
2 parents ed113ff + cc14055 commit 2ba5acc

23 files changed

+528
-356
lines changed

docs/html/topics/caching.md

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,13 @@ While this cache attempts to minimize network activity, it does not prevent
2727
network access altogether. If you want a local install solution that
2828
circumvents accessing PyPI, see {ref}`Installing from local packages`.
2929

30+
```{versionchanged} 23.3
31+
A new cache format is now used, stored in a directory called `http-v2` (see
32+
below for this directory's location). Previously this cache was stored in a
33+
directory called `http` in the main cache directory. If you have completely
34+
switched to newer versions of `pip`, you may wish to delete the old directory.
35+
```
36+
3037
(wheel-caching)=
3138

3239
### Locally built wheels
@@ -124,11 +131,11 @@ The {ref}`pip cache` command can be used to manage pip's cache.
124131

125132
### Removing a single package
126133

127-
`pip cache remove setuptools` removes all wheel files related to setuptools from pip's cache.
134+
`pip cache remove setuptools` removes all wheel files related to setuptools from pip's cache. HTTP cache files are not removed at this time.
128135

129136
### Removing the cache
130137

131-
`pip cache purge` will clear all wheel files from pip's cache.
138+
`pip cache purge` will clear all files from pip's wheel and HTTP caches.
132139

133140
### Listing cached files
134141

news/2984.bugfix.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
pip uses less memory when caching large packages. As a result, there is a new on-disk cache format stored in a new directory ($PIP_CACHE_DIR/http-v2).

src/pip/_internal/cli/req_command.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -120,7 +120,7 @@ def _build_session(
120120
ssl_context = None
121121

122122
session = PipSession(
123-
cache=os.path.join(cache_dir, "http") if cache_dir else None,
123+
cache=os.path.join(cache_dir, "http-v2") if cache_dir else None,
124124
retries=retries if retries is not None else options.retries,
125125
trusted_hosts=options.trusted_hosts,
126126
index_urls=self._get_index_urls(options),

src/pip/_internal/commands/cache.py

Lines changed: 15 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -93,24 +93,30 @@ def get_cache_info(self, options: Values, args: List[Any]) -> None:
9393
num_http_files = len(self._find_http_files(options))
9494
num_packages = len(self._find_wheels(options, "*"))
9595

96-
http_cache_location = self._cache_dir(options, "http")
96+
http_cache_location = self._cache_dir(options, "http-v2")
97+
old_http_cache_location = self._cache_dir(options, "http")
9798
wheels_cache_location = self._cache_dir(options, "wheels")
98-
http_cache_size = filesystem.format_directory_size(http_cache_location)
99+
http_cache_size = filesystem.format_size(
100+
filesystem.directory_size(http_cache_location)
101+
+ filesystem.directory_size(old_http_cache_location)
102+
)
99103
wheels_cache_size = filesystem.format_directory_size(wheels_cache_location)
100104

101105
message = (
102106
textwrap.dedent(
103107
"""
104-
Package index page cache location: {http_cache_location}
108+
Package index page cache location (pip v23.3+): {http_cache_location}
109+
Package index page cache location (older pips): {old_http_cache_location}
105110
Package index page cache size: {http_cache_size}
106111
Number of HTTP files: {num_http_files}
107112
Locally built wheels location: {wheels_cache_location}
108113
Locally built wheels size: {wheels_cache_size}
109114
Number of locally built wheels: {package_count}
110-
"""
115+
""" # noqa: E501
111116
)
112117
.format(
113118
http_cache_location=http_cache_location,
119+
old_http_cache_location=old_http_cache_location,
114120
http_cache_size=http_cache_size,
115121
num_http_files=num_http_files,
116122
wheels_cache_location=wheels_cache_location,
@@ -189,8 +195,11 @@ def _cache_dir(self, options: Values, subdir: str) -> str:
189195
return os.path.join(options.cache_dir, subdir)
190196

191197
def _find_http_files(self, options: Values) -> List[str]:
192-
http_dir = self._cache_dir(options, "http")
193-
return filesystem.find_files(http_dir, "*")
198+
old_http_dir = self._cache_dir(options, "http")
199+
new_http_dir = self._cache_dir(options, "http-v2")
200+
return filesystem.find_files(old_http_dir, "*") + filesystem.find_files(
201+
new_http_dir, "*"
202+
)
194203

195204
def _find_wheels(self, options: Values, pattern: str) -> List[str]:
196205
wheel_dir = self._cache_dir(options, "wheels")

src/pip/_internal/network/cache.py

Lines changed: 25 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -3,10 +3,11 @@
33

44
import os
55
from contextlib import contextmanager
6-
from typing import Generator, Optional
6+
from datetime import datetime
7+
from typing import BinaryIO, Generator, Optional, Union
78

8-
from pip._vendor.cachecontrol.cache import BaseCache
9-
from pip._vendor.cachecontrol.caches import FileCache
9+
from pip._vendor.cachecontrol.cache import SeparateBodyBaseCache
10+
from pip._vendor.cachecontrol.caches import SeparateBodyFileCache
1011
from pip._vendor.requests.models import Response
1112

1213
from pip._internal.utils.filesystem import adjacent_tmp_file, replace
@@ -28,7 +29,7 @@ def suppressed_cache_errors() -> Generator[None, None, None]:
2829
pass
2930

3031

31-
class SafeFileCache(BaseCache):
32+
class SafeFileCache(SeparateBodyBaseCache):
3233
"""
3334
A file based cache which is safe to use even when the target directory may
3435
not be accessible or writable.
@@ -43,7 +44,7 @@ def _get_cache_path(self, name: str) -> str:
4344
# From cachecontrol.caches.file_cache.FileCache._fn, brought into our
4445
# class for backwards-compatibility and to avoid using a non-public
4546
# method.
46-
hashed = FileCache.encode(name)
47+
hashed = SeparateBodyFileCache.encode(name)
4748
parts = list(hashed[:5]) + [hashed]
4849
return os.path.join(self.directory, *parts)
4950

@@ -53,17 +54,33 @@ def get(self, key: str) -> Optional[bytes]:
5354
with open(path, "rb") as f:
5455
return f.read()
5556

56-
def set(self, key: str, value: bytes, expires: Optional[int] = None) -> None:
57-
path = self._get_cache_path(key)
57+
def _write(self, path: str, data: bytes) -> None:
5858
with suppressed_cache_errors():
5959
ensure_dir(os.path.dirname(path))
6060

6161
with adjacent_tmp_file(path) as f:
62-
f.write(value)
62+
f.write(data)
6363

6464
replace(f.name, path)
6565

66+
def set(
67+
self, key: str, value: bytes, expires: Union[int, datetime, None] = None
68+
) -> None:
69+
path = self._get_cache_path(key)
70+
self._write(path, value)
71+
6672
def delete(self, key: str) -> None:
6773
path = self._get_cache_path(key)
6874
with suppressed_cache_errors():
6975
os.remove(path)
76+
with suppressed_cache_errors():
77+
os.remove(path + ".body")
78+
79+
def get_body(self, key: str) -> Optional[BinaryIO]:
80+
path = self._get_cache_path(key) + ".body"
81+
with suppressed_cache_errors():
82+
return open(path, "rb")
83+
84+
def set_body(self, key: str, body: bytes) -> None:
85+
path = self._get_cache_path(key) + ".body"
86+
self._write(path, body)

src/pip/_vendor/cachecontrol.pyi

Lines changed: 0 additions & 1 deletion
This file was deleted.

src/pip/_vendor/cachecontrol/__init__.py

Lines changed: 14 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -8,11 +8,21 @@
88
"""
99
__author__ = "Eric Larson"
1010
__email__ = "eric@ionrock.org"
11-
__version__ = "0.12.11"
11+
__version__ = "0.13.1"
1212

13-
from .wrapper import CacheControl
14-
from .adapter import CacheControlAdapter
15-
from .controller import CacheController
13+
from pip._vendor.cachecontrol.adapter import CacheControlAdapter
14+
from pip._vendor.cachecontrol.controller import CacheController
15+
from pip._vendor.cachecontrol.wrapper import CacheControl
16+
17+
__all__ = [
18+
"__author__",
19+
"__email__",
20+
"__version__",
21+
"CacheControlAdapter",
22+
"CacheController",
23+
"CacheControl",
24+
]
1625

1726
import logging
27+
1828
logging.getLogger(__name__).addHandler(logging.NullHandler())

src/pip/_vendor/cachecontrol/_cmd.py

Lines changed: 17 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,43 +1,49 @@
11
# SPDX-FileCopyrightText: 2015 Eric Larson
22
#
33
# SPDX-License-Identifier: Apache-2.0
4+
from __future__ import annotations
45

56
import logging
7+
from argparse import ArgumentParser
8+
from typing import TYPE_CHECKING
69

710
from pip._vendor import requests
811

912
from pip._vendor.cachecontrol.adapter import CacheControlAdapter
1013
from pip._vendor.cachecontrol.cache import DictCache
1114
from pip._vendor.cachecontrol.controller import logger
1215

13-
from argparse import ArgumentParser
16+
if TYPE_CHECKING:
17+
from argparse import Namespace
1418

19+
from pip._vendor.cachecontrol.controller import CacheController
1520

16-
def setup_logging():
21+
22+
def setup_logging() -> None:
1723
logger.setLevel(logging.DEBUG)
1824
handler = logging.StreamHandler()
1925
logger.addHandler(handler)
2026

2127

22-
def get_session():
28+
def get_session() -> requests.Session:
2329
adapter = CacheControlAdapter(
2430
DictCache(), cache_etags=True, serializer=None, heuristic=None
2531
)
2632
sess = requests.Session()
2733
sess.mount("http://", adapter)
2834
sess.mount("https://", adapter)
2935

30-
sess.cache_controller = adapter.controller
36+
sess.cache_controller = adapter.controller # type: ignore[attr-defined]
3137
return sess
3238

3339

34-
def get_args():
40+
def get_args() -> Namespace:
3541
parser = ArgumentParser()
3642
parser.add_argument("url", help="The URL to try and cache")
3743
return parser.parse_args()
3844

3945

40-
def main(args=None):
46+
def main() -> None:
4147
args = get_args()
4248
sess = get_session()
4349

@@ -48,10 +54,13 @@ def main(args=None):
4854
setup_logging()
4955

5056
# try setting the cache
51-
sess.cache_controller.cache_response(resp.request, resp.raw)
57+
cache_controller: CacheController = (
58+
sess.cache_controller # type: ignore[attr-defined]
59+
)
60+
cache_controller.cache_response(resp.request, resp.raw)
5261

5362
# Now try to get it
54-
if sess.cache_controller.cached_request(resp.request):
63+
if cache_controller.cached_request(resp.request):
5564
print("Cached!")
5665
else:
5766
print("Not cached :(")

0 commit comments

Comments
 (0)