From 308a8de963960d943cd4298730b0796c949bc4f9 Mon Sep 17 00:00:00 2001 From: Ali Nazzal Date: Mon, 11 Aug 2025 12:27:15 +0300 Subject: [PATCH 1/4] test(download): regression for gzip + Content-Length (no overshoot, correct size) --- tests/test_download_gzip_regression.py | 61 ++++++++++++++++++++++++++ 1 file changed, 61 insertions(+) create mode 100644 tests/test_download_gzip_regression.py diff --git a/tests/test_download_gzip_regression.py b/tests/test_download_gzip_regression.py new file mode 100644 index 0000000000..e6e59c92fd --- /dev/null +++ b/tests/test_download_gzip_regression.py @@ -0,0 +1,61 @@ +import io +import gzip +import sys +import shutil +import subprocess +from http.server import HTTPServer, BaseHTTPRequestHandler +import threading + +# محتوى نضغطه gzip +RAW = b"A" * 50000 +buf = io.BytesIO() +with gzip.GzipFile(fileobj=buf, mode="wb") as gz: + gz.write(RAW) +GZ = buf.getvalue() + +class Handler(BaseHTTPRequestHandler): + def do_GET(self): + if self.path != "/file.gz": + self.send_error(404); return + self.send_response(200) + self.send_header("Content-Type", "application/octet-stream") + self.send_header("Content-Encoding", "gzip") + self.send_header("Content-Length", str(len(GZ))) # طول النسخة المضغوطة + self.end_headers() + self.wfile.write(GZ) + + def log_message(self, *args): + pass # نسكت اللوج + +def start_server(): + srv = HTTPServer(("127.0.0.1", 0), Handler) # بورت حر + th = threading.Thread(target=srv.serve_forever, daemon=True) + th.start() + return srv, th + +def test_download_gzip_content_length(tmp_path): + srv, _ = start_server() + try: + url = f"http://127.0.0.1:{srv.server_port}/file.gz" + out = tmp_path / "out.gz" + + # نفضّل GET صراحةً ونستعمل --output + exe = shutil.which("http") + if exe: + cmd = [exe, "GET", url, "--download", "--output", str(out)] + else: + cmd = [sys.executable, "-m", "httpie", "GET", url, "--download", "--output", str(out)] + + p = subprocess.run(cmd, capture_output=True, text=True) + assert p.returncode == 0, f"stderr:\\n{p.stderr}\\nstdout:\\n{p.stdout}" + assert out.exists(), "output file not created" + + size = out.stat().st_size + assert size == len(GZ), f"saved size {size} != expected {len(GZ)}" + + # تحسّبي: نتأكد ماكو إشارات لتجاوز 100% أو incomplete + combined = (p.stdout or "") + (p.stderr or "") + assert ">100%" not in combined + assert "Incomplete download" not in combined + finally: + srv.shutdown() From 97115bfd133e53ddf85eb719f646941facd2926c Mon Sep 17 00:00:00 2001 From: Ali Nazzal <89179776+ali90h@users.noreply.github.com> Date: Wed, 6 Aug 2025 19:28:28 +0300 Subject: [PATCH 2/4] fix(download): respect Content-Length when Content-Encoding is present MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Per RFC 9110 § 8.6 the Content-Length header reflects the **encoded** size. The previous logic compared it to the decoded size, yielding false "Incomplete download" errors for gzip responses. --- CHANGELOG.md | 5 + docs/download.md | 8 ++ httpie/downloads.py | 142 ++++++++++++------------- tests/test_downloads.py | 224 ++++++++++++++++++++++------------------ 4 files changed, 210 insertions(+), 169 deletions(-) create mode 100644 docs/download.md diff --git a/CHANGELOG.md b/CHANGELOG.md index 0497ac3508..afbf5bd2b3 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,6 +3,11 @@ This document records all notable changes to [HTTPie](https://httpie.io). This project adheres to [Semantic Versioning](https://semver.org/). +## Unreleased + +### Fixed +- Respect `Content-Length` with `--download` when `Content-Encoding` is present to avoid false "Incomplete download" errors. ([#423](https://github.com/httpie/cli/issues/423)) + ## [3.2.4](https://github.com/httpie/cli/compare/3.2.3...3.2.4) (2024-11-01) - Fix default certs loading and unpin `requests`. ([#1596](https://github.com/httpie/cli/issues/1596)) diff --git a/docs/download.md b/docs/download.md new file mode 100644 index 0000000000..59e9981c38 --- /dev/null +++ b/docs/download.md @@ -0,0 +1,8 @@ +# Download mode + +HTTPie's `--download` option saves response bodies to files. When a server +returns a `Content-Encoding` (for example `gzip`), the `Content-Length` header +is treated as the size of the encoded payload as defined in RFC 9110 § 8.6. +HTTPie writes the body exactly as received and no longer compares the header to +the post-decompression size. + diff --git a/httpie/downloads.py b/httpie/downloads.py index 9c4b895e6f..205379e102 100644 --- a/httpie/downloads.py +++ b/httpie/downloads.py @@ -2,6 +2,7 @@ Download mode implementation. """ + import mimetypes import os import re @@ -12,10 +13,9 @@ import requests +from .context import Environment from .models import HTTPResponse, OutputOptions from .output.streams import RawStream -from .context import Environment - PARTIAL_CONTENT = 206 @@ -37,24 +37,23 @@ def parse_content_range(content_range: str, resumed_from: int) -> int: """ if content_range is None: - raise ContentRangeError('Missing Content-Range') + raise ContentRangeError("Missing Content-Range") pattern = ( - r'^bytes (?P\d+)-(?P\d+)' - r'/(\*|(?P\d+))$' + r"^bytes (?P\d+)-(?P\d+)" + r"/(\*|(?P\d+))$" ) match = re.match(pattern, content_range) if not match: - raise ContentRangeError( - f'Invalid Content-Range format {content_range!r}') + raise ContentRangeError(f"Invalid Content-Range format {content_range!r}") content_range_dict = match.groupdict() - first_byte_pos = int(content_range_dict['first_byte_pos']) - last_byte_pos = int(content_range_dict['last_byte_pos']) + first_byte_pos = int(content_range_dict["first_byte_pos"]) + last_byte_pos = int(content_range_dict["last_byte_pos"]) instance_length = ( - int(content_range_dict['instance_length']) - if content_range_dict['instance_length'] + int(content_range_dict["instance_length"]) + if content_range_dict["instance_length"] else None ) @@ -64,27 +63,24 @@ def parse_content_range(content_range: str, resumed_from: int) -> int: # last-byte-pos value, is invalid. The recipient of an invalid # byte-content-range- spec MUST ignore it and any content # transferred along with it." - if (first_byte_pos > last_byte_pos - or (instance_length is not None - and instance_length <= last_byte_pos)): - raise ContentRangeError( - f'Invalid Content-Range returned: {content_range!r}') + if first_byte_pos > last_byte_pos or ( + instance_length is not None and instance_length <= last_byte_pos + ): + raise ContentRangeError(f"Invalid Content-Range returned: {content_range!r}") - if (first_byte_pos != resumed_from - or (instance_length is not None - and last_byte_pos + 1 != instance_length)): + if first_byte_pos != resumed_from or ( + instance_length is not None and last_byte_pos + 1 != instance_length + ): # Not what we asked for. raise ContentRangeError( - f'Unexpected Content-Range returned ({content_range!r})' + f"Unexpected Content-Range returned ({content_range!r})" f' for the requested Range ("bytes={resumed_from}-")' ) return last_byte_pos + 1 -def filename_from_content_disposition( - content_disposition: str -) -> Optional[str]: +def filename_from_content_disposition(content_disposition: str) -> Optional[str]: """ Extract and validate filename from a Content-Disposition header. @@ -94,28 +90,28 @@ def filename_from_content_disposition( """ # attachment; filename=jakubroztocil-httpie-0.4.1-20-g40bd8f6.tar.gz - msg = Message(f'Content-Disposition: {content_disposition}') + msg = Message(f"Content-Disposition: {content_disposition}") filename = msg.get_filename() if filename: # Basic sanitation. - filename = os.path.basename(filename).lstrip('.').strip() + filename = os.path.basename(filename).lstrip(".").strip() if filename: return filename def filename_from_url(url: str, content_type: Optional[str]) -> str: - fn = urlsplit(url).path.rstrip('/') - fn = os.path.basename(fn) if fn else 'index' - if '.' not in fn and content_type: - content_type = content_type.split(';')[0] - if content_type == 'text/plain': + fn = urlsplit(url).path.rstrip("/") + fn = os.path.basename(fn) if fn else "index" + if "." not in fn and content_type: + content_type = content_type.split(";")[0] + if content_type == "text/plain": # mimetypes returns '.ksh' - ext = '.txt' + ext = ".txt" else: ext = mimetypes.guess_extension(content_type) - if ext == '.htm': - ext = '.html' + if ext == ".htm": + ext = ".html" if ext: fn += ext @@ -136,12 +132,12 @@ def trim_filename(filename: str, max_len: int) -> str: def get_filename_max_length(directory: str) -> int: max_len = 255 - if hasattr(os, 'pathconf') and 'PC_NAME_MAX' in os.pathconf_names: - max_len = os.pathconf(directory, 'PC_NAME_MAX') + if hasattr(os, "pathconf") and "PC_NAME_MAX" in os.pathconf_names: + max_len = os.pathconf(directory, "PC_NAME_MAX") return max_len -def trim_filename_if_needed(filename: str, directory='.', extra=0) -> str: +def trim_filename_if_needed(filename: str, directory=".", extra=0) -> str: max_len = get_filename_max_length(directory) - extra if len(filename) > max_len: filename = trim_filename(filename, max_len) @@ -151,7 +147,7 @@ def trim_filename_if_needed(filename: str, directory='.', extra=0) -> str: def get_unique_filename(filename: str, exists=os.path.exists) -> str: attempt = 0 while True: - suffix = f'-{attempt}' if attempt > 0 else '' + suffix = f"-{attempt}" if attempt > 0 else "" try_filename = trim_filename_if_needed(filename, extra=len(suffix)) try_filename += suffix if not exists(try_filename): @@ -161,12 +157,7 @@ def get_unique_filename(filename: str, exists=os.path.exists) -> str: class Downloader: - def __init__( - self, - env: Environment, - output_file: IO = None, - resume: bool = False - ): + def __init__(self, env: Environment, output_file: IO = None, resume: bool = False): """ :param resume: Should the download resume if partial download already exists. @@ -190,19 +181,17 @@ def pre_request(self, request_headers: dict): """ # Ask the server not to encode the content so that we can resume, etc. - request_headers['Accept-Encoding'] = 'identity' + request_headers["Accept-Encoding"] = "identity" if self._resume: bytes_have = os.path.getsize(self._output_file.name) if bytes_have: # Set ``Range`` header to resume the download # TODO: Use "If-Range: mtime" to make sure it's fresh? - request_headers['Range'] = f'bytes={bytes_have}-' + request_headers["Range"] = f"bytes={bytes_have}-" self._resumed_from = bytes_have def start( - self, - initial_url: str, - final_response: requests.Response + self, initial_url: str, final_response: requests.Response ) -> Tuple[RawStream, IO]: """ Initiate and return a stream for `response` body with progress @@ -216,13 +205,27 @@ def start( """ assert not self.status.time_started - # FIXME: some servers still might sent Content-Encoding: gzip - # + # Some servers may still send a compressed body even though + # we ask for identity encoding. In that case, ``Content-Length`` + # refers to the encoded size (RFC 9110 § 8.6), so we disable + # automatic decoding to make our byte tracking match. try: - total_size = int(final_response.headers['Content-Length']) + total_size = int(final_response.headers["Content-Length"]) except (KeyError, ValueError, TypeError): total_size = None + content_encoding = final_response.headers.get("Content-Encoding") + if content_encoding: + final_response.raw.decode_content = False + + class EncodedHTTPResponse(HTTPResponse): + def iter_body(self, chunk_size=1): # type: ignore[override] + return final_response.raw.stream(chunk_size, decode_content=False) + + response_msg = EncodedHTTPResponse(final_response) + else: + response_msg = HTTPResponse(final_response) + if not self._output_file: self._output_file = self._get_output_file_from_response( initial_url=initial_url, @@ -232,8 +235,7 @@ def start( # `--output, -o` provided if self._resume and final_response.status_code == PARTIAL_CONTENT: total_size = parse_content_range( - final_response.headers.get('Content-Range'), - self._resumed_from + final_response.headers.get("Content-Range"), self._resumed_from ) else: @@ -244,9 +246,11 @@ def start( except OSError: pass # stdout - output_options = OutputOptions.from_message(final_response, headers=False, body=True) + output_options = OutputOptions.from_message( + final_response, headers=False, body=True + ) stream = RawStream( - msg=HTTPResponse(final_response), + msg=response_msg, output_options=output_options, on_body_chunk_downloaded=self.chunk_downloaded, ) @@ -254,7 +258,7 @@ def start( self.status.started( output_file=self._output_file, resumed_from=self._resumed_from, - total_size=total_size + total_size=total_size, ) return stream, self._output_file @@ -292,16 +296,17 @@ def _get_output_file_from_response( ) -> IO: # Output file not specified. Pick a name that doesn't exist yet. filename = None - if 'Content-Disposition' in final_response.headers: + if "Content-Disposition" in final_response.headers: filename = filename_from_content_disposition( - final_response.headers['Content-Disposition']) + final_response.headers["Content-Disposition"] + ) if not filename: filename = filename_from_url( url=initial_url, - content_type=final_response.headers.get('Content-Type'), + content_type=final_response.headers.get("Content-Type"), ) unique_filename = get_unique_filename(filename) - return open(unique_filename, buffering=0, mode='a+b') + return open(unique_filename, buffering=0, mode="a+b") class DownloadStatus: @@ -325,11 +330,11 @@ def started(self, output_file, resumed_from=0, total_size=None): def start_display(self, output_file): from httpie.output.ui.rich_progress import ( DummyDisplay, + ProgressDisplay, StatusDisplay, - ProgressDisplay ) - message = f'Downloading to {output_file.name}' + message = f"Downloading to {output_file.name}" if self.env.show_displays: if self.total_size is None: # Rich does not support progress bars without a total @@ -341,9 +346,7 @@ def start_display(self, output_file): self.display = DummyDisplay(self.env) self.display.start( - total=self.total_size, - at=self.downloaded, - description=message + total=self.total_size, at=self.downloaded, description=message ) def chunk_downloaded(self, size): @@ -357,10 +360,7 @@ def has_finished(self): @property def time_spent(self): - if ( - self.time_started is not None - and self.time_finished is not None - ): + if self.time_started is not None and self.time_finished is not None: return self.time_finished - self.time_started else: return None @@ -369,9 +369,9 @@ def finished(self): assert self.time_started is not None assert self.time_finished is None self.time_finished = monotonic() - if hasattr(self, 'display'): + if hasattr(self, "display"): self.display.stop(self.time_spent) def terminate(self): - if hasattr(self, 'display'): + if hasattr(self, "display"): self.display.stop(self.time_spent) diff --git a/tests/test_downloads.py b/tests/test_downloads.py index b646a0e6a5..20680818cd 100644 --- a/tests/test_downloads.py +++ b/tests/test_downloads.py @@ -1,18 +1,27 @@ +import gzip import os import tempfile import time -import requests from unittest import mock from urllib.request import urlopen import pytest +import requests from requests.structures import CaseInsensitiveDict from httpie.downloads import ( - parse_content_range, filename_from_content_disposition, filename_from_url, - get_unique_filename, ContentRangeError, Downloader, PARTIAL_CONTENT + PARTIAL_CONTENT, + ContentRangeError, + Downloader, + filename_from_content_disposition, + filename_from_url, + get_unique_filename, + parse_content_range, ) -from .utils import http, MockEnvironment +from httpie.status import ExitStatus +from tests.utils.http_server import TestHandler + +from .utils import MockEnvironment, http class Response(requests.Response): @@ -23,85 +32,95 @@ def __init__(self, url, headers={}, status_code=200): self.status_code = status_code +@TestHandler.handler("GET", "/gzip") +def gzip_handler(handler): + payload = b"Hello, world!" + compressed = gzip.compress(payload) + handler.send_response(200) + handler.send_header("Content-Length", str(len(compressed))) + handler.send_header("Content-Encoding", "gzip") + handler.end_headers() + handler.wfile.write(compressed) + + class TestDownloadUtils: def test_Content_Range_parsing(self): parse = parse_content_range - assert parse('bytes 100-199/200', 100) == 200 - assert parse('bytes 100-199/*', 100) == 200 + assert parse("bytes 100-199/200", 100) == 200 + assert parse("bytes 100-199/*", 100) == 200 # single byte - assert parse('bytes 100-100/*', 100) == 101 + assert parse("bytes 100-100/*", 100) == 101 # missing pytest.raises(ContentRangeError, parse, None, 100) # syntax error - pytest.raises(ContentRangeError, parse, 'beers 100-199/*', 100) + pytest.raises(ContentRangeError, parse, "beers 100-199/*", 100) # unexpected range - pytest.raises(ContentRangeError, parse, 'bytes 100-199/*', 99) + pytest.raises(ContentRangeError, parse, "bytes 100-199/*", 99) # invalid instance-length - pytest.raises(ContentRangeError, parse, 'bytes 100-199/199', 100) + pytest.raises(ContentRangeError, parse, "bytes 100-199/199", 100) # invalid byte-range-resp-spec - pytest.raises(ContentRangeError, parse, 'bytes 100-99/199', 100) - - @pytest.mark.parametrize('header, expected_filename', [ - ('attachment; filename=hello-WORLD_123.txt', 'hello-WORLD_123.txt'), - ('attachment; filename=".hello-WORLD_123.txt"', 'hello-WORLD_123.txt'), - ('attachment; filename="white space.txt"', 'white space.txt'), - (r'attachment; filename="\"quotes\".txt"', '"quotes".txt'), - ('attachment; filename=/etc/hosts', 'hosts'), - ('attachment; filename=', None) - ]) + pytest.raises(ContentRangeError, parse, "bytes 100-99/199", 100) + + @pytest.mark.parametrize( + "header, expected_filename", + [ + ("attachment; filename=hello-WORLD_123.txt", "hello-WORLD_123.txt"), + ('attachment; filename=".hello-WORLD_123.txt"', "hello-WORLD_123.txt"), + ('attachment; filename="white space.txt"', "white space.txt"), + (r'attachment; filename="\"quotes\".txt"', '"quotes".txt'), + ("attachment; filename=/etc/hosts", "hosts"), + ("attachment; filename=", None), + ], + ) def test_Content_Disposition_parsing(self, header, expected_filename): assert filename_from_content_disposition(header) == expected_filename def test_filename_from_url(self): - assert 'foo.txt' == filename_from_url( - url='http://example.org/foo', - content_type='text/plain' + assert "foo.txt" == filename_from_url( + url="http://example.org/foo", content_type="text/plain" ) - assert 'foo.html' == filename_from_url( - url='http://example.org/foo', - content_type='text/html; charset=UTF-8' + assert "foo.html" == filename_from_url( + url="http://example.org/foo", content_type="text/html; charset=UTF-8" ) - assert 'foo' == filename_from_url( - url='http://example.org/foo', - content_type=None + assert "foo" == filename_from_url( + url="http://example.org/foo", content_type=None ) - assert 'foo' == filename_from_url( - url='http://example.org/foo', - content_type='x-foo/bar' + assert "foo" == filename_from_url( + url="http://example.org/foo", content_type="x-foo/bar" ) @pytest.mark.parametrize( - 'orig_name, unique_on_attempt, expected', + "orig_name, unique_on_attempt, expected", [ # Simple - ('foo.bar', 0, 'foo.bar'), - ('foo.bar', 1, 'foo.bar-1'), - ('foo.bar', 10, 'foo.bar-10'), + ("foo.bar", 0, "foo.bar"), + ("foo.bar", 1, "foo.bar-1"), + ("foo.bar", 10, "foo.bar-10"), # Trim - ('A' * 20, 0, 'A' * 10), - ('A' * 20, 1, 'A' * 8 + '-1'), - ('A' * 20, 10, 'A' * 7 + '-10'), + ("A" * 20, 0, "A" * 10), + ("A" * 20, 1, "A" * 8 + "-1"), + ("A" * 20, 10, "A" * 7 + "-10"), # Trim before ext - ('A' * 20 + '.txt', 0, 'A' * 6 + '.txt'), - ('A' * 20 + '.txt', 1, 'A' * 4 + '.txt-1'), + ("A" * 20 + ".txt", 0, "A" * 6 + ".txt"), + ("A" * 20 + ".txt", 1, "A" * 4 + ".txt-1"), # Trim at the end - ('foo.' + 'A' * 20, 0, 'foo.' + 'A' * 6), - ('foo.' + 'A' * 20, 1, 'foo.' + 'A' * 4 + '-1'), - ('foo.' + 'A' * 20, 10, 'foo.' + 'A' * 3 + '-10'), - ] + ("foo." + "A" * 20, 0, "foo." + "A" * 6), + ("foo." + "A" * 20, 1, "foo." + "A" * 4 + "-1"), + ("foo." + "A" * 20, 10, "foo." + "A" * 3 + "-10"), + ], ) - @mock.patch('httpie.downloads.get_filename_max_length') - def test_unique_filename(self, get_filename_max_length, - orig_name, unique_on_attempt, - expected): + @mock.patch("httpie.downloads.get_filename_max_length") + def test_unique_filename( + self, get_filename_max_length, orig_name, unique_on_attempt, expected + ): def attempts(unique_on_attempt=0): # noinspection PyUnresolvedReferences,PyUnusedLocal @@ -123,39 +142,50 @@ def exists(filename): class TestDownloads: def test_actual_download(self, httpbin_both, httpbin): - robots_txt = '/robots.txt' + robots_txt = "/robots.txt" body = urlopen(httpbin + robots_txt).read().decode() - env = MockEnvironment(stdin_isatty=True, stdout_isatty=False, show_displays=True) - r = http('--download', httpbin_both.url + robots_txt, env=env) - assert 'Downloading' in r.stderr + env = MockEnvironment( + stdin_isatty=True, stdout_isatty=False, show_displays=True + ) + r = http("--download", httpbin_both.url + robots_txt, env=env) + assert "Downloading" in r.stderr assert body == r + def test_download_with_gzip_content_encoding(self, http_server, tmp_path): + orig_cwd = os.getcwd() + os.chdir(tmp_path) + try: + r = http("--download", f"http://{http_server}/gzip") + assert r.exit_status == ExitStatus.SUCCESS + with open("gzip", "rb") as f: + assert gzip.decompress(f.read()) == b"Hello, world!" + finally: + os.chdir(orig_cwd) + def test_download_with_Content_Length(self, mock_env, httpbin_both): - with open(os.devnull, 'w') as devnull: + with open(os.devnull, "w") as devnull: downloader = Downloader(mock_env, output_file=devnull) downloader.start( - initial_url='/', + initial_url="/", final_response=Response( - url=httpbin_both.url + '/', - headers={'Content-Length': 10} - ) + url=httpbin_both.url + "/", headers={"Content-Length": 10} + ), ) time.sleep(1.1) - downloader.chunk_downloaded(b'12345') + downloader.chunk_downloaded(b"12345") time.sleep(1.1) - downloader.chunk_downloaded(b'12345') + downloader.chunk_downloaded(b"12345") downloader.finish() assert not downloader.interrupted def test_download_no_Content_Length(self, mock_env, httpbin_both): - with open(os.devnull, 'w') as devnull: + with open(os.devnull, "w") as devnull: downloader = Downloader(mock_env, output_file=devnull) downloader.start( - final_response=Response(url=httpbin_both.url + '/'), - initial_url='/' + final_response=Response(url=httpbin_both.url + "/"), initial_url="/" ) time.sleep(1.1) - downloader.chunk_downloaded(b'12345') + downloader.chunk_downloaded(b"12345") downloader.finish() assert not downloader.interrupted @@ -164,98 +194,96 @@ def test_download_output_from_content_disposition(self, mock_env, httpbin_both): orig_cwd = os.getcwd() os.chdir(tmp_dirname) try: - assert not os.path.isfile('filename.bin') + assert not os.path.isfile("filename.bin") downloader = Downloader(mock_env) downloader.start( final_response=Response( - url=httpbin_both.url + '/', + url=httpbin_both.url + "/", headers={ - 'Content-Length': 5, - 'Content-Disposition': 'attachment; filename="filename.bin"', - } + "Content-Length": 5, + "Content-Disposition": 'attachment; filename="filename.bin"', + }, ), - initial_url='/' + initial_url="/", ) - downloader.chunk_downloaded(b'12345') + downloader.chunk_downloaded(b"12345") downloader.finish() downloader.failed() # Stop the reporter assert not downloader.interrupted # TODO: Auto-close the file in that case? downloader._output_file.close() - assert os.path.isfile('filename.bin') + assert os.path.isfile("filename.bin") finally: os.chdir(orig_cwd) def test_download_interrupted(self, mock_env, httpbin_both): - with open(os.devnull, 'w') as devnull: + with open(os.devnull, "w") as devnull: downloader = Downloader(mock_env, output_file=devnull) downloader.start( final_response=Response( - url=httpbin_both.url + '/', - headers={'Content-Length': 5} + url=httpbin_both.url + "/", headers={"Content-Length": 5} ), - initial_url='/' + initial_url="/", ) - downloader.chunk_downloaded(b'1234') + downloader.chunk_downloaded(b"1234") downloader.finish() assert downloader.interrupted def test_download_resumed(self, mock_env, httpbin_both): with tempfile.TemporaryDirectory() as tmp_dirname: - file = os.path.join(tmp_dirname, 'file.bin') - with open(file, 'a'): + file = os.path.join(tmp_dirname, "file.bin") + with open(file, "a"): pass - with open(file, 'a+b') as output_file: + with open(file, "a+b") as output_file: # Start and interrupt the transfer after 3 bytes written downloader = Downloader(mock_env, output_file=output_file) downloader.start( final_response=Response( - url=httpbin_both.url + '/', - headers={'Content-Length': 5} + url=httpbin_both.url + "/", headers={"Content-Length": 5} ), - initial_url='/' + initial_url="/", ) - downloader.chunk_downloaded(b'123') + downloader.chunk_downloaded(b"123") downloader.finish() downloader.failed() assert downloader.interrupted # Write bytes - with open(file, 'wb') as fh: - fh.write(b'123') + with open(file, "wb") as fh: + fh.write(b"123") - with open(file, 'a+b') as output_file: + with open(file, "a+b") as output_file: # Resume the transfer downloader = Downloader(mock_env, output_file=output_file, resume=True) # Ensure `pre_request()` is working as expected too headers = {} downloader.pre_request(headers) - assert headers['Accept-Encoding'] == 'identity' - assert headers['Range'] == 'bytes=3-' + assert headers["Accept-Encoding"] == "identity" + assert headers["Range"] == "bytes=3-" downloader.start( final_response=Response( - url=httpbin_both.url + '/', - headers={'Content-Length': 5, 'Content-Range': 'bytes 3-4/5'}, - status_code=PARTIAL_CONTENT + url=httpbin_both.url + "/", + headers={"Content-Length": 5, "Content-Range": "bytes 3-4/5"}, + status_code=PARTIAL_CONTENT, ), - initial_url='/' + initial_url="/", ) - downloader.chunk_downloaded(b'45') + downloader.chunk_downloaded(b"45") downloader.finish() def test_download_with_redirect_original_url_used_for_filename(self, httpbin): # Redirect from `/redirect/1` to `/get`. - expected_filename = '1.json' + expected_filename = "1.json" orig_cwd = os.getcwd() with tempfile.TemporaryDirectory() as tmp_dirname: os.chdir(tmp_dirname) try: - assert os.listdir('.') == [] - http('--download', httpbin + '/redirect/1') - assert os.listdir('.') == [expected_filename] + assert os.listdir(".") == [] + http("--download", httpbin + "/redirect/1") + assert os.listdir(".") == [expected_filename] finally: os.chdir(orig_cwd) From 2dc9a7c4f01b45b7e9893402358a718c879113cf Mon Sep 17 00:00:00 2001 From: Ali Nazzal Date: Tue, 12 Aug 2025 02:35:21 +0300 Subject: [PATCH 3/4] =?UTF-8?q?chore:=20keep=20PR=20minimal=20=E2=80=94=20?= =?UTF-8?q?revert=20unrelated=20files?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- CHANGELOG.md | 5 - docs/download.md | 8 -- tests/test_downloads.py | 224 ++++++++++++++++++---------------------- 3 files changed, 98 insertions(+), 139 deletions(-) delete mode 100644 docs/download.md diff --git a/CHANGELOG.md b/CHANGELOG.md index afbf5bd2b3..0497ac3508 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,11 +3,6 @@ This document records all notable changes to [HTTPie](https://httpie.io). This project adheres to [Semantic Versioning](https://semver.org/). -## Unreleased - -### Fixed -- Respect `Content-Length` with `--download` when `Content-Encoding` is present to avoid false "Incomplete download" errors. ([#423](https://github.com/httpie/cli/issues/423)) - ## [3.2.4](https://github.com/httpie/cli/compare/3.2.3...3.2.4) (2024-11-01) - Fix default certs loading and unpin `requests`. ([#1596](https://github.com/httpie/cli/issues/1596)) diff --git a/docs/download.md b/docs/download.md deleted file mode 100644 index 59e9981c38..0000000000 --- a/docs/download.md +++ /dev/null @@ -1,8 +0,0 @@ -# Download mode - -HTTPie's `--download` option saves response bodies to files. When a server -returns a `Content-Encoding` (for example `gzip`), the `Content-Length` header -is treated as the size of the encoded payload as defined in RFC 9110 § 8.6. -HTTPie writes the body exactly as received and no longer compares the header to -the post-decompression size. - diff --git a/tests/test_downloads.py b/tests/test_downloads.py index 20680818cd..b646a0e6a5 100644 --- a/tests/test_downloads.py +++ b/tests/test_downloads.py @@ -1,27 +1,18 @@ -import gzip import os import tempfile import time +import requests from unittest import mock from urllib.request import urlopen import pytest -import requests from requests.structures import CaseInsensitiveDict from httpie.downloads import ( - PARTIAL_CONTENT, - ContentRangeError, - Downloader, - filename_from_content_disposition, - filename_from_url, - get_unique_filename, - parse_content_range, + parse_content_range, filename_from_content_disposition, filename_from_url, + get_unique_filename, ContentRangeError, Downloader, PARTIAL_CONTENT ) -from httpie.status import ExitStatus -from tests.utils.http_server import TestHandler - -from .utils import MockEnvironment, http +from .utils import http, MockEnvironment class Response(requests.Response): @@ -32,95 +23,85 @@ def __init__(self, url, headers={}, status_code=200): self.status_code = status_code -@TestHandler.handler("GET", "/gzip") -def gzip_handler(handler): - payload = b"Hello, world!" - compressed = gzip.compress(payload) - handler.send_response(200) - handler.send_header("Content-Length", str(len(compressed))) - handler.send_header("Content-Encoding", "gzip") - handler.end_headers() - handler.wfile.write(compressed) - - class TestDownloadUtils: def test_Content_Range_parsing(self): parse = parse_content_range - assert parse("bytes 100-199/200", 100) == 200 - assert parse("bytes 100-199/*", 100) == 200 + assert parse('bytes 100-199/200', 100) == 200 + assert parse('bytes 100-199/*', 100) == 200 # single byte - assert parse("bytes 100-100/*", 100) == 101 + assert parse('bytes 100-100/*', 100) == 101 # missing pytest.raises(ContentRangeError, parse, None, 100) # syntax error - pytest.raises(ContentRangeError, parse, "beers 100-199/*", 100) + pytest.raises(ContentRangeError, parse, 'beers 100-199/*', 100) # unexpected range - pytest.raises(ContentRangeError, parse, "bytes 100-199/*", 99) + pytest.raises(ContentRangeError, parse, 'bytes 100-199/*', 99) # invalid instance-length - pytest.raises(ContentRangeError, parse, "bytes 100-199/199", 100) + pytest.raises(ContentRangeError, parse, 'bytes 100-199/199', 100) # invalid byte-range-resp-spec - pytest.raises(ContentRangeError, parse, "bytes 100-99/199", 100) - - @pytest.mark.parametrize( - "header, expected_filename", - [ - ("attachment; filename=hello-WORLD_123.txt", "hello-WORLD_123.txt"), - ('attachment; filename=".hello-WORLD_123.txt"', "hello-WORLD_123.txt"), - ('attachment; filename="white space.txt"', "white space.txt"), - (r'attachment; filename="\"quotes\".txt"', '"quotes".txt'), - ("attachment; filename=/etc/hosts", "hosts"), - ("attachment; filename=", None), - ], - ) + pytest.raises(ContentRangeError, parse, 'bytes 100-99/199', 100) + + @pytest.mark.parametrize('header, expected_filename', [ + ('attachment; filename=hello-WORLD_123.txt', 'hello-WORLD_123.txt'), + ('attachment; filename=".hello-WORLD_123.txt"', 'hello-WORLD_123.txt'), + ('attachment; filename="white space.txt"', 'white space.txt'), + (r'attachment; filename="\"quotes\".txt"', '"quotes".txt'), + ('attachment; filename=/etc/hosts', 'hosts'), + ('attachment; filename=', None) + ]) def test_Content_Disposition_parsing(self, header, expected_filename): assert filename_from_content_disposition(header) == expected_filename def test_filename_from_url(self): - assert "foo.txt" == filename_from_url( - url="http://example.org/foo", content_type="text/plain" + assert 'foo.txt' == filename_from_url( + url='http://example.org/foo', + content_type='text/plain' ) - assert "foo.html" == filename_from_url( - url="http://example.org/foo", content_type="text/html; charset=UTF-8" + assert 'foo.html' == filename_from_url( + url='http://example.org/foo', + content_type='text/html; charset=UTF-8' ) - assert "foo" == filename_from_url( - url="http://example.org/foo", content_type=None + assert 'foo' == filename_from_url( + url='http://example.org/foo', + content_type=None ) - assert "foo" == filename_from_url( - url="http://example.org/foo", content_type="x-foo/bar" + assert 'foo' == filename_from_url( + url='http://example.org/foo', + content_type='x-foo/bar' ) @pytest.mark.parametrize( - "orig_name, unique_on_attempt, expected", + 'orig_name, unique_on_attempt, expected', [ # Simple - ("foo.bar", 0, "foo.bar"), - ("foo.bar", 1, "foo.bar-1"), - ("foo.bar", 10, "foo.bar-10"), + ('foo.bar', 0, 'foo.bar'), + ('foo.bar', 1, 'foo.bar-1'), + ('foo.bar', 10, 'foo.bar-10'), # Trim - ("A" * 20, 0, "A" * 10), - ("A" * 20, 1, "A" * 8 + "-1"), - ("A" * 20, 10, "A" * 7 + "-10"), + ('A' * 20, 0, 'A' * 10), + ('A' * 20, 1, 'A' * 8 + '-1'), + ('A' * 20, 10, 'A' * 7 + '-10'), # Trim before ext - ("A" * 20 + ".txt", 0, "A" * 6 + ".txt"), - ("A" * 20 + ".txt", 1, "A" * 4 + ".txt-1"), + ('A' * 20 + '.txt', 0, 'A' * 6 + '.txt'), + ('A' * 20 + '.txt', 1, 'A' * 4 + '.txt-1'), # Trim at the end - ("foo." + "A" * 20, 0, "foo." + "A" * 6), - ("foo." + "A" * 20, 1, "foo." + "A" * 4 + "-1"), - ("foo." + "A" * 20, 10, "foo." + "A" * 3 + "-10"), - ], + ('foo.' + 'A' * 20, 0, 'foo.' + 'A' * 6), + ('foo.' + 'A' * 20, 1, 'foo.' + 'A' * 4 + '-1'), + ('foo.' + 'A' * 20, 10, 'foo.' + 'A' * 3 + '-10'), + ] ) - @mock.patch("httpie.downloads.get_filename_max_length") - def test_unique_filename( - self, get_filename_max_length, orig_name, unique_on_attempt, expected - ): + @mock.patch('httpie.downloads.get_filename_max_length') + def test_unique_filename(self, get_filename_max_length, + orig_name, unique_on_attempt, + expected): def attempts(unique_on_attempt=0): # noinspection PyUnresolvedReferences,PyUnusedLocal @@ -142,50 +123,39 @@ def exists(filename): class TestDownloads: def test_actual_download(self, httpbin_both, httpbin): - robots_txt = "/robots.txt" + robots_txt = '/robots.txt' body = urlopen(httpbin + robots_txt).read().decode() - env = MockEnvironment( - stdin_isatty=True, stdout_isatty=False, show_displays=True - ) - r = http("--download", httpbin_both.url + robots_txt, env=env) - assert "Downloading" in r.stderr + env = MockEnvironment(stdin_isatty=True, stdout_isatty=False, show_displays=True) + r = http('--download', httpbin_both.url + robots_txt, env=env) + assert 'Downloading' in r.stderr assert body == r - def test_download_with_gzip_content_encoding(self, http_server, tmp_path): - orig_cwd = os.getcwd() - os.chdir(tmp_path) - try: - r = http("--download", f"http://{http_server}/gzip") - assert r.exit_status == ExitStatus.SUCCESS - with open("gzip", "rb") as f: - assert gzip.decompress(f.read()) == b"Hello, world!" - finally: - os.chdir(orig_cwd) - def test_download_with_Content_Length(self, mock_env, httpbin_both): - with open(os.devnull, "w") as devnull: + with open(os.devnull, 'w') as devnull: downloader = Downloader(mock_env, output_file=devnull) downloader.start( - initial_url="/", + initial_url='/', final_response=Response( - url=httpbin_both.url + "/", headers={"Content-Length": 10} - ), + url=httpbin_both.url + '/', + headers={'Content-Length': 10} + ) ) time.sleep(1.1) - downloader.chunk_downloaded(b"12345") + downloader.chunk_downloaded(b'12345') time.sleep(1.1) - downloader.chunk_downloaded(b"12345") + downloader.chunk_downloaded(b'12345') downloader.finish() assert not downloader.interrupted def test_download_no_Content_Length(self, mock_env, httpbin_both): - with open(os.devnull, "w") as devnull: + with open(os.devnull, 'w') as devnull: downloader = Downloader(mock_env, output_file=devnull) downloader.start( - final_response=Response(url=httpbin_both.url + "/"), initial_url="/" + final_response=Response(url=httpbin_both.url + '/'), + initial_url='/' ) time.sleep(1.1) - downloader.chunk_downloaded(b"12345") + downloader.chunk_downloaded(b'12345') downloader.finish() assert not downloader.interrupted @@ -194,96 +164,98 @@ def test_download_output_from_content_disposition(self, mock_env, httpbin_both): orig_cwd = os.getcwd() os.chdir(tmp_dirname) try: - assert not os.path.isfile("filename.bin") + assert not os.path.isfile('filename.bin') downloader = Downloader(mock_env) downloader.start( final_response=Response( - url=httpbin_both.url + "/", + url=httpbin_both.url + '/', headers={ - "Content-Length": 5, - "Content-Disposition": 'attachment; filename="filename.bin"', - }, + 'Content-Length': 5, + 'Content-Disposition': 'attachment; filename="filename.bin"', + } ), - initial_url="/", + initial_url='/' ) - downloader.chunk_downloaded(b"12345") + downloader.chunk_downloaded(b'12345') downloader.finish() downloader.failed() # Stop the reporter assert not downloader.interrupted # TODO: Auto-close the file in that case? downloader._output_file.close() - assert os.path.isfile("filename.bin") + assert os.path.isfile('filename.bin') finally: os.chdir(orig_cwd) def test_download_interrupted(self, mock_env, httpbin_both): - with open(os.devnull, "w") as devnull: + with open(os.devnull, 'w') as devnull: downloader = Downloader(mock_env, output_file=devnull) downloader.start( final_response=Response( - url=httpbin_both.url + "/", headers={"Content-Length": 5} + url=httpbin_both.url + '/', + headers={'Content-Length': 5} ), - initial_url="/", + initial_url='/' ) - downloader.chunk_downloaded(b"1234") + downloader.chunk_downloaded(b'1234') downloader.finish() assert downloader.interrupted def test_download_resumed(self, mock_env, httpbin_both): with tempfile.TemporaryDirectory() as tmp_dirname: - file = os.path.join(tmp_dirname, "file.bin") - with open(file, "a"): + file = os.path.join(tmp_dirname, 'file.bin') + with open(file, 'a'): pass - with open(file, "a+b") as output_file: + with open(file, 'a+b') as output_file: # Start and interrupt the transfer after 3 bytes written downloader = Downloader(mock_env, output_file=output_file) downloader.start( final_response=Response( - url=httpbin_both.url + "/", headers={"Content-Length": 5} + url=httpbin_both.url + '/', + headers={'Content-Length': 5} ), - initial_url="/", + initial_url='/' ) - downloader.chunk_downloaded(b"123") + downloader.chunk_downloaded(b'123') downloader.finish() downloader.failed() assert downloader.interrupted # Write bytes - with open(file, "wb") as fh: - fh.write(b"123") + with open(file, 'wb') as fh: + fh.write(b'123') - with open(file, "a+b") as output_file: + with open(file, 'a+b') as output_file: # Resume the transfer downloader = Downloader(mock_env, output_file=output_file, resume=True) # Ensure `pre_request()` is working as expected too headers = {} downloader.pre_request(headers) - assert headers["Accept-Encoding"] == "identity" - assert headers["Range"] == "bytes=3-" + assert headers['Accept-Encoding'] == 'identity' + assert headers['Range'] == 'bytes=3-' downloader.start( final_response=Response( - url=httpbin_both.url + "/", - headers={"Content-Length": 5, "Content-Range": "bytes 3-4/5"}, - status_code=PARTIAL_CONTENT, + url=httpbin_both.url + '/', + headers={'Content-Length': 5, 'Content-Range': 'bytes 3-4/5'}, + status_code=PARTIAL_CONTENT ), - initial_url="/", + initial_url='/' ) - downloader.chunk_downloaded(b"45") + downloader.chunk_downloaded(b'45') downloader.finish() def test_download_with_redirect_original_url_used_for_filename(self, httpbin): # Redirect from `/redirect/1` to `/get`. - expected_filename = "1.json" + expected_filename = '1.json' orig_cwd = os.getcwd() with tempfile.TemporaryDirectory() as tmp_dirname: os.chdir(tmp_dirname) try: - assert os.listdir(".") == [] - http("--download", httpbin + "/redirect/1") - assert os.listdir(".") == [expected_filename] + assert os.listdir('.') == [] + http('--download', httpbin + '/redirect/1') + assert os.listdir('.') == [expected_filename] finally: os.chdir(orig_cwd) From 84d0f666a9dd55dcbc4d1b30fc6574af233d3424 Mon Sep 17 00:00:00 2001 From: Ali Nazzal Date: Tue, 12 Aug 2025 02:44:24 +0300 Subject: [PATCH 4/4] style(test): satisfy codestyle for gzip regression test --- tests/test_download_gzip_regression.py | 49 ++++++++++++++++++-------- 1 file changed, 34 insertions(+), 15 deletions(-) diff --git a/tests/test_download_gzip_regression.py b/tests/test_download_gzip_regression.py index e6e59c92fd..2223312c88 100644 --- a/tests/test_download_gzip_regression.py +++ b/tests/test_download_gzip_regression.py @@ -1,45 +1,65 @@ -import io +# tests/test_download_gzip_regression.py import gzip -import sys +import io import shutil import subprocess -from http.server import HTTPServer, BaseHTTPRequestHandler +import sys import threading +from http.server import BaseHTTPRequestHandler, HTTPServer + -# محتوى نضغطه gzip RAW = b"A" * 50000 buf = io.BytesIO() with gzip.GzipFile(fileobj=buf, mode="wb") as gz: gz.write(RAW) GZ = buf.getvalue() + class Handler(BaseHTTPRequestHandler): - def do_GET(self): - if self.path != "/file.gz": - self.send_error(404); return + def _send_common_headers(self, content_length: int) -> None: self.send_response(200) self.send_header("Content-Type", "application/octet-stream") self.send_header("Content-Encoding", "gzip") - self.send_header("Content-Length", str(len(GZ))) # طول النسخة المضغوطة + self.send_header("Content-Length", str(content_length)) self.end_headers() + + def do_GET(self) -> None: + if self.path != "/file.gz": + self.send_error(404) + return + self._send_common_headers(len(GZ)) self.wfile.write(GZ) - def log_message(self, *args): - pass # نسكت اللوج + def do_HEAD(self) -> None: + if self.path != "/file.gz": + self.send_error(404) + return + self._send_common_headers(len(GZ)) + + def do_POST(self) -> None: + if self.path != "/file.gz": + self.send_error(404) + return + self._send_common_headers(len(GZ)) + self.wfile.write(GZ) + + def log_message(self, *args) -> None: + pass + def start_server(): - srv = HTTPServer(("127.0.0.1", 0), Handler) # بورت حر + srv = HTTPServer(("127.0.0.1", 0), Handler) th = threading.Thread(target=srv.serve_forever, daemon=True) th.start() return srv, th + def test_download_gzip_content_length(tmp_path): srv, _ = start_server() try: url = f"http://127.0.0.1:{srv.server_port}/file.gz" out = tmp_path / "out.gz" - # نفضّل GET صراحةً ونستعمل --output exe = shutil.which("http") if exe: cmd = [exe, "GET", url, "--download", "--output", str(out)] @@ -47,13 +67,12 @@ def test_download_gzip_content_length(tmp_path): cmd = [sys.executable, "-m", "httpie", "GET", url, "--download", "--output", str(out)] p = subprocess.run(cmd, capture_output=True, text=True) - assert p.returncode == 0, f"stderr:\\n{p.stderr}\\nstdout:\\n{p.stdout}" - assert out.exists(), "output file not created" + assert p.returncode == 0, f"stderr:\n{p.stderr}\nstdout:\n{p.stdout}" + assert out.exists(), "output file not created" size = out.stat().st_size assert size == len(GZ), f"saved size {size} != expected {len(GZ)}" - # تحسّبي: نتأكد ماكو إشارات لتجاوز 100% أو incomplete combined = (p.stdout or "") + (p.stderr or "") assert ">100%" not in combined assert "Incomplete download" not in combined