Skip to content

Commit 2c7b705

Browse files
committed
Enhance PyodideHttpClient to support binary data handling and improve response processing
1 parent 8db4988 commit 2c7b705

File tree

1 file changed

+21
-17
lines changed

1 file changed

+21
-17
lines changed

datalab_kernel/backends/pyfetch.py

Lines changed: 21 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -83,6 +83,10 @@ def _request(self, method: str, path: str, **kwargs) -> HttpResponse:
8383
"""Make a synchronous HTTP request using JavaScript XMLHttpRequest.
8484
8585
This works in pyodide because we can call synchronous JS APIs.
86+
87+
For binary data support, we use overrideMimeType("text/plain; charset=x-user-defined")
88+
which makes the browser decode the response body one byte per character (no UTF-8 decoding), preserving all byte values.
89+
We then convert the response text to bytes using ord(c) & 0xFF for each character.
8690
"""
8791
import js # pyodide's JavaScript bridge
8892

@@ -101,11 +105,14 @@ def _request(self, method: str, path: str, **kwargs) -> HttpResponse:
101105
body = None
102106
if "content" in kwargs:
103107
body = kwargs["content"]
104-
# For binary content, we need to convert to Uint8Array
108+
# For binary content, use ArrayBuffer + Uint8Array
109+
# Blob doesn't work, and Uint8Array.new(list(...)) fails with std::string error
105110
if isinstance(body, bytes):
106-
import js
107-
108-
body = js.Uint8Array.new(list(body))
111+
buffer = js.ArrayBuffer.new(len(body))
112+
view = js.Uint8Array.new(buffer)
113+
for i, b in enumerate(body):
114+
view[i] = b
115+
body = view
109116
elif "json" in kwargs:
110117
body = json.dumps(kwargs["json"])
111118
xhr.setRequestHeader("Content-Type", "application/json")
@@ -115,6 +122,11 @@ def _request(self, method: str, path: str, **kwargs) -> HttpResponse:
115122
for key, value in kwargs["headers"].items():
116123
xhr.setRequestHeader(key, value)
117124

125+
# Force binary mode for response - this is critical for NPZ data
126+
# The charset=x-user-defined trick preserves all byte values 0x00-0xFF
127+
# in the low byte of each UTF-16 code unit (bytes 0x80-0xFF map to U+F780-U+F7FF).
128+
xhr.overrideMimeType("text/plain; charset=x-user-defined")
129+
118130
try:
119131
# Send request
120132
if body is not None:
@@ -129,19 +141,11 @@ def _request(self, method: str, path: str, **kwargs) -> HttpResponse:
129141
raise HttpError(0, "Network error - request blocked or server unreachable")
130142

131143
# Get response content as bytes
132-
# Use responseText for text, or response for binary
133-
try:
134-
# Try to get as bytes via ArrayBuffer
135-
if xhr.responseType == "arraybuffer" or hasattr(xhr.response, "byteLength"):
136-
import js
137-
138-
arr = js.Uint8Array.new(xhr.response)
139-
content = bytes(arr.to_py())
140-
else:
141-
# Fall back to text
142-
content = (xhr.responseText or "").encode("utf-8")
143-
except Exception:
144-
content = (xhr.responseText or "").encode("utf-8")
144+
# With overrideMimeType("text/plain; charset=x-user-defined"),
145+
# each byte is stored in the low byte of a UTF-16 code unit.
146+
# We extract it using ord(c) & 0xFF.
147+
response_text = xhr.responseText or ""
148+
content = bytes([ord(c) & 0xFF for c in response_text])
145149

146150
# Parse response headers
147151
response_headers = {}

0 commit comments

Comments
 (0)