-
-
Notifications
You must be signed in to change notification settings - Fork 181
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
New is_pickled_module()
function
#556
Open
leogama
wants to merge
2
commits into
uqfoundation:master
Choose a base branch
from
leogama:is-pickled-module
base: master
Could not load branches
Branch not found: {{ refName }}
Loading
Could not load tags
Nothing to show
Loading
Are you sure you want to change the base?
Some commits from the old base branch may be removed from the timeline,
and old review comments may become outdated.
Open
Changes from all commits
Commits
Show all changes
2 commits
Select commit
Hold shift + click to select a range
File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,104 @@ | ||
#!/usr/bin/env python | ||
# | ||
# Author: Leonardo Gama (@leogama) | ||
# Copyright (c) 2022 The Uncertainty Quantification Foundation. | ||
# License: 3-clause BSD. The full license text is available at: | ||
# - https://github.com/uqfoundation/dill/blob/master/LICENSE | ||
""" | ||
Auxiliary classes and functions used in more than one module, defined here to | ||
avoid circular import problems. | ||
""" | ||
|
||
import contextlib | ||
import io | ||
from contextlib import suppress | ||
|
||
|
||
## File-related utilities ## | ||
|
||
class _PeekableReader(contextlib.AbstractContextManager):
    """Lightweight readable stream wrapper that implements peek().

    The wrapper forwards ``read()``, ``readline()``, ``tell()`` and ``close()``
    to the wrapped stream and emulates ``peek()`` with ``tell()``/``seek()``.

    Parameters:
        stream: the readable stream to wrap.
        closing: if true, close the wrapped stream when the ``with`` block
            that uses this wrapper exits.
    """
    def __init__(self, stream, closing=True):
        self.stream = stream
        self.closing = closing

    def __exit__(self, *exc_info):
        if self.closing:
            self.stream.close()

    def read(self, n=-1):
        return self.stream.read(n)

    def readline(self):
        return self.stream.readline()

    def tell(self):
        return self.stream.tell()

    def close(self):
        return self.stream.close()

    def peek(self, n):
        """Return up to ``n`` bytes without advancing the stream position.

        Raises:
            NotImplementedError: if the wrapped stream lacks a required method
                or fails with OSError while seeking or reading.
        """
        stream = self.stream
        try:
            if hasattr(stream, 'flush'):
                stream.flush()
            position = stream.tell()
            stream.seek(position)  # assert seek() works before reading
            chunk = stream.read(n)
            stream.seek(position)
            return chunk
        except (AttributeError, OSError):
            # Interpolate the stream into the message; passing it as a second
            # exception argument would leave the '%r' placeholder unformatted.
            raise NotImplementedError("stream is not peekable: %r" % (stream,)) from None
|
||
class _SeekableWriter(io.BytesIO, contextlib.AbstractContextManager):
    """Works as an unlimited buffer, writes to file on close.

    All data is accumulated in memory (this is an ``io.BytesIO``), so it can
    be freely seeked and truncated. The buffer's final content is written to
    the wrapped stream in a single call when the writer is closed.

    Parameters:
        stream: the writable stream that receives the data on close.
        closing: if true, also close the wrapped stream on close.
        *args, **kwds: forwarded to ``io.BytesIO``.
    """
    def __init__(self, stream, closing=True, *args, **kwds):
        super().__init__(*args, **kwds)
        self.stream = stream
        self.closing = closing

    def __exit__(self, *exc_info):
        self.close()

    def close(self):
        # close() must be idempotent, like every other io object: without this
        # guard a second call fails in getvalue() with "I/O operation on
        # closed file".
        if self.closed:
            return
        try:
            self.stream.write(self.getvalue())
            with suppress(AttributeError):
                self.stream.flush()
        finally:
            # Release the buffer (and the stream, if requested) even if
            # writing to the target stream failed.
            super().close()
            if self.closing:
                self.stream.close()
|
||
@contextlib.contextmanager
def _borrowed_buffer(buffered):
    """yield a buffered wrapper and detach it from its raw stream on exit"""
    try:
        yield buffered
    finally:
        # A BufferedReader closes its raw stream when it is garbage collected.
        # Detaching it first leaves the caller's stream open.
        with suppress(ValueError):  # already detached or closed
            buffered.detach()


def _open(file, mode, *, peekable=False, seekable=False):
    """Return a context manager with an opened file-like object.

    Parameters:
        file: a path to be opened with ``open(file, mode)``, or an object that
            already has a ``read`` (read-only modes) or ``write`` (other modes)
            attribute, which is used as is.
        mode: the file mode; it is read-only if it contains 'r' but not '+'.
        peekable: request an object with a ``peek()`` method (read-only only).
        seekable: request an object that supports ``seek()`` and ``truncate()``
            (writable modes only).

    Returns:
        A context manager whose target is the (possibly wrapped) stream. Files
        opened by this function are closed on exit; streams passed in by the
        caller are left open.

    Raises:
        ValueError: if ``peekable`` is used with a writable mode or
            ``seekable`` with a read-only mode.
    """
    readonly = ('r' in mode and '+' not in mode)
    if not readonly and peekable:
        raise ValueError("the 'peekable' option is invalid for writable files")
    if readonly and seekable:
        raise ValueError("the 'seekable' option is invalid for read-only files")
    should_close = not hasattr(file, 'read' if readonly else 'write')
    if should_close:
        file = open(file, mode)
    borrowed = False  # True if a caller-owned stream was wrapped in a BufferedReader
    # Wrap stream in a helper class if necessary.
    if peekable and not hasattr(file, 'peek'):
        # Try our best to return it as an object with a peek() method.
        if hasattr(file, 'seekable'):
            file_seekable = file.seekable()
        elif hasattr(file, 'seek') and hasattr(file, 'tell'):
            try:
                file.seek(file.tell())
                file_seekable = True
            except Exception:
                file_seekable = False
        else:
            file_seekable = False
        if file_seekable:
            file = _PeekableReader(file, closing=should_close)
        else:
            try:
                file = io.BufferedReader(file)
                borrowed = not should_close
            except Exception:
                # It won't be peekable, but will fail gracefully in _identify_module().
                file = _PeekableReader(file, closing=should_close)
    elif seekable and (
        not hasattr(file, 'seek')
        or not hasattr(file, 'truncate')
        or (hasattr(file, 'seekable') and not file.seekable())
    ):
        file = _SeekableWriter(file, closing=should_close)
    if borrowed:
        return _borrowed_buffer(file)
    if should_close or isinstance(file, (_PeekableReader, _SeekableWriter)):
        return file
    return contextlib.nullcontext(file)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -11,20 +11,22 @@ | |
""" | ||
|
||
__all__ = [ | ||
'dump_module', 'load_module', 'load_module_asdict', | ||
'dump_module', 'load_module', 'load_module_asdict', 'is_pickled_module', | ||
'dump_session', 'load_session' # backward compatibility | ||
] | ||
|
||
import re | ||
import sys | ||
import warnings | ||
|
||
from dill import _dill, Pickler, Unpickler | ||
from dill import _dill | ||
from dill import Pickler, Unpickler, UnpicklingError | ||
from ._dill import ( | ||
BuiltinMethodType, FunctionType, MethodType, ModuleType, TypeType, | ||
_import_module, _is_builtin_module, _is_imported_module, _main_module, | ||
_reverse_typemap, __builtin__, | ||
) | ||
from ._utils import _open | ||
|
||
# Type hints. | ||
from typing import Optional, Union | ||
|
@@ -285,26 +287,95 @@ def _make_peekable(stream): | |
|
||
def _identify_module(file, main=None):
    """Identify the name of the module stored in the given file-type object.

    Only the beginning of the stream (``file.peek(256)``) is inspected. The
    expected opcode sequence, ignoring bookkeeping opcodes, is a reference to
    ``_import_module``, the module name as a string, a one-element tuple, a
    REDUCE call and the start of the state dictionary.

    Parameters:
        file: a file-like object with a ``peek()`` method.
        main: optional; if it is not None and the file is not peekable,
            None is returned instead of raising an error.

    Returns:
        The module name found in the pickle, or None as described above.

    Raises:
        UnpicklingError: if the stream doesn't start with the expected
            sequence or can't be inspected.
    """
    import pickletools
    NEUTRAL = {'PROTO', 'FRAME', 'PUT', 'BINPUT', 'MEMOIZE', 'MARK', 'STACK_GLOBAL'}
    try:
        opcodes = ((opcode.name, arg) for opcode, arg, pos in pickletools.genops(file.peek(256))
                   if opcode.name not in NEUTRAL)
        opcode, arg = next(opcodes)
        if (opcode, arg) == ('SHORT_BINUNICODE', 'dill._dill'):
            # The file uses STACK_GLOBAL instead of GLOBAL.
            opcode, arg = next(opcodes)
        # Compare slices: an empty or whitespace-only string splits into an
        # empty list, and indexing it with [-1] would raise IndexError.
        if not (opcode in ('SHORT_BINUNICODE', 'GLOBAL')
                and arg.split()[-1:] == ['_import_module']):
            raise ValueError
        opcode, arg = next(opcodes)
        if opcode not in ('SHORT_BINUNICODE', 'BINUNICODE', 'UNICODE'):
            raise ValueError
        module_name = arg
        if not (
            next(opcodes)[0] in ('TUPLE1', 'TUPLE') and
            next(opcodes)[0] == 'REDUCE' and
            next(opcodes)[0] in ('EMPTY_DICT', 'DICT')
        ):
            raise ValueError
        return module_name
    except StopIteration:
        raise UnpicklingError("reached STOP without finding module") from None
    except (NotImplementedError, ValueError) as error:
        # ValueError also occurs when the end of the chunk is reached (without a STOP).
        if isinstance(error, NotImplementedError) and main is not None:
            # The file is not peekable, but we have the argument main.
            return None
        raise UnpicklingError("unable to identify module") from error
|
||
def is_pickled_module(
    filename, importable: bool = True, identify: bool = False
) -> Union[bool, str]:
    """Check if a file can be loaded with :func:`load_module`.

    Check if the file is a pickle file generated with :func:`dump_module`,
    and thus can be loaded with :func:`load_module`.

    Parameters:
        filename: a path-like object or a readable stream.
        importable: expected kind of the file's saved module. Use `True` for
            importable modules (the default) or `False` for module-type objects.
        identify: if `True`, return the module name if the test succeeds.

    Returns:
        `True` if the pickle file at ``filename`` was generated with
        :func:`dump_module` **AND** the module whose state is saved in it is
        of the kind specified by the ``importable`` argument. `False` otherwise.
        If `identify` is set, return the name of the module instead of `True`.

    Examples:
        Create three types of pickle files:

        >>> import dill
        >>> import types
        >>> dill.dump_module('module_session.pkl') # saves __main__
        >>> dill.dump_module('module_object.pkl', module=types.ModuleType('example'))
        >>> with open('common_object.pkl', 'wb') as file:
        ...     dill.dump('example', file)

        Test each file's kind:

        >>> dill.is_pickled_module('module_session.pkl') # the module is importable
        True
        >>> dill.is_pickled_module('module_session.pkl', importable=False)
        False
        >>> dill.is_pickled_module('module_object.pkl') # the module is not importable
        False
        >>> dill.is_pickled_module('module_object.pkl', importable=False)
        True
        >>> dill.is_pickled_module('module_object.pkl', importable=False, identify=True)
        'example'
        >>> dill.is_pickled_module('common_object.pkl') # always return False
        False
        >>> dill.is_pickled_module('common_object.pkl', importable=False)
        False
    """
    # NOTE(review): a reviewer commented here that this function could be
    # renamed; the suggested name is cut off in the extracted page.
    with _open(filename, 'rb', peekable=True) as file:
        try:
            pickle_main = _identify_module(file)
        except UnpicklingError:
            return False
    # Names of module-type (non-importable) objects carry this prefix.
    is_runtime_mod = pickle_main.startswith('__runtime__.')
    # Coerce to bool so that truthy/falsy arguments still give a bool result;
    # for real booleans this is the same as ``importable ^ is_runtime_mod``.
    res = bool(importable) != is_runtime_mod
    if res and identify:
        return pickle_main.partition('.')[-1] if is_runtime_mod else pickle_main
    return res
|
||
def load_module( | ||
filename = str(TEMPDIR/'session.pkl'), | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,63 @@ | ||
#!/usr/bin/env python | ||
|
||
# Author: Leonardo Gama (@leogama) | ||
# Copyright (c) 2022 The Uncertainty Quantification Foundation. | ||
# License: 3-clause BSD. The full license text is available at: | ||
# - https://github.com/uqfoundation/dill/blob/master/LICENSE | ||
|
||
"""test general utilities in _utils.py""" | ||
|
||
import io | ||
import os | ||
|
||
from dill import _utils | ||
|
||
def test_open():
    """Check the wrappers returned by _utils._open() for different streams.

    Covers a seekable raw file (wrapped in _PeekableReader), an unseekable
    pipe opened for reading (wrapped in io.BufferedReader) and an unseekable
    pipe opened for writing (wrapped in _SeekableWriter).
    """
    # The file is opened unbuffered so that the stream has no peek() method.
    with open(__file__, 'rb', buffering=0) as file_unpeekable:
        assert not hasattr(file_unpeekable, 'peek')

        content = file_unpeekable.read()
        peeked_chars = content[:10]
        first_line = content[:100].partition(b'\n')[0] + b'\n'
        file_unpeekable.seek(0)

        # Test _PeekableReader for seekable stream
        with _utils._open(file_unpeekable, 'r', peekable=True) as file:
            assert isinstance(file, _utils._PeekableReader)
            assert file.peek(10)[:10] == peeked_chars
            assert file.readline() == first_line
        assert not file_unpeekable.closed

    _pipe_r, _pipe_w = os.pipe()
    # The read descriptor is closed explicitly in the 'finally' clause below,
    # so the FileIO objects that read from it must not own it.
    pipe_r = io.FileIO(_pipe_r, closefd=False)
    pipe_w = io.FileIO(_pipe_w, mode='w')
    try:
        assert not hasattr(pipe_r, 'peek')
        assert not pipe_r.seekable()
        assert not pipe_w.seekable()

        # Test io.BufferedReader for unseekable stream
        with _utils._open(pipe_r, 'r', peekable=True) as file:
            assert isinstance(file, io.BufferedReader)
            pipe_w.write(content[:100])
            assert file.peek(10)[:10] == peeked_chars
            assert file.readline() == first_line
        assert not pipe_r.closed

        # Test _SeekableWriter for unseekable stream
        with _utils._open(pipe_w, 'w', seekable=True) as file:
            # Rebinding 'file' drops the last reference to the BufferedReader
            # from the previous test, and a BufferedReader may close its raw
            # stream when garbage collected. Don't rely on pipe_r being open
            # from here on; a new reader is created below.
            assert isinstance(file, _utils._SeekableWriter)
            file.write(content)
            file.flush()
            file.seek(0)
            file.truncate()
            file.write(b'a line of text\n')
        assert not pipe_w.closed
        pipe_r = io.FileIO(_pipe_r, closefd=False)
        assert pipe_r.readline() == b'a line of text\n'
        pipe_r.close()
    finally:
        # Release the pipe even if one of the assertions above fails.
        pipe_w.close()
        os.close(_pipe_r)
|
||
if __name__ == '__main__': | ||
test_open() |
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This doc is slightly confusing, especially if people might think that the file itself is importable (without first unpickling). Fundamentally, it's either a file-like module object or a module class instance... so is something in that vein a better name? It's a bit of an unusual thing for people to think about, so let's give the name some extra thought.