diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index b1f255f4..dbd541c4 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -22,7 +22,6 @@ repos: - repo: https://github.com/charliermarsh/ruff-pre-commit rev: v0.0.261 - hooks: - id: ruff # args: [ --fix, --exit-non-zero-on-fix ] @@ -31,9 +30,7 @@ repos: rev: 23.3.0 hooks: - id: black - language_version: python3 args: - - --diff - --skip-string-normalization - repo: https://github.com/codespell-project/codespell diff --git a/docs/source/conf.py b/docs/source/conf.py index 7189a2f3..80bc32de 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -27,7 +27,7 @@ # -- General configuration ------------------------------------------------ # If your documentation needs a minimal Sphinx version, state it here. -#needs_sphinx = '1.0' +# needs_sphinx = '1.0' # Add any Sphinx extension module names here, as strings. They can be # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom @@ -50,7 +50,7 @@ source_suffix = '.rst' # The encoding of source files. -#source_encoding = 'utf-8-sig' +# source_encoding = 'utf-8-sig' # The master toctree document. master_doc = 'index' @@ -70,13 +70,13 @@ # The language for content autogenerated by Sphinx. Refer to documentation # for a list of supported languages. -#language = None +# language = None # There are two options for replacing |today|: either, you set today to some # non-false value, then it is used: -#today = '' +# today = '' # Else, today_fmt is used as the format for a strftime call. -#today_fmt = '%B %d, %Y' +# today_fmt = '%B %d, %Y' # List of patterns, relative to source directory, that match files and # directories to ignore when looking for source files. @@ -84,10 +84,10 @@ # The reST default role (used for this markup: `text`) to use for all # documents. -#default_role = None +# default_role = None # If true, '()' will be appended to :func: etc. cross-reference text. -#add_function_parentheses = True +# add_function_parentheses = True # If true, the current module name will be prepended to all description # unit titles (such as .. function::). @@ -95,16 +95,16 @@ # If true, sectionauthor and moduleauthor directives will be shown in the # output. They are ignored by default. -#show_authors = False +# show_authors = False # The name of the Pygments (syntax highlighting) style to use. pygments_style = 'sphinx' # A list of ignored prefixes for module index sorting. -#modindex_common_prefix = [] +# modindex_common_prefix = [] # If true, keep warnings as "system message" paragraphs in the built documents. -#keep_warnings = False +# keep_warnings = False # -- Options for HTML output ---------------------------------------------- @@ -133,26 +133,26 @@ # Theme options are theme-specific and customize the look and feel of a theme # further. For a list of options available for each theme, see the # documentation. -#html_theme_options = {} +# html_theme_options = {} # Add any paths that contain custom themes here, relative to this directory. -#html_theme_path = [] +# html_theme_path = [] # The name for this set of Sphinx documents. If None, it defaults to # " v documentation". -#html_title = None +# html_title = None # A shorter title for the navigation bar. Default is the same as html_title. -#html_short_title = None +# html_short_title = None # The name of an image file (relative to this directory) to place at the top # of the sidebar. 
-#html_logo = '_static/ia.png' +# html_logo = '_static/ia.png' # The name of an image file (within the static path) to use as favicon of the # docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32 # pixels large. -#html_favicon = None +# html_favicon = None # Add any paths that contain custom static files (such as style sheets) here, # relative to this directory. They are copied after the builtin static files, @@ -162,48 +162,48 @@ # Add any extra paths that contain custom files (such as robots.txt or # .htaccess) here, relative to this directory. These files are copied # directly to the root of the documentation. -#html_extra_path = [] +# html_extra_path = [] # If not '', a 'Last updated on:' timestamp is inserted at every page bottom, # using the given strftime format. -#html_last_updated_fmt = '%b %d, %Y' +# html_last_updated_fmt = '%b %d, %Y' # If true, SmartyPants will be used to convert quotes and dashes to # typographically correct entities. -#html_use_smartypants = True +# html_use_smartypants = True # Custom sidebar templates, maps document names to template names. -#html_sidebars = {} +# html_sidebars = {} # Additional templates that should be rendered to pages, maps page names to # template names. -#html_additional_pages = {} +# html_additional_pages = {} # If false, no module index is generated. -#html_domain_indices = True +# html_domain_indices = True # If false, no index is generated. -#html_use_index = True +# html_use_index = True # If true, the index is split into individual pages for each letter. -#html_split_index = False +# html_split_index = False # If true, links to the reST sources are added to the pages. -#html_show_sourcelink = True +# html_show_sourcelink = True # If true, "Created using Sphinx" is shown in the HTML footer. Default is True. -#html_show_sphinx = True +# html_show_sphinx = True # If true, "(C) Copyright ..." is shown in the HTML footer. Default is True. -#html_show_copyright = True +# html_show_copyright = True # If true, an OpenSearch description file will be output, and all pages will # contain a tag referring to it. The value of this option must be the # base URL from which the finished HTML is served. -#html_use_opensearch = '' +# html_use_opensearch = '' # This is the file name suffix for HTML files (e.g. ".xhtml"). -#html_file_suffix = None +# html_file_suffix = None # Output file base name for HTML help builder. htmlhelp_basename = 'internetarchivedoc' @@ -212,43 +212,46 @@ # -- Options for LaTeX output --------------------------------------------- latex_elements: dict[str, str] = { -# The paper size ('letterpaper' or 'a4paper'). -#'papersize': 'letterpaper', - -# The font size ('10pt', '11pt' or '12pt'). -#'pointsize': '10pt', - -# Additional stuff for the LaTeX preamble. -#'preamble': '', + # The paper size ('letterpaper' or 'a4paper'). + #'papersize': 'letterpaper', + # The font size ('10pt', '11pt' or '12pt'). + #'pointsize': '10pt', + # Additional stuff for the LaTeX preamble. + #'preamble': '', } # Grouping the document tree into LaTeX files. List of tuples # (source start file, target name, title, # author, documentclass [howto/manual]). latex_documents = [ - ('index', 'internetarchive.tex', 'internetarchive Documentation', - 'Jacob M. Johnson', 'manual'), + ( + 'index', + 'internetarchive.tex', + 'internetarchive Documentation', + 'Jacob M. Johnson', + 'manual', + ), ] # The name of an image file (relative to this directory) to place at the top of # the title page. 
-#latex_logo = None +# latex_logo = None # For "manual" documents, if this is true, then toplevel headings are parts, # not chapters. -#latex_use_parts = False +# latex_use_parts = False # If true, show page references after internal links. -#latex_show_pagerefs = False +# latex_show_pagerefs = False # If true, show URL addresses after external links. -#latex_show_urls = False +# latex_show_urls = False # Documents to append as an appendix to all manuals. -#latex_appendices = [] +# latex_appendices = [] # If false, no module index is generated. -#latex_domain_indices = True +# latex_domain_indices = True # -- Options for manual page output --------------------------------------- @@ -256,12 +259,17 @@ # One entry per manual page. List of tuples # (source start file, name, description, authors, manual section). man_pages = [ - ('index', 'internetarchive', 'internetarchive Documentation', - ['Jacob M. Johnson'], 1) + ( + 'index', + 'internetarchive', + 'internetarchive Documentation', + ['Jacob M. Johnson'], + 1, + ) ] # If true, show URL addresses after external links. -#man_show_urls = False +# man_show_urls = False # -- Options for Texinfo output ------------------------------------------- @@ -270,26 +278,32 @@ # (source start file, target name, title, author, # dir menu entry, description, category) texinfo_documents = [ - ('index', 'internetarchive', 'internetarchive Documentation', - 'Jacob M. Johnson', 'internetarchive', 'One line description of project.', - 'Miscellaneous'), + ( + 'index', + 'internetarchive', + 'internetarchive Documentation', + 'Jacob M. Johnson', + 'internetarchive', + 'One line description of project.', + 'Miscellaneous', + ), ] # Documents to append as an appendix to all manuals. -#texinfo_appendices = [] +# texinfo_appendices = [] # If false, no module index is generated. -#texinfo_domain_indices = True +# texinfo_domain_indices = True # How to display URL addresses: 'footnote', 'no', or 'inline'. -#texinfo_show_urls = 'footnote' +# texinfo_show_urls = 'footnote' # If true, do not generate a @detailmenu in the "Top" node's menu. -#texinfo_no_detailmenu = False +# texinfo_no_detailmenu = False -#autodoc_member_order = 'bysource' +# autodoc_member_order = 'bysource' intersphinx_mapping = { 'python': ('https://docs.python.org/3', None), - 'requests': ('http://docs.python-requests.org/en/latest/', None) + 'requests': ('http://docs.python-requests.org/en/latest/', None), } diff --git a/internetarchive/__init__.py b/internetarchive/__init__.py index 473922e5..fe658f05 100644 --- a/internetarchive/__init__.py +++ b/internetarchive/__init__.py @@ -61,14 +61,12 @@ __all__ = [ '__version__', - # Classes. 'ArchiveSession', 'Item', 'File', 'Search', 'Catalog', - # API. 'get_item', 'get_files', diff --git a/internetarchive/auth.py b/internetarchive/auth.py index f6e7dc1e..f007ae48 100644 --- a/internetarchive/auth.py +++ b/internetarchive/auth.py @@ -34,6 +34,7 @@ class S3Auth(AuthBase): """Attaches S3 Basic Authentication to the given Request object.""" + def __init__(self, access_key: str | None = None, secret_key: str | None = None): self.access_key = access_key self.secret_key = secret_key @@ -41,15 +42,18 @@ def __init__(self, access_key: str | None = None, secret_key: str | None = None) def __call__(self, r): if not self.access_key: if self.secret_key: - raise AuthenticationError('No access_key set!' - ' Have you run `ia configure`?') + raise AuthenticationError( + 'No access_key set! Have you run `ia configure`?' 
+ ) if not self.secret_key: if self.access_key: - raise AuthenticationError('No secret_key set!' - ' Have you run `ia configure`?') + raise AuthenticationError( + 'No secret_key set! Have you run `ia configure`?' + ) else: - raise AuthenticationError('No access_key or secret_key set!' - ' Have you run `ia configure`?') + raise AuthenticationError( + 'No access_key or secret_key set! Have you run `ia configure`?' + ) auth_str = f'LOW {self.access_key}:{self.secret_key}' r.headers['Authorization'] = auth_str @@ -58,6 +62,7 @@ def __call__(self, r): class S3PostAuth(AuthBase): """Attaches S3 Basic Authentication to the given Request object.""" + def __init__(self, access_key: str | None = None, secret_key: str | None = None): self.access_key = access_key self.secret_key = secret_key diff --git a/internetarchive/catalog.py b/internetarchive/catalog.py index d8ddd483..6ab20731 100644 --- a/internetarchive/catalog.py +++ b/internetarchive/catalog.py @@ -126,10 +126,9 @@ def make_tasks_request(self, params: Mapping | None) -> Response: :returns: :class:`requests.Response` """ - r = self.session.get(self.url, - params=params, - auth=self.auth, - **self.request_kwargs) + r = self.session.get( + self.url, params=params, auth=self.auth, **self.request_kwargs + ) try: r.raise_for_status() except HTTPError as exc: @@ -177,7 +176,9 @@ def get_rate_limit(self, cmd: str = 'derive.php'): j = json.loads(line) return j - def get_tasks(self, identifier: str = "", params: dict | None = None) -> list[CatalogTask]: + def get_tasks( + self, identifier: str = "", params: dict | None = None + ) -> list[CatalogTask]: """Get a list of all tasks meeting all criteria. The list is ordered by submission time. @@ -216,11 +217,15 @@ def get_tasks(self, identifier: str = "", params: dict | None = None) -> list[Ca all_tasks = sorted(tasks, key=sort_by_date, reverse=True) return all_tasks - def submit_task(self, identifier: str, cmd: str, - comment: str | None = None, - priority: int = 0, - data: dict | None = None, - headers: dict | None = None) -> Response: + def submit_task( + self, + identifier: str, + cmd: str, + comment: str | None = None, + priority: int = 0, + data: dict | None = None, + headers: dict | None = None, + ) -> Response: """Submit an archive.org task. :param identifier: Item identifier. @@ -252,11 +257,9 @@ def submit_task(self, identifier: str, cmd: str, data['args'] = {'comment': comment} if priority: data['priority'] = priority - r = self.session.post(self.url, - json=data, - auth=self.auth, - headers=headers, - **self.request_kwargs) + r = self.session.post( + self.url, json=data, auth=self.auth, headers=headers, **self.request_kwargs + ) return r @@ -264,6 +267,7 @@ class CatalogTask: """This class represents an Archive.org catalog task. It is primarily used by :class:`Catalog`, and should not be used directly. 
""" + def __init__(self, task_dict: Mapping, catalog_obj: Catalog): self.session = catalog_obj.session self.request_kwargs = catalog_obj.request_kwargs @@ -274,11 +278,13 @@ def __init__(self, task_dict: Mapping, catalog_obj: Catalog): def __repr__(self): color = self.task_dict.get('color', 'done') - return ('CatalogTask(identifier={identifier},' - ' task_id={task_id!r}, server={server!r},' - ' cmd={cmd!r},' - ' submitter={submitter!r},' - ' color={task_color!r})'.format(task_color=color, **self.task_dict)) + return ( + 'CatalogTask(identifier={identifier},' + ' task_id={task_id!r}, server={server!r},' + ' cmd={cmd!r},' + ' submitter={submitter!r},' + ' color={task_color!r})'.format(task_color=color, **self.task_dict) + ) def __getitem__(self, key: str): """Dict-like access provided as backward compatibility.""" @@ -302,7 +308,7 @@ def task_log(self) -> str: def get_task_log( task_id: int | str | None, session: ia_session.ArchiveSession, - request_kwargs: Mapping | None = None + request_kwargs: Mapping | None = None, ) -> str: """Static method for getting a task log, given a task_id. diff --git a/internetarchive/cli/argparser.py b/internetarchive/cli/argparser.py index 1466c553..ac390ee9 100644 --- a/internetarchive/cli/argparser.py +++ b/internetarchive/cli/argparser.py @@ -31,7 +31,9 @@ from urllib.parse import parse_qsl -def get_args_dict(args: list[str], query_string: bool = False, header: bool = False) -> dict: +def get_args_dict( + args: list[str], query_string: bool = False, header: bool = False +) -> dict: args = args or [] metadata: dict[str, list | str] = defaultdict(list) for md in args: diff --git a/internetarchive/cli/ia.py b/internetarchive/cli/ia.py index 8e044c36..40ff8876 100755 --- a/internetarchive/cli/ia.py +++ b/internetarchive/cli/ia.py @@ -102,8 +102,7 @@ def load_ia_module(cmd: str): return ep.load() raise ImportError except (ImportError, DistributionNotFound): - print(f"error: '{cmd}' is not an ia command! See 'ia help'", - file=sys.stderr) + print(f"error: '{cmd}' is not an ia command! See 'ia help'", file=sys.stderr) matches = '\t'.join(difflib.get_close_matches(cmd, cmd_aliases.values())) if matches: print(f'\nDid you mean one of these?\n\t{matches}', file=sys.stderr) @@ -115,13 +114,15 @@ def main() -> None: args = docopt(__doc__, version=__version__, options_first=True) # Validate args. 
- s = Schema({ - str: bool, - '--config-file': Or(None, str), - '--host': Or(None, str), - '': list, - '': Or(str, lambda _: 'help'), - }) + s = Schema( + { + str: bool, + '--config-file': Or(None, str), + '--host': Or(None, str), + '': list, + '': Or(str, lambda _: 'help'), + } + ) try: args = s.validate(args) except SchemaError as exc: @@ -142,8 +143,10 @@ def main() -> None: if cmd != 'configure' and args['--config-file']: if not os.path.isfile(args['--config-file']): - print(f'--config-file should be a readable file.\n{printable_usage(__doc__)}', - file=sys.stderr) + print( + f'--config-file should be a readable file.\n{printable_usage(__doc__)}', + file=sys.stderr, + ) sys.exit(1) argv = [cmd] + args[''] @@ -162,9 +165,9 @@ def main() -> None: else: config['general'] = {'host': args['--host']} - session = get_session(config_file=args['--config-file'], - config=config, - debug=args['--debug']) + session = get_session( + config_file=args['--config-file'], config=config, debug=args['--debug'] + ) ia_module = load_ia_module(cmd) try: diff --git a/internetarchive/cli/ia_configure.py b/internetarchive/cli/ia_configure.py index cb313087..b0a22109 100644 --- a/internetarchive/cli/ia_configure.py +++ b/internetarchive/cli/ia_configure.py @@ -52,24 +52,35 @@ def main(argv: list[str], session: ArchiveSession) -> None: sig = session.config.get('cookies', {}).get('logged-in-sig') if not user or not sig: if not user and not sig: - print('error: "logged-in-user" and "logged-in-sig" cookies ' - 'not found in config file, try reconfiguring.', file=sys.stderr) + print( + 'error: "logged-in-user" and "logged-in-sig" cookies ' + 'not found in config file, try reconfiguring.', + file=sys.stderr, + ) elif not user: - print('error: "logged-in-user" cookie not found in config file, ' - 'try reconfiguring.', file=sys.stderr) + print( + 'error: "logged-in-user" cookie not found in config file, ' + 'try reconfiguring.', + file=sys.stderr, + ) elif not sig: - print('error: "logged-in-sig" cookie not found in config file, ' - 'try reconfiguring.', file=sys.stderr) + print( + 'error: "logged-in-sig" cookie not found in config file, ' + 'try reconfiguring.', + file=sys.stderr, + ) sys.exit(1) print(f'logged-in-user={user}; logged-in-sig={sig}') sys.exit() try: # CLI params. if args['--username'] and args['--password']: - config_file_path = configure(args['--username'], - args['--password'], - config_file=session.config_file, - host=session.host) + config_file_path = configure( + args['--username'], + args['--password'], + config_file=session.config_file, + host=session.host, + ) print(f'Config saved to: {config_file_path}', file=sys.stderr) # Netrc @@ -78,20 +89,26 @@ def main(argv: list[str], session: ArchiveSession) -> None: try: n = netrc.netrc() except netrc.NetrcParseError as exc: - print('error: netrc.netrc() cannot parse your .netrc file.', file=sys.stderr) + print( + 'error: netrc.netrc() cannot parse your .netrc file.', + file=sys.stderr, + ) sys.exit(1) username, _, password = n.hosts['archive.org'] - config_file_path = configure(username, - password or "", - config_file=session.config_file, - host=session.host) + config_file_path = configure( + username, + password or "", + config_file=session.config_file, + host=session.host, + ) print(f'Config saved to: {config_file_path}', file=sys.stderr) # Interactive input. 
else: print("Enter your Archive.org credentials below to configure 'ia'.\n") - config_file_path = configure(config_file=session.config_file, - host=session.host) + config_file_path = configure( + config_file=session.config_file, host=session.host + ) print(f'\nConfig saved to: {config_file_path}') except AuthenticationError as exc: diff --git a/internetarchive/cli/ia_copy.py b/internetarchive/cli/ia_copy.py index 8b3ee11a..eff250e4 100644 --- a/internetarchive/cli/ia_copy.py +++ b/internetarchive/cli/ia_copy.py @@ -69,30 +69,53 @@ def main( try: assert src_path != dest_path except AssertionError: - print('error: The source and destination files cannot be the same!', - file=sys.stderr) + print( + 'error: The source and destination files cannot be the same!', + file=sys.stderr, + ) sys.exit(1) global SRC_ITEM SRC_ITEM = session.get_item(src_path.split('/')[0]) # type: ignore # Validate args. - s = Schema({ - str: Use(bool), - '/': And(str, And(And(str, lambda x: '/' in x, - error='Destination not formatted correctly. See usage example.'), - assert_src_file_exists, error=( - f'https://{session.host}/download/{src_path} does not exist. ' - 'Please check the identifier and filepath and retry.'))), - '/': And(str, lambda x: '/' in x, - error='Destination not formatted correctly. See usage example.'), - '--metadata': Or(None, And(Use(get_args_dict), dict), - error='--metadata must be formatted as --metadata="key:value"'), - '--replace-metadata': Use(bool), - '--header': Or(None, And(Use(get_args_dict), dict), - error='--header must be formatted as --header="key:value"'), - '--ignore-file-metadata': Use(bool), - }) + s = Schema( + { + str: Use(bool), + '/': And( + str, + And( + And( + str, + lambda x: '/' in x, + error='Destination not formatted correctly. See usage example.', + ), + assert_src_file_exists, + error=( + f'https://{session.host}/download/{src_path} does not exist. ' + 'Please check the identifier and filepath and retry.' + ), + ), + ), + '/': And( + str, + lambda x: '/' in x, + error='Destination not formatted correctly. See usage example.', + ), + '--metadata': Or( + None, + And(Use(get_args_dict), dict), + error='--metadata must be formatted as --metadata="key:value"', + ), + '--replace-metadata': Use(bool), + '--header': Or( + None, + And(Use(get_args_dict), dict), + error='--header must be formatted as --header="key:value"', + ), + '--ignore-file-metadata': Use(bool), + } + ) try: args = s.validate(args) @@ -113,26 +136,32 @@ def main( # New metadata takes precedence over old metadata. if not args['--replace-metadata']: - args['--metadata'] = merge_dictionaries(SRC_ITEM.metadata, # type: ignore - args['--metadata']) + args['--metadata'] = merge_dictionaries( + SRC_ITEM.metadata, args['--metadata'] # type: ignore + ) # File metadata is copied by default but can be dropped. file_metadata = None if args['--ignore-file-metadata'] else SRC_FILE.metadata # type: ignore # Add keep-old-version by default. 
- if not args['--header'].get('x-archive-keep-old-version') and not args['--no-backup']: + if ( + not args['--header'].get('x-archive-keep-old-version') + and not args['--no-backup'] + ): args['--header']['x-archive-keep-old-version'] = '1' url = f'{session.protocol}//s3.us.archive.org/{quote(dest_path)}' queue_derive = True if args['--no-derive'] is False else False - req = ia.iarequest.S3Request(url=url, - method='PUT', - metadata=args['--metadata'], - file_metadata=file_metadata, - headers=args['--header'], - queue_derive=queue_derive, - access_key=session.access_key, - secret_key=session.secret_key) + req = ia.iarequest.S3Request( + url=url, + method='PUT', + metadata=args['--metadata'], + file_metadata=file_metadata, + headers=args['--header'], + queue_derive=queue_derive, + access_key=session.access_key, + secret_key=session.secret_key, + ) p = req.prepare() r = session.send(p) if r.status_code != 200: @@ -140,7 +169,10 @@ def main( msg = get_s3_xml_text(r.text) except Exception as e: msg = r.text - print(f'error: failed to {cmd} "{src_path}" to "{dest_path}" - {msg}', file=sys.stderr) + print( + f'error: failed to {cmd} "{src_path}" to "{dest_path}" - {msg}', + file=sys.stderr, + ) sys.exit(1) elif cmd == 'copy': print(f'success: copied "{src_path}" to "{dest_path}".', file=sys.stderr) diff --git a/internetarchive/cli/ia_delete.py b/internetarchive/cli/ia_delete.py index cd2d81d9..969d0400 100644 --- a/internetarchive/cli/ia_delete.py +++ b/internetarchive/cli/ia_delete.py @@ -58,22 +58,29 @@ def main(argv, session: ArchiveSession) -> None: args = docopt(__doc__, argv=argv) # Validation error messages. - invalid_id_msg = (' should be between 3 and 80 characters in length, and ' - 'can only contain alphanumeric characters, underscores ( _ ), or ' - 'dashes ( - )') + invalid_id_msg = ( + ' should be between 3 and 80 characters in length, and ' + 'can only contain alphanumeric characters, underscores ( _ ), or ' + 'dashes ( - )' + ) # Validate args. - s = Schema({ - str: Use(bool), - '': list, - '--format': list, - '--header': Or(None, And(Use(get_args_dict), dict), - error='--header must be formatted as --header="key:value"'), - '--glob': list, - 'delete': bool, - '--retries': Use(lambda i: int(i[0])), - '': str, - }) + s = Schema( + { + str: Use(bool), + '': list, + '--format': list, + '--header': Or( + None, + And(Use(get_args_dict), dict), + error='--header must be formatted as --header="key:value"', + ), + '--glob': list, + 'delete': bool, + '--retries': Use(lambda i: int(i[0])), + '': str, + } + ) try: args = s.validate(args) except SchemaError as exc: @@ -89,7 +96,10 @@ def main(argv, session: ArchiveSession) -> None: no_delete = ['_meta.xml', '_files.xml', '_meta.sqlite'] # Add keep-old-version by default. 
- if not args['--header'].get('x-archive-keep-old-version') and not args['--no-backup']: + if ( + not args['--header'].get('x-archive-keep-old-version') + and not args['--no-backup'] + ): args['--header']['x-archive-keep-old-version'] = '1' if verbose: @@ -128,10 +138,12 @@ def main(argv, session: ArchiveSession) -> None: print(f' will delete: {item.identifier}/{f.name}', file=sys.stderr) continue try: - resp = f.delete(verbose=verbose, - cascade_delete=args['--cascade'], - headers=args['--header'], - retries=args['--retries']) + resp = f.delete( + verbose=verbose, + cascade_delete=args['--cascade'], + headers=args['--header'], + retries=args['--retries'], + ) except requests.exceptions.RetryError as e: print(f' error: max retries exceeded for {f.name}', file=sys.stderr) errors = True diff --git a/internetarchive/cli/ia_download.py b/internetarchive/cli/ia_download.py index 335bb89b..2112eb4f 100644 --- a/internetarchive/cli/ia_download.py +++ b/internetarchive/cli/ia_download.py @@ -93,27 +93,39 @@ def main(argv, session: ArchiveSession) -> None: timeout_msg = '--timeout must be an int or float.' # Validate args. - s = Schema({ - str: Use(bool), - '--destdir': Or([], And(Use(lambda d: d[0]), dir_exists), error=destdir_msg), - '--format': list, - '--glob': Use(lambda item: item[0] if item else None), - '--exclude': Use(lambda item: item[0] if item else None), - '': list, - '--search': Or(str, None), - '--itemlist': Or(None, And(lambda f: os.path.isfile(f)), error=itemlist_msg), - '': Or(str, None), - '--retries': Use(lambda x: x[0]), - '--search-parameters': Use(lambda x: get_args_dict(x, query_string=True)), - '--on-the-fly': Use(bool), - '--no-change-timestamp': Use(bool), - '--download-history': Use(bool), - '--parameters': Use(lambda x: get_args_dict(x, query_string=True)), - '--source': list, - '--exclude-source': list, - '--timeout': Or([], And(Use(lambda t: ast.literal_eval(t[0])), Or(int, float), - error=timeout_msg)) - }) + s = Schema( + { + str: Use(bool), + '--destdir': Or( + [], And(Use(lambda d: d[0]), dir_exists), error=destdir_msg + ), + '--format': list, + '--glob': Use(lambda item: item[0] if item else None), + '--exclude': Use(lambda item: item[0] if item else None), + '': list, + '--search': Or(str, None), + '--itemlist': Or( + None, And(lambda f: os.path.isfile(f)), error=itemlist_msg + ), + '': Or(str, None), + '--retries': Use(lambda x: x[0]), + '--search-parameters': Use(lambda x: get_args_dict(x, query_string=True)), + '--on-the-fly': Use(bool), + '--no-change-timestamp': Use(bool), + '--download-history': Use(bool), + '--parameters': Use(lambda x: get_args_dict(x, query_string=True)), + '--source': list, + '--exclude-source': list, + '--timeout': Or( + [], + And( + Use(lambda t: ast.literal_eval(t[0])), + Or(int, float), + error=timeout_msg, + ), + ), + } + ) try: args = s.validate(args) @@ -122,7 +134,9 @@ def main(argv, session: ArchiveSession) -> None: elif args['--exclude'] and args['--format']: raise SchemaError(None, '--exclude and --format cannot be used together.') elif args['--exclude'] and not args['--glob']: - raise SchemaError(None, '--exclude should only be used in conjunction with --glob.') + raise SchemaError( + None, '--exclude should only be used in conjunction with --glob.' 
+ ) except SchemaError as exc: print(f'{exc}\n{printable_usage(__doc__)}', file=sys.stderr) @@ -137,11 +151,15 @@ def main(argv, session: ArchiveSession) -> None: total_ids = len(ids) elif args['--search']: try: - _search = session.search_items(args['--search'], - params=args['--search-parameters']) + _search = session.search_items( + args['--search'], params=args['--search-parameters'] + ) total_ids = _search.num_found if total_ids == 0: - print(f'error: the query "{args["--search"]}" returned no results', file=sys.stderr) + print( + f'error: the query "{args["--search"]}" returned no results', + file=sys.stderr, + ) sys.exit(1) ids = _search except ValueError as e: @@ -179,7 +197,10 @@ def main(argv, session: ArchiveSession) -> None: try: item = session.get_item(identifier) except Exception as exc: - print(f'{identifier}: failed to retrieve item metadata - errors', file=sys.stderr) + print( + f'{identifier}: failed to retrieve item metadata - errors', + file=sys.stderr, + ) raise if 'You are attempting to make an HTTPS' in str(exc): print(f'\n{exc}', file=sys.stderr) diff --git a/internetarchive/cli/ia_list.py b/internetarchive/cli/ia_list.py index 334c493e..3870dc34 100644 --- a/internetarchive/cli/ia_list.py +++ b/internetarchive/cli/ia_list.py @@ -56,7 +56,9 @@ def main(argv, session: ArchiveSession) -> None: columns.remove('name') columns.insert(0, 'name') - dict_writer = csv.DictWriter(sys.stdout, columns, delimiter='\t', lineterminator='\n') + dict_writer = csv.DictWriter( + sys.stdout, columns, delimiter='\t', lineterminator='\n' + ) if args.get('--glob'): patterns = args['--glob'].split('|') @@ -72,7 +74,9 @@ def main(argv, session: ArchiveSession) -> None: if isinstance(val, (list, tuple, set)): val = ';'.join(val) if key == 'name' and args.get('--location'): - file_dict[key] = f'https://{session.host}/download/{item.identifier}/{val}' + file_dict[ + key + ] = f'https://{session.host}/download/{item.identifier}/{val}' else: file_dict[key] = val output.append(file_dict) diff --git a/internetarchive/cli/ia_metadata.py b/internetarchive/cli/ia_metadata.py index 8b435cab..1a625535 100644 --- a/internetarchive/cli/ia_metadata.py +++ b/internetarchive/cli/ia_metadata.py @@ -79,9 +79,15 @@ def modify_metadata(item: item.Item, metadata: Mapping, args: Mapping) -> Respon append_list = bool(args['--append-list']) insert = bool(args['--insert']) try: - r = item.modify_metadata(metadata, target=args['--target'], append=append, - priority=args['--priority'], append_list=append_list, - headers=args['--header'], insert=insert) + r = item.modify_metadata( + metadata, + target=args['--target'], + append=append, + priority=args['--priority'], + append_list=append_list, + headers=args['--header'], + insert=insert, + ) assert isinstance(r, Response) # mypy: modify_metadata() -> Request | Response except ItemLocateError as exc: print(f'{item.identifier} - error: {exc}', file=sys.stderr) @@ -89,7 +95,10 @@ def modify_metadata(item: item.Item, metadata: Mapping, args: Mapping) -> Respon if not r.json()['success']: error_msg = r.json()['error'] etype = 'warning' if 'no changes' in r.text else 'error' - print(f'{item.identifier} - {etype} ({r.status_code}): {error_msg}', file=sys.stderr) + print( + f'{item.identifier} - {etype} ({r.status_code}): {error_msg}', + file=sys.stderr, + ) return r print(f'{item.identifier} - success: {r.json()["log"]}', file=sys.stderr) return r @@ -100,7 +109,10 @@ def remove_metadata(item: item.Item, metadata: Mapping, args: Mapping) -> Respon for key in metadata: src_md = 
copy(item.metadata.get(key)) if not src_md: - print(f'{item.identifier}/metadata/{key} does not exist, skipping.', file=sys.stderr) + print( + f'{item.identifier}/metadata/{key} does not exist, skipping.', + file=sys.stderr, + ) continue if key == 'collection': @@ -115,23 +127,33 @@ def remove_metadata(item: item.Item, metadata: Mapping, args: Mapping) -> Respon r = item.remove_from_simplelist(c, 'holdings') j = r.json() if j.get('success'): - print(f'{item.identifier} - success: {item.identifier} no longer in {c}', - file=sys.stderr) + print( + f'{item.identifier} - success: {item.identifier} no longer in {c}', + file=sys.stderr, + ) sys.exit(0) elif j.get('error', '').startswith('no row to delete for'): - print(f'{item.identifier} - success: {item.identifier} no longer in {c}', - file=sys.stderr) + print( + f'{item.identifier} - success: {item.identifier} no longer in {c}', + file=sys.stderr, + ) sys.exit(0) else: - print(f'{item.identifier} - error: {j.get("error")}', file=sys.stderr) + print( + f'{item.identifier} - error: {j.get("error")}', + file=sys.stderr, + ) sys.exit(1) if not isinstance(src_md, list): if key == 'subject': src_md = src_md.split(';') elif key == 'collection': - print(f'{item.identifier} - error: all collections would be removed, ' - 'not submitting task.', file=sys.stderr) + print( + f'{item.identifier} - error: all collections would be removed, ' + 'not submitting task.', + file=sys.stderr, + ) sys.exit(1) if src_md == metadata[key]: @@ -155,11 +177,16 @@ def remove_metadata(item: item.Item, metadata: Mapping, args: Mapping) -> Respon md[key] = 'REMOVE_TAG' if md.get('collection') == []: - print(f'{item.identifier} - error: all collections would be removed, not submitting task.', - file=sys.stderr) + print( + f'{item.identifier} - error: all collections would be removed, not submitting task.', + file=sys.stderr, + ) sys.exit(1) elif not md: - print(f'{item.identifier} - warning: nothing needed to be removed.', file=sys.stderr) + print( + f'{item.identifier} - warning: nothing needed to be removed.', + file=sys.stderr, + ) sys.exit(0) r = modify_metadata(item, md, args) @@ -170,21 +197,31 @@ def main(argv: dict, session: session.ArchiveSession) -> None: args = docopt(__doc__, argv=argv) # Validate args. - s = Schema({ - str: bool, - '': list, - '--modify': list, - '--header': Or(None, And(Use(get_args_header_dict), dict), - error='--header must be formatted as --header="key:value"'), - '--append': list, - '--append-list': list, - '--insert': list, - '--remove': list, - '--spreadsheet': Or(None, And(lambda f: os.path.exists(f), - error=' should be a readable file or directory.')), - '--target': Or(None, str), - '--priority': Or(None, Use(int, error=' should be an integer.')), - }) + s = Schema( + { + str: bool, + '': list, + '--modify': list, + '--header': Or( + None, + And(Use(get_args_header_dict), dict), + error='--header must be formatted as --header="key:value"', + ), + '--append': list, + '--append-list': list, + '--insert': list, + '--remove': list, + '--spreadsheet': Or( + None, + And( + lambda f: os.path.exists(f), + error=' should be a readable file or directory.', + ), + ), + '--target': Or(None, str), + '--priority': Or(None, Use(int, error=' should be an integer.')), + } + ) try: args = s.validate(args) except SchemaError as exc: @@ -212,8 +249,13 @@ def main(argv: dict, session: session.ArchiveSession) -> None: sys.exit(1) # Modify metadata. 
- elif (args['--modify'] or args['--append'] or args['--append-list'] - or args['--remove'] or args['--insert']): + elif ( + args['--modify'] + or args['--append'] + or args['--append-list'] + or args['--remove'] + or args['--insert'] + ): if args['--modify']: metadata_args = args['--modify'] elif args['--append']: @@ -229,10 +271,12 @@ def main(argv: dict, session: session.ArchiveSession) -> None: if any('/' in k for k in metadata): metadata = get_args_dict_many_write(metadata) except ValueError: - print('error: The value of --modify, --remove, --append, --append-list ' - 'or --insert is invalid. It must be formatted as: ' - '--modify=key:value', - file=sys.stderr) + print( + 'error: The value of --modify, --remove, --append, --append-list ' + 'or --insert is invalid. It must be formatted as: ' + '--modify=key:value', + file=sys.stderr, + ) sys.exit(1) if args['--remove']: diff --git a/internetarchive/cli/ia_move.py b/internetarchive/cli/ia_move.py index b2c97972..eb497153 100644 --- a/internetarchive/cli/ia_move.py +++ b/internetarchive/cli/ia_move.py @@ -48,16 +48,27 @@ def main(argv, session: ArchiveSession) -> None: dest_path = args['/'] # Validate args. - s = Schema({ - str: Use(bool), - '--metadata': list, - '--header': Or(None, And(Use(get_args_dict), dict), - error='--header must be formatted as --header="key:value"'), - '/': And(str, lambda x: '/' in x, - error='Source not formatted correctly. See usage example.'), - '/': And(str, lambda x: '/' in x, - error='Destination not formatted correctly. See usage example.'), - }) + s = Schema( + { + str: Use(bool), + '--metadata': list, + '--header': Or( + None, + And(Use(get_args_dict), dict), + error='--header must be formatted as --header="key:value"', + ), + '/': And( + str, + lambda x: '/' in x, + error='Source not formatted correctly. See usage example.', + ), + '/': And( + str, + lambda x: '/' in x, + error='Destination not formatted correctly. See usage example.', + ), + } + ) try: args = s.validate(args) except SchemaError as exc: @@ -65,7 +76,10 @@ def main(argv, session: ArchiveSession) -> None: sys.exit(1) # Add keep-old-version by default. - if not args['--header'].get('x-archive-keep-old-version') and not args['--no-backup']: + if ( + not args['--header'].get('x-archive-keep-old-version') + and not args['--no-backup'] + ): args['--header']['x-archive-keep-old-version'] = '1' # First we use ia_copy, prep argv for ia_copy. 
diff --git a/internetarchive/cli/ia_reviews.py b/internetarchive/cli/ia_reviews.py index d06de76d..4b4280dc 100644 --- a/internetarchive/cli/ia_reviews.py +++ b/internetarchive/cli/ia_reviews.py @@ -59,9 +59,11 @@ def main(argv, session: ArchiveSession) -> None: item = session.get_item(args['']) if args['--delete']: - r = item.delete_review(username=args['--username'], - screenname=args['--screenname'], - itemname=args['--itemname']) + r = item.delete_review( + username=args['--username'], + screenname=args['--screenname'], + itemname=args['--itemname'], + ) elif not args['--body']: try: r = item.get_review() @@ -78,8 +80,10 @@ def main(argv, session: ArchiveSession) -> None: if j.get('success') or 'no change detected' in j.get('error', '').lower(): task_id = j.get('value', {}).get('task_id') if task_id: - print(f'{item.identifier} - success: https://catalogd.archive.org/log/{task_id}', - file=sys.stderr) + print( + f'{item.identifier} - success: https://catalogd.archive.org/log/{task_id}', + file=sys.stderr, + ) else: print(f'{item.identifier} - warning: no changes detected!', file=sys.stderr) sys.exit(0) diff --git a/internetarchive/cli/ia_search.py b/internetarchive/cli/ia_search.py index bc121216..007d8663 100644 --- a/internetarchive/cli/ia_search.py +++ b/internetarchive/cli/ia_search.py @@ -60,17 +60,23 @@ def main(argv, session: ArchiveSession | None = None) -> None: args = docopt(__doc__, argv=argv) # Validate args. - s = Schema({ - str: Use(bool), - '': Use(lambda x: ' '.join(x)), - '--parameters': Use(lambda x: get_args_dict(x, query_string=True)), - '--header': Or(None, And(Use(get_args_dict), dict), - error='--header must be formatted as --header="key:value"'), - '--sort': list, - '--field': list, - '--timeout': Use(lambda x: float(x[0]), - error='--timeout must be integer or float.') - }) + s = Schema( + { + str: Use(bool), + '': Use(lambda x: ' '.join(x)), + '--parameters': Use(lambda x: get_args_dict(x, query_string=True)), + '--header': Or( + None, + And(Use(get_args_dict), dict), + error='--header must be formatted as --header="key:value"', + ), + '--sort': list, + '--field': list, + '--timeout': Use( + lambda x: float(x[0]), error='--timeout must be integer or float.' + ), + } + ) try: args = s.validate(args) except SchemaError as exc: @@ -86,13 +92,16 @@ def main(argv, session: ArchiveSession | None = None) -> None: 'timeout': args['--timeout'], } - search = session.search_items(args[''], # type: ignore - fields=fields, - sorts=sorts, - params=args['--parameters'], - full_text_search=args['--fts'], - dsl_fts=args['--dsl-fts'], - request_kwargs=r_kwargs) + assert session # placate mypy + search = session.search_items( + args[''], # type: ignore + fields=fields, + sorts=sorts, + params=args['--parameters'], + full_text_search=args['--fts'], + dsl_fts=args['--dsl-fts'], + request_kwargs=r_kwargs, + ) try: if args['--num-found']: @@ -113,12 +122,17 @@ def main(argv, session: ArchiveSession | None = None) -> None: except ValueError as e: print(f'error: {e}', file=sys.stderr) except ConnectTimeout as exc: - print('error: Request timed out. Increase the --timeout and try again.', - file=sys.stderr) + print( + 'error: Request timed out. 
Increase the --timeout and try again.', + file=sys.stderr, + ) sys.exit(1) except ReadTimeout as exc: - print('error: The server timed out and failed to return all search results,' - ' please try again', file=sys.stderr) + print( + 'error: The server timed out and failed to return all search results,' + ' please try again', + file=sys.stderr, + ) sys.exit(1) except AuthenticationError as exc: print(f'error: {exc}', file=sys.stderr) diff --git a/internetarchive/cli/ia_tasks.py b/internetarchive/cli/ia_tasks.py index 9b61ffd7..3cdd8ccf 100644 --- a/internetarchive/cli/ia_tasks.py +++ b/internetarchive/cli/ia_tasks.py @@ -86,19 +86,23 @@ def main(argv, session: ArchiveSession) -> None: data = get_args_dict(args['--data'], query_string=True) task_args = get_args_dict(args['--task-args'], query_string=True) data['args'] = task_args - r = session.submit_task(args[''], - args['--cmd'], - comment=args['--comment'], - priority=int(data.get('priority', 0)), - reduced_priority=args['--reduced-priority'], - data=data) + r = session.submit_task( + args[''], + args['--cmd'], + comment=args['--comment'], + priority=int(data.get('priority', 0)), + reduced_priority=args['--reduced-priority'], + data=data, + ) j = r.json() if j.get('success'): task_log_url = j.get('value', {}).get('log') print(f'success: {task_log_url}', file=sys.stderr) sys.exit(0) elif 'already queued/running' in j.get('error', ''): - print(f'success: {args["--cmd"]} task already queued/running', file=sys.stderr) + print( + f'success: {args["--cmd"]} task already queued/running', file=sys.stderr + ) sys.exit(0) else: print(f'error: {j.get("error")}', file=sys.stderr) @@ -112,8 +116,11 @@ def main(argv, session: ArchiveSession) -> None: params = _params elif args['--get-task-log']: log = session.get_task_log(args['--get-task-log'], params) - print(log.encode('utf-8', errors='surrogateescape') - .decode('utf-8', errors='replace')) + print( + log.encode('utf-8', errors='surrogateescape').decode( + 'utf-8', errors='replace' + ) + ) sys.exit(0) queryable_params = [ @@ -128,20 +135,26 @@ def main(argv, session: ArchiveSession) -> None: 'submittime', ] - if not (args[''] - or params.get('task_id')): + if not (args[''] or params.get('task_id')): _params = {'catalog': 1, 'history': 0} _params.update(params) params = _params if not any(x in params for x in queryable_params): - _params = {'submitter': session.user_email, 'catalog': 1, 'history': 0, 'summary': 0} + _params = { + 'submitter': session.user_email, + 'catalog': 1, + 'history': 0, + 'summary': 0, + } _params.update(params) params = _params if args['--tab-output']: - warn_msg = ('tab-delimited output will be removed in a future release. ' - 'Please switch to the default JSON output.') + warn_msg = ( + 'tab-delimited output will be removed in a future release. ' + 'Please switch to the default JSON output.' + ) warnings.warn(warn_msg, stacklevel=2) for t in session.get_tasks(params=params): # Legacy support for tab-delimted output. 
@@ -149,16 +162,22 @@ def main(argv, session: ArchiveSession) -> None: if args['--tab-output']: color = t.color if t.color else 'done' task_args = '\t'.join([f'{k}={v}' for k, v in t.args.items()]) # type: ignore - output = '\t'.join([str(x) for x in [ # type: ignore - t.identifier, # type: ignore - t.task_id, # type: ignore - t.server, # type: ignore - t.submittime, # type: ignore - t.cmd, # type: ignore - color, # type: ignore - t.submitter, # type: ignore - task_args, - ] if x]) + output = '\t'.join( + [ + str(x) + for x in [ # type: ignore + t.identifier, # type: ignore + t.task_id, # type: ignore + t.server, # type: ignore + t.submittime, # type: ignore + t.cmd, # type: ignore + color, # type: ignore + t.submitter, # type: ignore + task_args, + ] + if x + ] + ) print(output, flush=True) else: print(t.json(), flush=True) diff --git a/internetarchive/cli/ia_upload.py b/internetarchive/cli/ia_upload.py index 08117d08..87bb6378 100644 --- a/internetarchive/cli/ia_upload.py +++ b/internetarchive/cli/ia_upload.py @@ -86,7 +86,9 @@ ) -def _upload_files(item, files, upload_kwargs, prev_identifier=None, archive_session=None): +def _upload_files( + item, files, upload_kwargs, prev_identifier=None, archive_session=None +): """Helper function for calling :meth:`Item.upload`""" # Check if the list has any element. if not files: @@ -110,9 +112,7 @@ def _upload_files(item, files, upload_kwargs, prev_identifier=None, archive_sess for i, r in enumerate(responses): if i != 0: print('---', file=sys.stderr) - headers = '\n'.join( - [f' {k}:{v}' for (k, v) in r.headers.items()] - ) + headers = '\n'.join([f' {k}:{v}' for (k, v) in r.headers.items()]) print(f'Endpoint:\n {r.url}\n', file=sys.stderr) print(f'HTTP Headers:\n{headers}', file=sys.stderr) @@ -124,33 +124,64 @@ def main(argv, session): # noqa: C901 ERRORS = False # Validate args. - s = Schema({ - str: Use(bool), - '': Or(None, And(str, validate_s3_identifier, - error=(' should be between 3 and 80 characters in length, and ' - 'can only contain alphanumeric characters, periods ".", ' - 'underscores "_", or dashes "-". However, cannot begin ' - 'with periods, underscores, or dashes.'))), - '': And( - And(lambda f: all(os.path.exists(x) for x in f if x != '-'), - error=' should be a readable file or directory.'), - And(lambda f: False if f == ['-'] and not args['--remote-name'] else True, - error='--remote-name must be provided when uploading from stdin.')), - '--remote-name': Or(None, str), - '--spreadsheet': Or(None, os.path.isfile, - error='--spreadsheet should be a readable file.'), - '--file-metadata': Or(None, os.path.isfile, - error='--file-metadata should be a readable file.'), - '--metadata': Or(None, And(Use(get_args_dict), dict), - error='--metadata must be formatted as --metadata="key:value"'), - '--header': Or(None, And(Use(get_args_dict), dict), - error='--header must be formatted as --header="key:value"'), - '--retries': Use(lambda x: int(x[0]) if x else 0), - '--sleep': Use(lambda lst: int(lst[0]), error='--sleep value must be an integer.'), - '--size-hint': Or(Use(lambda lst: str(lst[0]) if lst else None), int, None, - error='--size-hint value must be an integer.'), - '--status-check': bool, - }) + s = Schema( + { + str: Use(bool), + '': Or( + None, + And( + str, + validate_s3_identifier, + error=( + ' should be between 3 and 80 characters in length, and ' + 'can only contain alphanumeric characters, periods ".", ' + 'underscores "_", or dashes "-". However, cannot begin ' + 'with periods, underscores, or dashes.' 
+ ), + ), + ), + '': And( + And( + lambda f: all(os.path.exists(x) for x in f if x != '-'), + error=' should be a readable file or directory.', + ), + And( + lambda f: False + if f == ['-'] and not args['--remote-name'] + else True, + error='--remote-name must be provided when uploading from stdin.', + ), + ), + '--remote-name': Or(None, str), + '--spreadsheet': Or( + None, os.path.isfile, error='--spreadsheet should be a readable file.' + ), + '--file-metadata': Or( + None, os.path.isfile, error='--file-metadata should be a readable file.' + ), + '--metadata': Or( + None, + And(Use(get_args_dict), dict), + error='--metadata must be formatted as --metadata="key:value"', + ), + '--header': Or( + None, + And(Use(get_args_dict), dict), + error='--header must be formatted as --header="key:value"', + ), + '--retries': Use(lambda x: int(x[0]) if x else 0), + '--sleep': Use( + lambda lst: int(lst[0]), error='--sleep value must be an integer.' + ), + '--size-hint': Or( + Use(lambda lst: str(lst[0]) if lst else None), + int, + None, + error='--size-hint value must be an integer.', + ), + '--status-check': bool, + } + ) try: args = s.validate(args) except SchemaError as exc: @@ -159,25 +190,36 @@ def main(argv, session): # noqa: C901 # Make sure the collection being uploaded to exists. collection_id = args['--metadata'].get('collection') - if collection_id and not args['--no-collection-check'] and not args['--status-check']: + if ( + collection_id + and not args['--no-collection-check'] + and not args['--status-check'] + ): if isinstance(collection_id, list): collection_id = collection_id[0] collection = session.get_item(collection_id) if not collection.exists: - print('You must upload to a collection that exists. ' - f'"{collection_id}" does not exist.\n{printable_usage(__doc__)}', - file=sys.stderr) + print( + 'You must upload to a collection that exists. ' + f'"{collection_id}" does not exist.\n{printable_usage(__doc__)}', + file=sys.stderr, + ) sys.exit(1) # Status check. if args['--status-check']: if session.s3_is_overloaded(): - print(f'warning: {args[""]} is over limit, and not accepting requests. ' - 'Expect 503 SlowDown errors.', - file=sys.stderr) + print( + f'warning: {args[""]} is over limit, and not accepting requests. ' + 'Expect 503 SlowDown errors.', + file=sys.stderr, + ) sys.exit(1) else: - print(f'success: {args[""]} is accepting requests.', file=sys.stderr) + print( + f'success: {args[""]} is accepting requests.', + file=sys.stderr, + ) sys.exit() elif args['']: @@ -187,7 +229,10 @@ def main(argv, session): # noqa: C901 if args['--size-hint']: args['--header']['x-archive-size-hint'] = args['--size-hint'] # Upload with backups turned on by default. - if not args['--header'].get('x-archive-keep-old-version') and not args['--no-backup']: + if ( + not args['--header'].get('x-archive-keep-old-version') + and not args['--no-backup'] + ): args['--header']['x-archive-keep-old-version'] = '1' queue_derive = True if args['--no-derive'] is False else False @@ -227,13 +272,16 @@ def main(argv, session): # noqa: C901 # Note that the encoding attribute might also be None. In that case, fall back to # locale.getpreferredencoding, the default of io.TextIOWrapper and open(). 
if hasattr(sys.stdin, 'buffer'): + def read(): return sys.stdin.buffer.read(1048576) + else: encoding = sys.stdin.encoding or getpreferredencoding(False) def read(): return sys.stdin.read(1048576).encode(encoding) + while True: data = read() if not data: @@ -263,7 +311,9 @@ def read(): ERRORS = True else: if args['--open-after-upload']: - url = f'{session.protocol}//{session.host}/details/{item.identifier}' + url = ( + f'{session.protocol}//{session.host}/details/{item.identifier}' + ) webbrowser.open_new_tab(url) # Bulk upload using spreadsheet. @@ -275,8 +325,10 @@ def read(): for row in spreadsheet: for metadata_key in row: if not is_valid_metadata_key(metadata_key): - print(f'error: "{metadata_key}" is not a valid metadata key.', - file=sys.stderr) + print( + f'error: "{metadata_key}" is not a valid metadata key.', + file=sys.stderr, + ) sys.exit(1) upload_kwargs_copy = deepcopy(upload_kwargs) if row.get('REMOTE_NAME'): @@ -289,8 +341,10 @@ def read(): identifier = row.get('item', row.get('identifier')) if not identifier: if not prev_identifier: - print('error: no identifier column on spreadsheet.', - file=sys.stderr) + print( + 'error: no identifier column on spreadsheet.', + file=sys.stderr, + ) sys.exit(1) identifier = prev_identifier del row['file'] @@ -304,8 +358,9 @@ def read(): md_args = [f'{k.lower()}:{v}' for (k, v) in row.items() if v] metadata = get_args_dict(md_args) upload_kwargs_copy['metadata'].update(metadata) - r = _upload_files(item, local_file, upload_kwargs_copy, prev_identifier, - session) + r = _upload_files( + item, local_file, upload_kwargs_copy, prev_identifier, session + ) for _r in r: if args['--debug']: break diff --git a/internetarchive/config.py b/internetarchive/config.py index c267d83e..036e2224 100644 --- a/internetarchive/config.py +++ b/internetarchive/config.py @@ -66,7 +66,7 @@ def get_auth_config(email: str, password: str, host: str = 'archive.org') -> dic }, 'general': { 'screenname': j['values']['screenname'], - } + }, } return auth_config diff --git a/internetarchive/files.py b/internetarchive/files.py index c8029469..055d9412 100644 --- a/internetarchive/files.py +++ b/internetarchive/files.py @@ -45,7 +45,6 @@ class BaseFile: - def __init__(self, item_metadata, name, file_metadata=None): if file_metadata is None: file_metadata = {} @@ -103,6 +102,7 @@ class File(BaseFile): `__ """ + def __init__(self, item, name, file_metadata=None): """ :type item: Item @@ -125,22 +125,38 @@ def __init__(self, item, name, file_metadata=None): } self.url = '{protocol}//{host}/download/{id}/{name}'.format(**url_parts) if self.item.session.access_key and self.item.session.secret_key: - self.auth = auth.S3Auth(self.item.session.access_key, - self.item.session.secret_key) + self.auth = auth.S3Auth( + self.item.session.access_key, self.item.session.secret_key + ) else: self.auth = None def __repr__(self): - return (f'File(identifier={self.identifier!r}, ' - f'filename={self.name!r}, ' - f'size={self.size!r}, ' - f'format={self.format!r})') - - def download(self, file_path=None, verbose=None, ignore_existing=None, - checksum=None, destdir=None, retries=None, ignore_errors=None, - fileobj=None, return_responses=None, no_change_timestamp=None, - params=None, chunk_size=None, stdout=None, ors=None, - timeout=None): + return ( + f'File(identifier={self.identifier!r}, ' + f'filename={self.name!r}, ' + f'size={self.size!r}, ' + f'format={self.format!r})' + ) + + def download( + self, + file_path=None, + verbose=None, + ignore_existing=None, + checksum=None, + 
destdir=None, + retries=None, + ignore_errors=None, + fileobj=None, + return_responses=None, + no_change_timestamp=None, + params=None, + chunk_size=None, + stdout=None, + ors=None, + timeout=None, + ): """Download the file into the current working directory. :type file_path: str @@ -230,15 +246,21 @@ def download(self, file_path=None, verbose=None, ignore_existing=None, md5_sum = utils.get_md5(fp) if md5_sum == self.md5: - msg = f'skipping {file_path}, file already exists based on checksum.' + msg = ( + f'skipping {file_path}, file already exists based on checksum.' + ) log.info(msg) if verbose: print(f' {msg}', file=sys.stderr) return elif not fileobj: st = os.stat(file_path.encode('utf-8')) - if (st.st_mtime == self.mtime) and (st.st_size == self.size) \ - or self.name.endswith('_files.xml') and st.st_size != 0: + if ( + (st.st_mtime == self.mtime) + and (st.st_size == self.size) + or self.name.endswith('_files.xml') + and st.st_size != 0 + ): msg = f'skipping {file_path}, file already exists based on length and date.' log.info(msg) if verbose: @@ -250,22 +272,22 @@ def download(self, file_path=None, verbose=None, ignore_existing=None, if parent_dir != '' and return_responses is not True: os.makedirs(parent_dir, exist_ok=True) - response = self.item.session.get(self.url, - stream=True, - timeout=timeout, - auth=self.auth, - params=params) + response = self.item.session.get( + self.url, stream=True, timeout=timeout, auth=self.auth, params=params + ) response.raise_for_status() if return_responses: return response if verbose: total = int(response.headers.get('content-length', 0)) or None - progress_bar = tqdm(desc=f' downloading {self.name}', - total=total, - unit='iB', - unit_scale=True, - unit_divisor=1024) + progress_bar = tqdm( + desc=f' downloading {self.name}', + total=total, + unit='iB', + unit_scale=True, + unit_divisor=1024, + ) else: progress_bar = nullcontext() @@ -308,8 +330,16 @@ def download(self, file_path=None, verbose=None, ignore_existing=None, log.info(msg) return True - def delete(self, cascade_delete=None, access_key=None, secret_key=None, verbose=None, - debug=None, retries=None, headers=None): + def delete( + self, + cascade_delete=None, + access_key=None, + secret_key=None, + verbose=None, + debug=None, + retries=None, + headers=None, + ): """Delete a file from the Archive. Note: Some files -- such as _meta.xml -- cannot be deleted. 
@@ -345,15 +375,15 @@ def delete(self, cascade_delete=None, access_key=None, secret_key=None, verbose= headers['x-archive-cascade-delete'] = cascade_delete url = f'{self.item.session.protocol}//s3.us.archive.org/{self.identifier}/{quote(self.name)}' - self.item.session.mount_http_adapter(max_retries=max_retries, - status_forcelist=[503], - host='s3.us.archive.org') + self.item.session.mount_http_adapter( + max_retries=max_retries, status_forcelist=[503], host='s3.us.archive.org' + ) request = iarequest.S3Request( method='DELETE', url=url, headers=headers, access_key=access_key, - secret_key=secret_key + secret_key=secret_key, ) if debug: return request @@ -368,8 +398,7 @@ def delete(self, cascade_delete=None, access_key=None, secret_key=None, verbose= try: resp = self.item.session.send(prepared_request) resp.raise_for_status() - except (RetryError, HTTPError, ConnectTimeout, - OSError, ReadTimeout) as exc: + except (RetryError, HTTPError, ConnectTimeout, OSError, ReadTimeout) as exc: error_msg = f'Error deleting {url}, {exc}' log.error(error_msg) raise diff --git a/internetarchive/iarequest.py b/internetarchive/iarequest.py index b3fa88f3..19b90b4f 100644 --- a/internetarchive/iarequest.py +++ b/internetarchive/iarequest.py @@ -40,14 +40,15 @@ class S3Request(requests.models.Request): - def __init__(self, - metadata=None, - file_metadata=None, - queue_derive=True, - access_key=None, - secret_key=None, - **kwargs): - + def __init__( + self, + metadata=None, + file_metadata=None, + queue_derive=True, + access_key=None, + secret_key=None, + **kwargs, + ): super().__init__(**kwargs) if not self.auth: @@ -72,7 +73,6 @@ def prepare(self): auth=self.auth, cookies=self.cookies, hooks=self.hooks, - # S3Request kwargs. metadata=self.metadata, file_metadata=self.file_metadata, @@ -82,14 +82,26 @@ def prepare(self): class S3PreparedRequest(requests.models.PreparedRequest): - def prepare(self, method=None, url=None, headers=None, files=None, data=None, - params=None, auth=None, cookies=None, hooks=None, queue_derive=None, - metadata=None, file_metadata=None): + def prepare( + self, + method=None, + url=None, + headers=None, + files=None, + data=None, + params=None, + auth=None, + cookies=None, + hooks=None, + queue_derive=None, + metadata=None, + file_metadata=None, + ): self.prepare_method(method) self.prepare_url(url, params) - self.prepare_headers(headers, metadata, - file_metadata=file_metadata, - queue_derive=queue_derive) + self.prepare_headers( + headers, metadata, file_metadata=file_metadata, queue_derive=queue_derive + ) self.prepare_cookies(cookies) self.prepare_body(data, files) self.prepare_auth(auth, url) @@ -136,7 +148,7 @@ def _prepare_metadata_headers(prepared_metadata, meta_type='meta'): meta_value = json.dumps(meta_value) # Convert the metadata value into a list if it is not already # iterable. - if (isinstance(meta_value, str) or not hasattr(meta_value, '__iter__')): + if isinstance(meta_value, str) or not hasattr(meta_value, '__iter__'): meta_value = [meta_value] # Convert metadata items into HTTP headers and add to # ``headers`` dict. @@ -144,7 +156,7 @@ def _prepare_metadata_headers(prepared_metadata, meta_type='meta'): if not value: continue header_key = f'x-archive-{meta_type}{i:02d}-{meta_key}' - if (isinstance(value, str) and needs_quote(value)): + if isinstance(value, str) and needs_quote(value): value = f'uri({quote(value)})' # because rfc822 http headers disallow _ in names, IA-S3 will # translate two hyphens in a row (--) into an underscore (_). 
@@ -160,18 +172,19 @@ def _prepare_metadata_headers(prepared_metadata, meta_type='meta'): class MetadataRequest(requests.models.Request): - def __init__(self, - metadata=None, - source_metadata=None, - target=None, - priority=None, - access_key=None, - secret_key=None, - append=None, - append_list=None, - insert=None, - **kwargs): - + def __init__( + self, + metadata=None, + source_metadata=None, + target=None, + priority=None, + access_key=None, + secret_key=None, + append=None, + append_list=None, + insert=None, + **kwargs, + ): super().__init__(**kwargs) if not self.auth: @@ -198,7 +211,6 @@ def prepare(self): auth=self.auth, cookies=self.cookies, hooks=self.hooks, - # MetadataRequest kwargs. metadata=self.metadata, priority=self.priority, @@ -212,16 +224,32 @@ def prepare(self): class MetadataPreparedRequest(requests.models.PreparedRequest): - def prepare(self, method=None, url=None, headers=None, files=None, data=None, - params=None, auth=None, cookies=None, hooks=None, metadata={}, # noqa: B006 - source_metadata=None, target=None, priority=None, append=None, - append_list=None, insert=None): + def prepare( + self, + method=None, + url=None, + headers=None, + files=None, + data=None, + params=None, + auth=None, + cookies=None, + hooks=None, + metadata={}, # noqa: B006 + source_metadata=None, + target=None, + priority=None, + append=None, + append_list=None, + insert=None, + ): self.prepare_method(method) self.prepare_url(url, params) self.prepare_headers(headers) self.prepare_cookies(cookies) - self.prepare_body(metadata, source_metadata, target, priority, append, - append_list, insert) + self.prepare_body( + metadata, source_metadata, target, priority, append, append_list, insert + ) self.prepare_auth(auth, url) # Note that prepare_auth must be last to enable authentication schemes # such as OAuth to work on a fully prepared request. @@ -229,8 +257,9 @@ def prepare(self, method=None, url=None, headers=None, files=None, data=None, # This MUST go after prepare_auth. 
Authenticators could add a hook self.prepare_hooks(hooks) - def prepare_body(self, metadata, source_metadata, target, priority, append, - append_list, insert): + def prepare_body( + self, metadata, source_metadata, target, priority, append, append_list, insert + ): priority = priority or -5 if not source_metadata: @@ -238,38 +267,52 @@ def prepare_body(self, metadata, source_metadata, target, priority, append, source_metadata = r.json() # Write to many targets - if (isinstance(metadata, list) - or any('/' in k for k in metadata) - or all(isinstance(k, dict) for k in metadata.values())): + if ( + isinstance(metadata, list) + or any('/' in k for k in metadata) + or all(isinstance(k, dict) for k in metadata.values()) + ): changes = [] if any(not k for k in metadata): - raise ValueError('Invalid metadata provided, ' - 'check your input and try again') + raise ValueError( + 'Invalid metadata provided, check your input and try again' + ) if target: metadata = {target: metadata} for key in metadata: if key == 'metadata': try: - patch = prepare_patch(metadata[key], - source_metadata['metadata'], - append, - append_list, - insert) + patch = prepare_patch( + metadata[key], + source_metadata['metadata'], + append, + append_list, + insert, + ) except KeyError: raise ItemLocateError elif key.startswith('files'): - patch = prepare_files_patch(metadata[key], - source_metadata['files'], - append, - key, - append_list, - insert) + patch = prepare_files_patch( + metadata[key], + source_metadata['files'], + append, + key, + append_list, + insert, + ) else: key = key.split('/')[0] - patch = prepare_target_patch(metadata, source_metadata, append, - target, append_list, key, insert) + patch = prepare_target_patch( + metadata, + source_metadata, + append, + target, + append_list, + key, + insert, + ) changes.append({'target': key, 'patch': patch}) self.data = { '-changes': json.dumps(changes), @@ -281,17 +324,35 @@ def prepare_body(self, metadata, source_metadata, target, priority, append, if not target or 'metadata' in target: target = 'metadata' try: - patch = prepare_patch(metadata, source_metadata['metadata'], append, - append_list, insert) + patch = prepare_patch( + metadata, + source_metadata['metadata'], + append, + append_list, + insert, + ) except KeyError: raise ItemLocateError elif 'files' in target: - patch = prepare_files_patch(metadata, source_metadata['files'], append, - target, append_list, insert) + patch = prepare_files_patch( + metadata, + source_metadata['files'], + append, + target, + append_list, + insert, + ) else: metadata = {target: metadata} - patch = prepare_target_patch(metadata, source_metadata, append, - target, append_list, target, insert) + patch = prepare_target_patch( + metadata, + source_metadata, + append, + target, + append_list, + target, + insert, + ) self.data = { '-patch': json.dumps(patch), '-target': target, @@ -308,8 +369,9 @@ def prepare_patch(metadata, source_metadata, append, append_list=None, insert=No if not destination_metadata: destination_metadata = [] else: - prepared_metadata = prepare_metadata(metadata, source_metadata, append, - append_list, insert) + prepared_metadata = prepare_metadata( + metadata, source_metadata, append, append_list, insert + ) if isinstance(destination_metadata, dict): destination_metadata.update(prepared_metadata) elif isinstance(metadata, list) and not destination_metadata: @@ -328,9 +390,9 @@ def prepare_patch(metadata, source_metadata, append, append_list=None, insert=No return patch -def prepare_target_patch(metadata, 
source_metadata, append, target, append_list, key, - insert): - +def prepare_target_patch( + metadata, source_metadata, append, target, append_list, key, insert +): def dictify(lst, key=None, value=None): if not lst: return value @@ -350,8 +412,7 @@ def dictify(lst, key=None, value=None): return patch -def prepare_files_patch(metadata, source_metadata, append, target, append_list, - insert): +def prepare_files_patch(metadata, source_metadata, append, target, append_list, insert): filename = '/'.join(target.split('/')[1:]) for f in source_metadata: if f.get('name') == filename: @@ -361,8 +422,9 @@ def prepare_files_patch(metadata, source_metadata, append, target, append_list, return patch -def prepare_metadata(metadata, source_metadata=None, append=False, append_list=False, - insert=False): +def prepare_metadata( + metadata, source_metadata=None, append=False, append_list=False, insert=False +): """Prepare a metadata dict for an :class:`S3PreparedRequest ` or :class:`MetadataPreparedRequest ` object. diff --git a/internetarchive/item.py b/internetarchive/item.py index c1aca631..c69a78bd 100644 --- a/internetarchive/item.py +++ b/internetarchive/item.py @@ -118,21 +118,29 @@ def load(self, item_metadata: Mapping | None = None) -> None: self.collection = IdentifierListAsItems(mc, self.session) # type: ignore def __eq__(self, other) -> bool: - return (self.item_metadata == other.item_metadata - or (self.item_metadata.keys() == other.item_metadata.keys() - and all(self.item_metadata[x] == other.item_metadata[x] - for x in self.item_metadata - if x not in self.EXCLUDED_ITEM_METADATA_KEYS))) + return self.item_metadata == other.item_metadata or ( + self.item_metadata.keys() == other.item_metadata.keys() + and all( + self.item_metadata[x] == other.item_metadata[x] + for x in self.item_metadata + if x not in self.EXCLUDED_ITEM_METADATA_KEYS + ) + ) def __le__(self, other) -> bool: return self.identifier <= other.identifier def __hash__(self) -> int: without_excluded_keys = { - k: v for k, v in self.item_metadata.items() - if k not in self.EXCLUDED_ITEM_METADATA_KEYS} - return hash(json.dumps(without_excluded_keys, - sort_keys=True, check_circular=False)) # type: ignore + k: v + for k, v in self.item_metadata.items() + if k not in self.EXCLUDED_ITEM_METADATA_KEYS + } + return hash( + json.dumps( + without_excluded_keys, sort_keys=True, check_circular=False + ) # type: ignore[call-arg] + ) class Item(BaseItem): @@ -197,7 +205,9 @@ def __init__( if self.metadata.get('title'): # A copyable link to the item, in MediaWiki format details = self.urls.details # type: ignore - self.wikilink = f'* [{details} {self.identifier}] -- {self.metadata["title"]}' + self.wikilink = ( + f'* [{details} {self.identifier}] -- {self.metadata["title"]}' + ) class URLs: def __init__(self, itm_obj): @@ -218,8 +228,9 @@ def _make_tab_URL(self, tab: str) -> None: """Make URLs for the separate tabs of Collections details page.""" self._make_URL(tab, self.details + f'&tab={tab}') # type: ignore - DEFAULT_URL_FORMAT = ('{0.session.protocol}//{0.session.host}' - '/{path}/{0.identifier}') + DEFAULT_URL_FORMAT = ( + '{0.session.protocol}//{0.session.host}/{path}/{0.identifier}' + ) def _make_URL(self, path: str, url_format: str = DEFAULT_URL_FORMAT) -> None: setattr(self, path, url_format.format(self._itm_obj, path=path)) @@ -270,7 +281,9 @@ def no_tasks_pending( :returns: `True` if no tasks are pending, otherwise `False`. 
""" - return all(x == 0 for x in self.get_task_summary(params, request_kwargs).values()) + return all( + x == 0 for x in self.get_task_summary(params, request_kwargs).values() + ) def get_all_item_tasks( self, @@ -319,13 +332,15 @@ def get_catalog( """ return list(self.session.iter_catalog(self.identifier, params, request_kwargs)) - def derive(self, - priority: int = 0, - remove_derived: str | None = None, - reduced_priority: bool = False, - data: MutableMapping | None = None, - headers: Mapping | None = None, - request_kwargs: Mapping | None = None) -> Response: + def derive( + self, + priority: int = 0, + remove_derived: str | None = None, + reduced_priority: bool = False, + data: MutableMapping | None = None, + headers: Mapping | None = None, + request_kwargs: Mapping | None = None, + ) -> Response: """Derive an item. :param priority: Task priority from 10 to -10 [default: 0] @@ -356,23 +371,27 @@ def derive(self, else: data['args'].update({'remove_derived': remove_derived}) - r = self.session.submit_task(self.identifier, - 'derive.php', - priority=priority, - data=data, - headers=headers, - reduced_priority=reduced_priority, - request_kwargs=request_kwargs) + r = self.session.submit_task( + self.identifier, + 'derive.php', + priority=priority, + data=data, + headers=headers, + reduced_priority=reduced_priority, + request_kwargs=request_kwargs, + ) r.raise_for_status() return r - def fixer(self, - ops: list | str | None = None, - priority: int | str | None = None, - reduced_priority: bool = False, - data: MutableMapping | None = None, - headers: Mapping | None = None, - request_kwargs: Mapping | None = None) -> Response: + def fixer( + self, + ops: list | str | None = None, + priority: int | str | None = None, + reduced_priority: bool = False, + data: MutableMapping | None = None, + headers: Mapping | None = None, + request_kwargs: Mapping | None = None, + ) -> Response: """Submit a fixer task on an item. :param ops: The fixer operation(s) to run on the item @@ -402,22 +421,26 @@ def fixer(self, for op in ops: data['args'][op] = '1' - r = self.session.submit_task(self.identifier, - 'fixer.php', - priority=priority, - data=data, - headers=headers, - reduced_priority=reduced_priority, - request_kwargs=request_kwargs) + r = self.session.submit_task( + self.identifier, + 'fixer.php', + priority=priority, + data=data, + headers=headers, + reduced_priority=reduced_priority, + request_kwargs=request_kwargs, + ) r.raise_for_status() return r - def undark(self, - comment: str, - priority: int | str | None = None, - reduced_priority: bool = False, - data: Mapping | None = None, - request_kwargs: Mapping | None = None) -> Response: + def undark( + self, + comment: str, + priority: int | str | None = None, + reduced_priority: bool = False, + data: Mapping | None = None, + request_kwargs: Mapping | None = None, + ) -> Response: """Undark the item. 
:param comment: The curation comment explaining reason for @@ -438,23 +461,27 @@ def undark(self, :returns: :class:`requests.Response` """ - r = self.session.submit_task(self.identifier, - 'make_undark.php', - comment=comment, - priority=priority, - data=data, - reduced_priority=reduced_priority, - request_kwargs=request_kwargs) + r = self.session.submit_task( + self.identifier, + 'make_undark.php', + comment=comment, + priority=priority, + data=data, + reduced_priority=reduced_priority, + request_kwargs=request_kwargs, + ) r.raise_for_status() return r # TODO: dark and undark have different order for data and reduced_pripoity - def dark(self, - comment: str, - priority: int | str | None = None, - data: Mapping | None = None, - reduced_priority: bool = False, - request_kwargs: Mapping | None = None) -> Response: + def dark( + self, + comment: str, + priority: int | str | None = None, + data: Mapping | None = None, + reduced_priority: bool = False, + request_kwargs: Mapping | None = None, + ) -> Response: """Dark the item. :param comment: The curation comment explaining reason for @@ -475,13 +502,15 @@ def dark(self, :returns: :class:`requests.Response` """ - r = self.session.submit_task(self.identifier, - 'make_dark.php', - comment=comment, - priority=priority, - data=data, - reduced_priority=reduced_priority, - request_kwargs=request_kwargs) + r = self.session.submit_task( + self.identifier, + 'make_dark.php', + comment=comment, + priority=priority, + data=data, + reduced_priority=reduced_priority, + request_kwargs=request_kwargs, + ) r.raise_for_status() return r @@ -529,12 +558,14 @@ def get_file(self, file_name: str, file_metadata: Mapping | None = None) -> File """ return File(self, file_name, file_metadata) - def get_files(self, - files: File | list[File] | None = None, - formats: str | list[str] | None = None, - glob_pattern: str | None = None, - exclude_pattern: str | None = None, - on_the_fly: bool = False): + def get_files( + self, + files: File | list[File] | None = None, + formats: str | list[str] | None = None, + glob_pattern: str | None = None, + exclude_pattern: str | None = None, + on_the_fly: bool = False, + ): files = files or [] formats = formats or [] exclude_pattern = exclude_pattern or '' @@ -577,33 +608,36 @@ def get_files(self, exclude_patterns = exclude_pattern for p in patterns: if fnmatch(f.get('name', ''), p): - if not any(fnmatch(f.get('name', ''), e) for e in exclude_patterns): + if not any( + fnmatch(f.get('name', ''), e) for e in exclude_patterns + ): yield self.get_file(str(f.get('name'))) - def download(self, - files: File | list[File] | None = None, - formats: str | list[str] | None = None, - glob_pattern: str | None = None, - exclude_pattern: str | None = None, - dry_run: bool = False, - verbose: bool = False, - ignore_existing: bool = False, - checksum: bool = False, - destdir: str | None = None, - no_directory: bool = False, - retries: int | None = None, - item_index: int | None = None, - ignore_errors: bool = False, - on_the_fly: bool = False, - return_responses: bool = False, - no_change_timestamp: bool = False, - ignore_history_dir: bool = False, - source: str | list[str] | None = None, - exclude_source: str | list[str] | None = None, - stdout: bool = False, - params: Mapping | None = None, - timeout: int | float | tuple[int, float] | None = None - ) -> list[Request | Response]: + def download( + self, + files: File | list[File] | None = None, + formats: str | list[str] | None = None, + glob_pattern: str | None = None, + exclude_pattern: str | None 
= None, + dry_run: bool = False, + verbose: bool = False, + ignore_existing: bool = False, + checksum: bool = False, + destdir: str | None = None, + no_directory: bool = False, + retries: int | None = None, + item_index: int | None = None, + ignore_errors: bool = False, + on_the_fly: bool = False, + return_responses: bool = False, + no_change_timestamp: bool = False, + ignore_history_dir: bool = False, + source: str | list[str] | None = None, + exclude_source: str | list[str] | None = None, + stdout: bool = False, + params: Mapping | None = None, + timeout: int | float | tuple[int, float] | None = None, + ) -> list[Request | Response]: """Download files from an item. :param files: Only download files matching given file names. @@ -716,7 +750,7 @@ def download(self, files = self.get_files( glob_pattern=glob_pattern, exclude_pattern=exclude_pattern, - on_the_fly=on_the_fly + on_the_fly=on_the_fly, ) if stdout: files = list(files) # type: ignore @@ -746,9 +780,23 @@ def download(self, ors = True else: ors = False - r = f.download(path, verbose, ignore_existing, checksum, destdir, - retries, ignore_errors, fileobj, return_responses, - no_change_timestamp, params, None, stdout, ors, timeout) + r = f.download( + path, + verbose, + ignore_existing, + checksum, + destdir, + retries, + ignore_errors, + fileobj, + return_responses, + no_change_timestamp, + params, + None, + stdout, + ors, + timeout, + ) if return_responses: responses.append(r) @@ -766,18 +814,20 @@ def download(self, return responses if return_responses else errors - def modify_metadata(self, - metadata: Mapping, - target: str | None = None, - append: bool = False, - append_list: bool = False, - insert: bool = False, - priority: int = 0, - access_key: str | None = None, - secret_key: str | None = None, - debug: bool = False, - headers: Mapping | None = None, - request_kwargs: Mapping | None = None) -> Request | Response: + def modify_metadata( + self, + metadata: Mapping, + target: str | None = None, + append: bool = False, + append_list: bool = False, + insert: bool = False, + priority: int = 0, + access_key: str | None = None, + secret_key: str | None = None, + debug: bool = False, + headers: Mapping | None = None, + request_kwargs: Mapping | None = None, + ) -> Request | Response: """Modify the metadata of an existing item on Archive.org. Note: The Metadata Write API does not yet comply with the @@ -831,7 +881,8 @@ def modify_metadata(self, secret_key=secret_key, append=append, append_list=append_list, - insert=insert) + insert=insert, + ) # Must use Session.prepare_request to make sure session settings # are used on request! 
prepared_request = request.prepare() @@ -860,23 +911,26 @@ def remove_from_simplelist(self, parent, list) -> Response: r = self.session.post(self.urls.metadata, data=data) # type: ignore return r - def upload_file(self, body, - key: str | None = None, - metadata: Mapping | None = None, - file_metadata: Mapping | None = None, - headers: dict | None = None, - access_key: str | None = None, - secret_key: str | None = None, - queue_derive: bool = False, - verbose: bool = False, - verify: bool = False, - checksum: bool = False, - delete: bool = False, - retries: int | None = None, - retries_sleep: int | None = None, - debug: bool = False, - validate_identifier: bool = False, - request_kwargs: MutableMapping | None = None) -> Request | Response: + def upload_file( + self, + body, + key: str | None = None, + metadata: Mapping | None = None, + file_metadata: Mapping | None = None, + headers: dict | None = None, + access_key: str | None = None, + secret_key: str | None = None, + queue_derive: bool = False, + verbose: bool = False, + verify: bool = False, + checksum: bool = False, + delete: bool = False, + retries: int | None = None, + retries_sleep: int | None = None, + debug: bool = False, + validate_identifier: bool = False, + request_kwargs: MutableMapping | None = None, + ) -> Request | Response: """Upload a single file to an item. The item will be created if it does not exist. @@ -983,7 +1037,8 @@ def upload_file(self, body, log.info( f'{key} successfully uploaded to ' f'https://archive.org/download/{self.identifier}/{key} ' - 'and verified, deleting local copy') + 'and verified, deleting local copy' + ) body.close() os.remove(filename) # Return an empty response object if checksums match. @@ -1009,11 +1064,13 @@ def _build_request(): chunk_size = 1048576 expected_size = math.ceil(size / chunk_size) chunks = chunk_generator(body, chunk_size) - progress_generator = tqdm(chunks, - desc=f' uploading {key}', - dynamic_ncols=True, - total=expected_size, - unit='MiB') + progress_generator = tqdm( + chunks, + desc=f' uploading {key}', + dynamic_ncols=True, + total=expected_size, + unit='MiB', + ) data = None # pre_encode is needed because http doesn't know that it # needs to encode a TextIO object when it's wrapped @@ -1022,7 +1079,7 @@ def _build_request(): data = IterableToFileAdapter( progress_generator, size, - pre_encode=isinstance(body, io.TextIOBase) + pre_encode=isinstance(body, io.TextIOBase), ) except Exception: print(f' uploading {key}', file=sys.stderr) @@ -1031,15 +1088,17 @@ def _build_request(): data = body _headers.update(self.session.headers) - request = S3Request(method='PUT', - url=url, - headers=_headers, - data=data, - metadata=metadata, - file_metadata=file_metadata, - access_key=access_key, - secret_key=secret_key, - queue_derive=queue_derive) + request = S3Request( + method='PUT', + url=url, + headers=_headers, + data=data, + metadata=metadata, + file_metadata=file_metadata, + access_key=access_key, + secret_key=secret_key, + queue_derive=queue_derive, + ) return request if debug: @@ -1049,9 +1108,11 @@ def _build_request(): else: try: while True: - error_msg = ('s3 is overloaded, sleeping for ' - f'{retries_sleep} seconds and retrying. ' - f'{retries} retries left.') + error_msg = ( + 's3 is overloaded, sleeping for ' + f'{retries_sleep} seconds and retrying. ' + f'{retries} retries left.' 
+ ) if retries > 0: if self.session.s3_is_overloaded(access_key=access_key): sleep(retries_sleep) @@ -1069,9 +1130,9 @@ def _build_request(): if prepared_request.headers.get('transfer-encoding') == 'chunked': del prepared_request.headers['transfer-encoding'] - response = self.session.send(prepared_request, - stream=True, - **request_kwargs) + response = self.session.send( + prepared_request, stream=True, **request_kwargs + ) if (response.status_code == 503) and (retries > 0): if b'appears to be spam' in response.content: log.info('detected as spam, upload failed') @@ -1092,7 +1153,8 @@ def _build_request(): log.info( f'{key} successfully uploaded to ' f'https://archive.org/download/{self.identifier}/{key} and verified, ' - 'deleting local copy') + 'deleting local copy' + ) body.close() os.remove(filename) response.close() @@ -1100,11 +1162,15 @@ def _build_request(): except HTTPError as exc: try: msg = get_s3_xml_text(exc.response.content) - except ExpatError: # probably HTTP 500 error and response is invalid XML - msg = ('IA S3 returned invalid XML ' - f'(HTTP status code {exc.response.status_code}). ' - 'This is a server side error which is either temporary, ' - 'or requires the intervention of IA admins.') + except ( + ExpatError + ): # probably HTTP 500 error and response is invalid XML + msg = ( + 'IA S3 returned invalid XML ' + f'(HTTP status code {exc.response.status_code}). ' + 'This is a server side error which is either temporary, ' + 'or requires the intervention of IA admins.' + ) error_msg = f' error uploading {key} to {self.identifier}, {msg}' log.error(error_msg) @@ -1115,21 +1181,24 @@ def _build_request(): finally: body.close() - def upload(self, files, - metadata: Mapping | None = None, - headers: dict | None = None, - access_key: str | None = None, - secret_key: str | None = None, - queue_derive=None, # TODO: True if None?? - verbose: bool = False, - verify: bool = False, - checksum: bool = False, - delete: bool = False, - retries: int | None = None, - retries_sleep: int | None = None, - debug: bool = False, - validate_identifier: bool = False, - request_kwargs: dict | None = None) -> list[Request | Response]: + def upload( + self, + files, + metadata: Mapping | None = None, + headers: dict | None = None, + access_key: str | None = None, + secret_key: str | None = None, + queue_derive=None, # TODO: True if None?? + verbose: bool = False, + verify: bool = False, + checksum: bool = False, + delete: bool = False, + retries: int | None = None, + retries_sleep: int | None = None, + debug: bool = False, + validate_identifier: bool = False, + request_kwargs: dict | None = None, + ) -> list[Request | Response]: r"""Upload files to an item. The item will be created if it does not exist. 
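Reviewer note: the reflowed upload() signature above keeps its keyword arguments intact. A hedged usage sketch — the identifier and file names are hypothetical:

    from internetarchive import get_item

    item = get_item('my-test-item')    # hypothetical identifier
    responses = item.upload(
        ['photo.jpg', 'notes.txt'],                        # local files to upload
        metadata={'title': 'Test item', 'mediatype': 'data'},
        queue_derive=False,   # do not queue a derive task after upload
        verbose=True,         # show the per-file tqdm progress bar set up above
        retries=3,            # retry while S3 reports it is overloaded
    )
    for r in responses:
        r.raise_for_status()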
@@ -1198,8 +1267,9 @@ def upload(self, files, file_metadata = f.copy() del file_metadata['name'] f = f['name'] - if ((isinstance(f, str) and is_dir(f)) - or (isinstance(f, tuple) and is_dir(f[-1]))): + if (isinstance(f, str) and is_dir(f)) or ( + isinstance(f, tuple) and is_dir(f[-1]) + ): if isinstance(f, tuple): remote_dir_name = f[0].strip('/') f = f[-1] @@ -1219,23 +1289,25 @@ def upload(self, files, elif remote_dir_name: key = f'{remote_dir_name}/{key}' key = norm_filepath(key) - resp = self.upload_file(filepath, - key=key, - metadata=metadata, - file_metadata=file_metadata, - headers=headers, - access_key=access_key, - secret_key=secret_key, - queue_derive=_queue_derive, - verbose=verbose, - verify=verify, - checksum=checksum, - delete=delete, - retries=retries, - retries_sleep=retries_sleep, - debug=debug, - validate_identifier=validate_identifier, - request_kwargs=request_kwargs) + resp = self.upload_file( + filepath, + key=key, + metadata=metadata, + file_metadata=file_metadata, + headers=headers, + access_key=access_key, + secret_key=secret_key, + queue_derive=_queue_derive, + verbose=verbose, + verify=verify, + checksum=checksum, + delete=delete, + retries=retries, + retries_sleep=retries_sleep, + debug=debug, + validate_identifier=validate_identifier, + request_kwargs=request_kwargs, + ) responses.append(resp) else: file_index += 1 @@ -1253,23 +1325,25 @@ def upload(self, files, key, body = f if key and not isinstance(key, str): key = str(key) - resp = self.upload_file(body, - key=key, - metadata=metadata, - file_metadata=file_metadata, - headers=headers, - access_key=access_key, - secret_key=secret_key, - queue_derive=_queue_derive, - verbose=verbose, - verify=verify, - checksum=checksum, - delete=delete, - retries=retries, - retries_sleep=retries_sleep, - debug=debug, - validate_identifier=validate_identifier, - request_kwargs=request_kwargs) + resp = self.upload_file( + body, + key=key, + metadata=metadata, + file_metadata=file_metadata, + headers=headers, + access_key=access_key, + secret_key=secret_key, + queue_derive=_queue_derive, + verbose=verbose, + verify=verify, + checksum=checksum, + delete=delete, + retries=retries, + retries_sleep=retries_sleep, + debug=debug, + validate_identifier=validate_identifier, + request_kwargs=request_kwargs, + ) responses.append(resp) return responses @@ -1287,14 +1361,13 @@ def __init__(self, *args, **kwargs): raise ValueError('mediatype is not "collection"!') deflt_srh = f'collection:{self.identifier}' - self._make_search('contents', - self.metadata.get('search_collection', deflt_srh)) - self._make_search('subcollections', - f'{deflt_srh} AND mediatype:collection') + self._make_search('contents', self.metadata.get('search_collection', deflt_srh)) + self._make_search('subcollections', f'{deflt_srh} AND mediatype:collection') def _do_search(self, name: str, query: str): rtn = self.searches.setdefault( - name, self.session.search_items(query, fields=['identifier'])) + name, self.session.search_items(query, fields=['identifier']) + ) if not hasattr(self, f'{name}_count'): setattr(self, f'{name}_count', self.searches[name].num_found) return rtn.iter_as_items() diff --git a/internetarchive/search.py b/internetarchive/search.py index 34791467..447d37ff 100644 --- a/internetarchive/search.py +++ b/internetarchive/search.py @@ -51,14 +51,18 @@ class Search: ... 
print(result['identifier']) """ - def __init__(self, archive_session, query, - fields=None, - sorts=None, - params=None, - full_text_search=None, - dsl_fts=None, - request_kwargs=None, - max_retries=None): + def __init__( + self, + archive_session, + query, + fields=None, + sorts=None, + params=None, + full_text_search=None, + dsl_fts=None, + request_kwargs=None, + max_retries=None, + ): params = params or {} self.session = archive_session @@ -75,8 +79,12 @@ def __init__(self, archive_session, query, self.request_kwargs = request_kwargs or {} self._num_found = None self.fts_url = f'{self.session.protocol}//be-api.us.archive.org/ia-pub-fts-api' - self.scrape_url = f'{self.session.protocol}//{self.session.host}/services/search/v1/scrape' - self.search_url = f'{self.session.protocol}//{self.session.host}/advancedsearch.php' + self.scrape_url = ( + f'{self.session.protocol}//{self.session.host}/services/search/v1/scrape' + ) + self.search_url = ( + f'{self.session.protocol}//{self.session.host}/advancedsearch.php' + ) if self.session.access_key and self.session.secret_key: self.auth = S3Auth(self.session.access_key, self.session.secret_key) else: @@ -125,10 +133,9 @@ def _advanced_search(self): self.params['output'] = 'json' - r = self.session.get(self.search_url, - params=self.params, - auth=self.auth, - **self.request_kwargs) + r = self.session.get( + self.search_url, params=self.params, auth=self.auth, **self.request_kwargs + ) j = r.json() num_found = int(j['response']['numFound']) if not self._num_found: @@ -145,10 +152,12 @@ def _scrape(self): i = 0 num_found = None while True: - r = self.session.post(self.scrape_url, - params=self.params, - auth=self.auth, - **self.request_kwargs) + r = self.session.post( + self.scrape_url, + params=self.params, + auth=self.auth, + **self.request_kwargs, + ) j = r.json() if j.get('error'): yield j @@ -164,8 +173,10 @@ def _scrape(self): yield item if 'cursor' not in j: if i != num_found: - raise ReadTimeout('The server failed to return results in the' - f' allotted amount of time for {r.request.url}') + raise ReadTimeout( + 'The server failed to return results in the' + f' allotted amount of time for {r.request.url}' + ) break def _full_text_search(self): @@ -184,10 +195,9 @@ def _full_text_search(self): d['size'] = self.params['size'] while True: - r = self.session.post(self.fts_url, - json=d, - auth=self.auth, - **self.request_kwargs) + r = self.session.post( + self.fts_url, json=d, auth=self.auth, **self.request_kwargs + ) j = r.json() scroll_id = j.get('_scroll_id') hits = j.get('hits', {}).get('hits') @@ -209,8 +219,7 @@ def _make_results_generator(self): return self._scrape() def _user_aggs(self): - """Experimental support for user aggregations. 
- """ + """Experimental support for user aggregations.""" self.params['page'] = '1' self.params['rows'] = '1' self.params['output'] = 'json' @@ -227,19 +236,20 @@ def num_found(self): if not self.fts: p = self.params.copy() p['total_only'] = 'true' - r = self.session.post(self.scrape_url, - params=p, - auth=self.auth, - **self.request_kwargs) + r = self.session.post( + self.scrape_url, params=p, auth=self.auth, **self.request_kwargs + ) j = r.json() self._handle_scrape_error(j) self._num_found = j.get('total') else: self.params['q'] = self.query - r = self.session.get(self.fts_url, - params=self.params, - auth=self.auth, - **self.request_kwargs) + r = self.session.get( + self.fts_url, + params=self.params, + auth=self.auth, + **self.request_kwargs, + ) j = r.json() self._num_found = j.get('hits', {}).get('total') return self._num_found @@ -249,7 +259,9 @@ def _handle_scrape_error(self, j): if all(s in j['error'].lower() for s in ['invalid', 'secret']): if not j['error'].endswith('.'): j['error'] += '.' - raise ValueError(f"{j['error']} Try running 'ia configure' and retrying.") + raise ValueError( + f"{j['error']} Try running 'ia configure' and retrying." + ) raise ValueError(j.get('error')) def _get_item_from_search_result(self, search_result): diff --git a/internetarchive/session.py b/internetarchive/session.py index 11634aec..5953c474 100644 --- a/internetarchive/session.py +++ b/internetarchive/session.py @@ -72,11 +72,13 @@ class ArchiveSession(requests.sessions.Session): 'collection': Collection, } - def __init__(self, - config: Mapping | None = None, - config_file: str = "", - debug: bool = False, - http_adapter_kwargs: MutableMapping | None = None): + def __init__( + self, + config: Mapping | None = None, + config_file: str = "", + debug: bool = False, + http_adapter_kwargs: MutableMapping | None = None, + ): """Initialize :class:`ArchiveSession ` object with config. 
:param config: A config dict used for initializing the @@ -102,9 +104,12 @@ def __init__(self, cookie_dict = parse_dict_cookies(raw_cookie) if not cookie_dict.get(ck): continue - cookie = create_cookie(ck, cookie_dict[ck], - domain=cookie_dict.get('domain', '.archive.org'), - path=cookie_dict.get('path', '/')) + cookie = create_cookie( + ck, + cookie_dict[ck], + domain=cookie_dict.get('domain', '.archive.org'), + path=cookie_dict.get('path', '/'), + ) self.cookies.set_cookie(cookie) self.secure: bool = self.config.get('general', {}).get('secure', True) @@ -129,12 +134,16 @@ def __init__(self, logging_config = self.config.get('logging', {}) if logging_config.get('level'): - self.set_file_logger(logging_config.get('level', 'NOTSET'), - logging_config.get('file', 'internetarchive.log')) + self.set_file_logger( + logging_config.get('level', 'NOTSET'), + logging_config.get('file', 'internetarchive.log'), + ) if debug or (logger.level <= 10): - self.set_file_logger(logging_config.get('level', 'NOTSET'), - logging_config.get('file', 'internetarchive.log'), - 'urllib3') + self.set_file_logger( + logging_config.get('level', 'NOTSET'), + logging_config.get('file', 'internetarchive.log'), + 'urllib3', + ) def _get_user_agent_string(self) -> str: """Generate a User-Agent string to be sent with every request.""" @@ -144,20 +153,26 @@ def _get_user_agent_string(self) -> str: except Exception: lang = '' py_version = '{}.{}.{}'.format(*sys.version_info) - return (f'internetarchive/{__version__} ' - f'({uname[0]} {uname[-1]}; N; {lang}; {self.access_key}) ' - f'Python/{py_version}') + return ( + f'internetarchive/{__version__} ' + f'({uname[0]} {uname[-1]}; N; {lang}; {self.access_key}) ' + f'Python/{py_version}' + ) def rebuild_auth(self, prepared_request, response): - """Never rebuild auth for archive.org URLs. - """ + """Never rebuild auth for archive.org URLs.""" u = urlparse(prepared_request.url) if u.netloc.endswith('archive.org'): return super().rebuild_auth(prepared_request, response) - def mount_http_adapter(self, protocol: str | None = None, max_retries: int | None = None, - status_forcelist: list | None = None, host: str | None = None) -> None: + def mount_http_adapter( + self, + protocol: str | None = None, + max_retries: int | None = None, + status_forcelist: list | None = None, + host: str | None = None, + ) -> None: """Mount an HTTP adapter to the :class:`ArchiveSession ` object. 
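Reviewer note: the config keys read in ArchiveSession.__init__ above ('general' and 'logging') can be passed through the public get_session helper. A minimal sketch, with the retry mount mirroring the File.delete() call earlier in this diff:

    from internetarchive import get_session

    s = get_session(config={
        'general': {'secure': True},
        'logging': {'level': 'INFO', 'file': 'internetarchive.log'},
    })

    # Retry 503s against the S3 endpoint, as File.delete() does above.
    s.mount_http_adapter(max_retries=5,
                         status_forcelist=[503],
                         host='s3.us.archive.org')

    item = s.get_item('nasa')           # 'nasa' is only an example identifier
    print(item.metadata.get('title'))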
@@ -177,13 +192,15 @@ def mount_http_adapter(self, protocol: str | None = None, max_retries: int | Non status_forcelist = status_forcelist or [500, 501, 502, 503, 504] if max_retries and isinstance(max_retries, (int, float)): - self.http_adapter_kwargs['max_retries'] = Retry(total=max_retries, - connect=max_retries, - read=max_retries, - redirect=False, - allowed_methods=Retry.DEFAULT_ALLOWED_METHODS, - status_forcelist=status_forcelist, - backoff_factor=1) + self.http_adapter_kwargs['max_retries'] = Retry( + total=max_retries, + connect=max_retries, + read=max_retries, + redirect=False, + allowed_methods=Retry.DEFAULT_ALLOWED_METHODS, + status_forcelist=status_forcelist, + backoff_factor=1, + ) else: self.http_adapter_kwargs['max_retries'] = max_retries @@ -194,10 +211,7 @@ def mount_http_adapter(self, protocol: str | None = None, max_retries: int | Non self.mount(f'{protocol}//{host}', max_retries_adapter) def set_file_logger( - self, - log_level: str, - path: str, - logger_name: str = 'internetarchive' + self, log_level: str, path: str, logger_name: str = 'internetarchive' ) -> None: """Convenience function to quickly configure any level of logging to a file. @@ -231,10 +245,12 @@ def set_file_logger( _log.addHandler(fh) - def get_item(self, - identifier: str, - item_metadata: Mapping | None = None, - request_kwargs: MutableMapping | None = None): + def get_item( + self, + identifier: str, + item_metadata: Mapping | None = None, + request_kwargs: MutableMapping | None = None, + ): """A method for creating :class:`internetarchive.Item ` and :class:`internetarchive.Collection ` objects. @@ -258,7 +274,9 @@ def get_item(self, item_class = Item return item_class(self, identifier, item_metadata) - def get_metadata(self, identifier: str, request_kwargs: MutableMapping | None = None): + def get_metadata( + self, identifier: str, request_kwargs: MutableMapping | None = None + ): """Get an item's metadata from the `Metadata API `__ @@ -283,15 +301,17 @@ def get_metadata(self, identifier: str, request_kwargs: MutableMapping | None = raise type(exc)(error_msg) return resp.json() - def search_items(self, - query: str, - fields: Iterable[str] | None = None, - sorts: Iterable[str] | None = None, - params: Mapping | None = None, - full_text_search: bool = False, - dsl_fts: bool = False, - request_kwargs: Mapping | None = None, - max_retries: int | Retry | None = None) -> Search: + def search_items( + self, + query: str, + fields: Iterable[str] | None = None, + sorts: Iterable[str] | None = None, + params: Mapping | None = None, + full_text_search: bool = False, + dsl_fts: bool = False, + request_kwargs: Mapping | None = None, + max_retries: int | Retry | None = None, + ) -> Search: """Search for items on Archive.org. :param query: The Archive.org search query to yield results for. Refer to @@ -313,14 +333,17 @@ def search_items(self, :returns: A :class:`Search` object, yielding search results. 
""" request_kwargs = request_kwargs or {} - return Search(self, query, - fields=fields, - sorts=sorts, - params=params, - full_text_search=full_text_search, - dsl_fts=dsl_fts, - request_kwargs=request_kwargs, - max_retries=max_retries) + return Search( + self, + query, + fields=fields, + sorts=sorts, + params=params, + full_text_search=full_text_search, + dsl_fts=dsl_fts, + request_kwargs=request_kwargs, + max_retries=max_retries, + ) def s3_is_overloaded(self, identifier=None, access_key=None, request_kwargs=None): request_kwargs = request_kwargs or {} @@ -343,18 +366,22 @@ def s3_is_overloaded(self, identifier=None, access_key=None, request_kwargs=None return True return j.get('over_limit') != 0 - def get_tasks_api_rate_limit(self, cmd: str = 'derive.php', request_kwargs: dict | None = None): + def get_tasks_api_rate_limit( + self, cmd: str = 'derive.php', request_kwargs: dict | None = None + ): return catalog.Catalog(self, request_kwargs).get_rate_limit(cmd=cmd) - def submit_task(self, - identifier: str, - cmd: str, - comment: str = '', - priority: int = 0, - data: dict | None = None, - headers: dict | None = None, - reduced_priority: bool = False, - request_kwargs: Mapping | None = None) -> requests.Response: + def submit_task( + self, + identifier: str, + cmd: str, + comment: str = '', + priority: int = 0, + data: dict | None = None, + headers: dict | None = None, + reduced_priority: bool = False, + request_kwargs: Mapping | None = None, + ) -> requests.Response: """Submit an archive.org task. :param identifier: Item identifier. @@ -391,16 +418,21 @@ def submit_task(self, headers = headers or {} if reduced_priority: headers.update({'X-Accept-Reduced-Priority': '1'}) - return catalog.Catalog(self, request_kwargs).submit_task(identifier, cmd, - comment=comment, - priority=priority, - data=data, - headers=headers) - - def iter_history(self, - identifier: str | None, - params: dict | None = None, - request_kwargs: Mapping | None = None) -> Iterable[catalog.CatalogTask]: + return catalog.Catalog(self, request_kwargs).submit_task( + identifier, + cmd, + comment=comment, + priority=priority, + data=data, + headers=headers, + ) + + def iter_history( + self, + identifier: str | None, + params: dict | None = None, + request_kwargs: Mapping | None = None, + ) -> Iterable[catalog.CatalogTask]: """A generator that returns completed tasks. :param identifier: Item identifier. @@ -416,14 +448,18 @@ def iter_history(self, :returns: An iterable of completed CatalogTasks. """ params = params or {} - params.update({'identifier': identifier, 'catalog': 0, 'summary': 0, 'history': 1}) + params.update( + {'identifier': identifier, 'catalog': 0, 'summary': 0, 'history': 1} + ) c = catalog.Catalog(self, request_kwargs) yield from c.iter_tasks(params) - def iter_catalog(self, - identifier: str | None = None, - params: dict | None = None, - request_kwargs: Mapping | None = None) -> Iterable[catalog.CatalogTask]: + def iter_catalog( + self, + identifier: str | None = None, + params: dict | None = None, + request_kwargs: Mapping | None = None, + ) -> Iterable[catalog.CatalogTask]: """A generator that returns queued or running tasks. :param identifier: Item identifier. @@ -439,13 +475,18 @@ def iter_catalog(self, :returns: An iterable of queued or running CatalogTasks. 
""" params = params or {} - params.update({'identifier': identifier, 'catalog': 1, 'summary': 0, 'history': 0}) + params.update( + {'identifier': identifier, 'catalog': 1, 'summary': 0, 'history': 0} + ) c = catalog.Catalog(self, request_kwargs) yield from c.iter_tasks(params) - def get_tasks_summary(self, identifier: str = "", - params: dict | None = None, - request_kwargs: Mapping | None = None) -> dict: + def get_tasks_summary( + self, + identifier: str = "", + params: dict | None = None, + request_kwargs: Mapping | None = None, + ) -> dict: """Get the total counts of catalog tasks meeting all criteria, organized by run status (queued, running, error, and paused). @@ -461,11 +502,16 @@ def get_tasks_summary(self, identifier: str = "", :returns: Counts of catalog tasks meeting all criteria. """ - return catalog.Catalog(self, request_kwargs).get_summary(identifier=identifier, params=params) + return catalog.Catalog(self, request_kwargs).get_summary( + identifier=identifier, params=params + ) - def get_tasks(self, identifier: str = "", - params: dict | None = None, - request_kwargs: Mapping | None = None) -> set[catalog.CatalogTask]: + def get_tasks( + self, + identifier: str = "", + params: dict | None = None, + request_kwargs: Mapping | None = None, + ) -> set[catalog.CatalogTask]: """Get a list of all tasks meeting all criteria. The list is ordered by submission time. @@ -488,14 +534,15 @@ def get_tasks(self, identifier: str = "", params['history'] = 1 if 'catalog' not in params: params['catalog'] = 1 - return set(catalog.Catalog(self, request_kwargs).get_tasks( - identifier=identifier, - params=params) + return set( + catalog.Catalog(self, request_kwargs).get_tasks( + identifier=identifier, params=params + ) ) - def get_my_catalog(self, - params: dict | None = None, - request_kwargs: Mapping | None = None) -> set[catalog.CatalogTask]: + def get_my_catalog( + self, params: dict | None = None, request_kwargs: Mapping | None = None + ) -> set[catalog.CatalogTask]: """Get all queued or running tasks. :param params: Query parameters, refer to @@ -509,11 +556,18 @@ def get_my_catalog(self, :returns: A set of all queued or running tasks. """ params = params or {} - _params = {'submitter': self.user_email, 'catalog': 1, 'history': 0, 'summary': 0} + _params = { + 'submitter': self.user_email, + 'catalog': 1, + 'history': 0, + 'summary': 0, + } params.update(_params) return self.get_tasks(params=params, request_kwargs=request_kwargs) - def get_task_log(self, task_id: str | int, request_kwargs: Mapping | None = None) -> str: + def get_task_log( + self, task_id: str | int, request_kwargs: Mapping | None = None + ) -> str: """Get a task log. :param task_id: The task id for the task log you'd like to fetch. 
@@ -548,8 +602,11 @@ def send(self, request, **kwargs) -> Response: break if insecure: from requests.exceptions import RequestException - msg = ('You are attempting to make an HTTPS request on an insecure platform,' - ' please see:\n\n\thttps://archive.org/services/docs/api' - '/internetarchive/troubleshooting.html#https-issues\n') + + msg = ( + 'You are attempting to make an HTTPS request on an insecure platform,' + ' please see:\n\n\thttps://archive.org/services/docs/api' + '/internetarchive/troubleshooting.html#https-issues\n' + ) raise RequestException(msg) return r diff --git a/internetarchive/utils.py b/internetarchive/utils.py index 38b09546..ef9d6d2b 100644 --- a/internetarchive/utils.py +++ b/internetarchive/utils.py @@ -43,6 +43,7 @@ JSONDecodeError = ValueError except ImportError: import json # type: ignore + JSONDecodeError = json.JSONDecodeError # type: ignore @@ -65,22 +66,27 @@ def validate_s3_identifier(string: str) -> bool: # periods, underscores, and dashes are legal, but may not be the first # character! if any(string.startswith(c) is True for c in ['.', '_', '-']): - raise InvalidIdentifierException('Identifier cannot begin with periods ".", underscores ' - '"_", or dashes "-".') + raise InvalidIdentifierException( + 'Identifier cannot begin with periods ".", underscores ' + '"_", or dashes "-".' + ) if len(string) > 100 or len(string) < 3: - raise InvalidIdentifierException('Identifier should be between 3 and 80 characters in ' - 'length.') + raise InvalidIdentifierException( + 'Identifier should be between 3 and 80 characters in length.' + ) # Support for uploading to user items, e.g. first character can be `@`. if string.startswith('@'): string = string[1:] if any(c not in legal_chars for c in string): - raise InvalidIdentifierException('Identifier can only contain alphanumeric characters, ' - 'periods ".", underscores "_", or dashes "-". However, ' - 'identifier cannot begin with periods, underscores, or ' - 'dashes.') + raise InvalidIdentifierException( + 'Identifier can only contain alphanumeric characters, ' + 'periods ".", underscores "_", or dashes "-". However, ' + 'identifier cannot begin with periods, underscores, or ' + 'dashes.' + ) return True @@ -162,13 +168,14 @@ def __len__(self) -> int: class IdentifierListAsItems: - """This class is a lazily-loaded list of Items, accessible by index or identifier. 
- """ + """This class is a lazily-loaded list of Items, accessible by index or identifier.""" def __init__(self, id_list_or_single_id, session): - self.ids = (id_list_or_single_id - if isinstance(id_list_or_single_id, list) - else [id_list_or_single_id]) + self.ids = ( + id_list_or_single_id + if isinstance(id_list_or_single_id, list) + else [id_list_or_single_id] + ) self._items = [None] * len(self.ids) self.session = session @@ -176,7 +183,7 @@ def __len__(self) -> int: return len(self.ids) def __getitem__(self, idx): - for i in (range(*idx.indices(len(self))) if isinstance(idx, slice) else [idx]): + for i in range(*idx.indices(len(self))) if isinstance(idx, slice) else [idx]: if self._items[i] is None: self._items[i] = self.session.get_item(self.ids[i]) return self._items[idx] @@ -349,8 +356,11 @@ def remove_none(obj): except (AttributeError, TypeError): return lst elif isinstance(obj, dict): - return type(obj)((remove_none(k), remove_none(v)) - for k, v in obj.items() if k is not None and v is not None) + return type(obj)( + (remove_none(k), remove_none(v)) + for k, v in obj.items() + if k is not None and v is not None + ) else: return obj @@ -393,19 +403,19 @@ def merge_dictionaries( ) -> dict: """Merge two dictionaries. - Items in `dict0` can optionally be dropped before the merge. + Items in `dict0` can optionally be dropped before the merge. - If equal keys exist in both dictionaries, - entries in`dict0` are overwritten. + If equal keys exist in both dictionaries, + entries in`dict0` are overwritten. - :param dict0: A base dictionary with the bulk of the items. + :param dict0: A base dictionary with the bulk of the items. - :param dict1: Additional items which overwrite the items in `dict0`. + :param dict1: Additional items which overwrite the items in `dict0`. - :param keys_to_drop: An iterable of keys to drop from `dict0` before the merge. + :param keys_to_drop: An iterable of keys to drop from `dict0` before the merge. - :returns: A merged dictionary. - """ + :returns: A merged dictionary. + """ new_dict = dict0.copy() if keys_to_drop is not None: for key in keys_to_drop: diff --git a/setup.cfg b/setup.cfg index 1b6af950..67b38f96 100644 --- a/setup.cfg +++ b/setup.cfg @@ -87,5 +87,4 @@ show_error_codes = True show_error_context = True [tool:black] -line-length = 102 skip-string-normalization = true diff --git a/tests/cli/test_ia_download.py b/tests/cli/test_ia_download.py index 87ddcb78..a4c414b2 100644 --- a/tests/cli/test_ia_download.py +++ b/tests/cli/test_ia_download.py @@ -62,7 +62,9 @@ def test_on_the_fly_format(): stdout, stderr = call_cmd(f'ia --insecure download --dry-run --format="DAISY" {i}') assert stdout == '' - stdout, stderr = call_cmd(f'ia --insecure download --dry-run --format="DAISY" --on-the-fly {i}') + stdout, stderr = call_cmd( + f'ia --insecure download --dry-run --format="DAISY" --on-the-fly {i}' + ) assert stdout == f'http://archive.org/download/{i}/{i}_daisy.zip' @@ -73,8 +75,10 @@ def test_clobber(tmpdir_ch): stdout, stderr = call_cmd(cmd) assert files_downloaded('nasa') == {'nasa_meta.xml'} - expected_stderr = ('nasa:\n' - ' skipping nasa/nasa_meta.xml, file already exists based on length and date.') + expected_stderr = ( + 'nasa:\n' + ' skipping nasa/nasa_meta.xml, file already exists based on length and date.' 
+ ) assert expected_stderr == stderr @@ -84,7 +88,10 @@ def test_checksum(tmpdir_ch): stdout, stderr = call_cmd('ia --insecure download --checksum nasa nasa_meta.xml') assert files_downloaded('nasa') == {'nasa_meta.xml'} - assert 'nasa:\n skipping nasa/nasa_meta.xml, file already exists based on checksum.' == stderr + assert ( + 'nasa:\n skipping nasa/nasa_meta.xml, file already exists based on checksum.' + == stderr + ) def test_no_directories(tmpdir_ch): @@ -103,8 +110,7 @@ def test_destdir(tmpdir_ch): assert files_downloaded('thisdirdoesnotexist/nasa') == {'nasa_meta.xml'} tmpdir_ch.mkdir('dir2/') - cmd = ('ia --insecure download --no-directories --destdir=dir2/ ' - 'nasa nasa_meta.xml') + cmd = 'ia --insecure download --no-directories --destdir=dir2/ nasa nasa_meta.xml' call_cmd(cmd) assert files_downloaded('dir2') == {'nasa_meta.xml'} diff --git a/tests/cli/test_ia_list.py b/tests/cli/test_ia_list.py index 77fed691..5145783f 100644 --- a/tests/cli/test_ia_list.py +++ b/tests/cli/test_ia_list.py @@ -12,7 +12,7 @@ 'nasa_reviews.xml', 'nasa_meta.xml', 'nasa_archive.torrent', - 'nasa_files.xml' + 'nasa_files.xml', } @@ -50,15 +50,18 @@ def test_ia_list_location(capsys, nasa_mocker): def test_ia_list_columns(capsys): with IaRequestsMock() as rsps: rsps.add_metadata_mock('nasa') - ia_list.main(['list', '--columns', 'name,md5', '--glob', '*meta.xml', 'nasa'], - SESSION) + ia_list.main( + ['list', '--columns', 'name,md5', '--glob', '*meta.xml', 'nasa'], SESSION + ) out, err = capsys.readouterr() assert out == 'nasa_meta.xml\t0e339f4a29a8bc42303813cbec9243e5\n' with IaRequestsMock() as rsps: rsps.add_metadata_mock('nasa') - ia_list.main(['list', '--columns', 'md5', '--glob', '*meta.xml', 'nasa'], SESSION) + ia_list.main( + ['list', '--columns', 'md5', '--glob', '*meta.xml', 'nasa'], SESSION + ) out, err = capsys.readouterr() assert out == '0e339f4a29a8bc42303813cbec9243e5\n' diff --git a/tests/cli/test_ia_metadata.py b/tests/cli/test_ia_metadata.py index af8cae88..f66f2d39 100644 --- a/tests/cli/test_ia_metadata.py +++ b/tests/cli/test_ia_metadata.py @@ -23,14 +23,21 @@ def test_ia_metadata_exists(capsys): def test_ia_metadata_formats(capsys, nasa_mocker): ia_call(['ia', 'metadata', '--formats', 'nasa']) out, err = capsys.readouterr() - expected_formats = {'Collection Header', 'Archive BitTorrent', 'JPEG', - 'Metadata', ''} + expected_formats = { + 'Collection Header', + 'Archive BitTorrent', + 'JPEG', + 'Metadata', + '', + } assert set(out.split('\n')) == expected_formats def test_ia_metadata_modify(capsys): - md_rsp = ('{"success":true,"task_id":447613301,' - '"log":"https://catalogd.archive.org/log/447613301"}') + md_rsp = ( + '{"success":true,"task_id":447613301,' + '"log":"https://catalogd.archive.org/log/447613301"}' + ) with IaRequestsMock() as rsps: rsps.add_metadata_mock('nasa') rsps.add_metadata_mock('nasa', body=md_rsp, method=responses.POST) diff --git a/tests/cli/test_ia_search.py b/tests/cli/test_ia_search.py index c3d950f0..045caf01 100644 --- a/tests/cli/test_ia_search.py +++ b/tests/cli/test_ia_search.py @@ -9,16 +9,16 @@ def test_ia_search_itemlist(capsys): with responses.RequestsMock(assert_all_requests_are_fired=False) as rsps: url = f'{PROTOCOL}//archive.org/services/search/v1/scrape' - p1 = { - 'q': 'collection:attentionkmartshoppers', - 'count': '10000' - } + p1 = {'q': 'collection:attentionkmartshoppers', 'count': '10000'} _j = json.loads(test_scrape_response) del _j['cursor'] _r = json.dumps(_j) - rsps.add(responses.POST, url, - body=_r, - 
match=[responses.matchers.query_param_matcher(p1)]) + rsps.add( + responses.POST, + url, + body=_r, + match=[responses.matchers.query_param_matcher(p1)], + ) ia_call(['ia', 'search', 'collection:attentionkmartshoppers', '--itemlist']) out, err = capsys.readouterr() @@ -28,14 +28,13 @@ def test_ia_search_itemlist(capsys): def test_ia_search_num_found(capsys): with IaRequestsMock(assert_all_requests_are_fired=False) as rsps: url = f'{PROTOCOL}//archive.org/services/search/v1/scrape' - p = { - 'q': 'collection:nasa', - 'total_only': 'true', - 'count': '10000' - } - rsps.add(responses.POST, url, - body='{"items":[],"count":0,"total":50}', - match=[responses.matchers.query_param_matcher(p)]) + p = {'q': 'collection:nasa', 'total_only': 'true', 'count': '10000'} + rsps.add( + responses.POST, + url, + body='{"items":[],"count":0,"total":50}', + match=[responses.matchers.query_param_matcher(p)], + ) ia_call(['ia', 'search', 'collection:nasa', '--num-found']) out, err = capsys.readouterr() diff --git a/tests/cli/test_ia_upload.py b/tests/cli/test_ia_upload.py index 3255d981..c40ccc42 100644 --- a/tests/cli/test_ia_upload.py +++ b/tests/cli/test_ia_upload.py @@ -18,33 +18,43 @@ def test_ia_upload(tmpdir_ch, caplog): with IaRequestsMock() as rsps: rsps.add_metadata_mock('nasa') - rsps.add(responses.PUT, f'{PROTOCOL}//s3.us.archive.org/nasa/test.txt', - body='', - content_type='text/plain') + rsps.add( + responses.PUT, + f'{PROTOCOL}//s3.us.archive.org/nasa/test.txt', + body='', + content_type='text/plain', + ) ia_call(['ia', '--log', 'upload', 'nasa', 'test.txt']) - assert f'uploaded test.txt to {PROTOCOL}//s3.us.archive.org/nasa/test.txt' in caplog.text + assert ( + f'uploaded test.txt to {PROTOCOL}//s3.us.archive.org/nasa/test.txt' + in caplog.text + ) def test_ia_upload_invalid_identifier(capsys, caplog): with open('test.txt', 'w') as fh: fh.write('foo') - ia_call(['ia', '--log', 'upload', 'føø', 'test.txt'], - expected_exit_code=1) + ia_call(['ia', '--log', 'upload', 'føø', 'test.txt'], expected_exit_code=1) out, err = capsys.readouterr() - assert (' should be between 3 and 80 characters in length, and ' - 'can only contain alphanumeric characters, periods ".", ' - 'underscores "_", or dashes "-". However, cannot begin ' - 'with periods, underscores, or dashes.') in err + assert ( + ' should be between 3 and 80 characters in length, and ' + 'can only contain alphanumeric characters, periods ".", ' + 'underscores "_", or dashes "-". However, cannot begin ' + 'with periods, underscores, or dashes.' + ) in err def test_ia_upload_status_check(capsys): with IaRequestsMock() as rsps: - rsps.add(responses.GET, f'{PROTOCOL}//s3.us.archive.org', - body=STATUS_CHECK_RESPONSE, - content_type='application/json') + rsps.add( + responses.GET, + f'{PROTOCOL}//s3.us.archive.org', + body=STATUS_CHECK_RESPONSE, + content_type='application/json', + ) ia_call(['ia', 'upload', 'nasa', '--status-check']) out, err = capsys.readouterr() @@ -53,14 +63,19 @@ def test_ia_upload_status_check(capsys): j = json.loads(STATUS_CHECK_RESPONSE) j['over_limit'] = 1 rsps.reset() - rsps.add(responses.GET, f'{PROTOCOL}//s3.us.archive.org', - body=json.dumps(j), - content_type='application/json') + rsps.add( + responses.GET, + f'{PROTOCOL}//s3.us.archive.org', + body=json.dumps(j), + content_type='application/json', + ) ia_call(['ia', 'upload', 'nasa', '--status-check'], expected_exit_code=1) out, err = capsys.readouterr() - assert ('warning: nasa is over limit, and not accepting requests. 
' - 'Expect 503 SlowDown errors.') in err + assert ( + 'warning: nasa is over limit, and not accepting requests. ' + 'Expect 503 SlowDown errors.' + ) in err def test_ia_upload_debug(capsys, tmpdir_ch, nasa_mocker): @@ -79,23 +94,27 @@ def test_ia_upload_debug(capsys, tmpdir_ch, nasa_mocker): def test_ia_upload_403(capsys): - s3_error = ('' - 'SignatureDoesNotMatch' - 'The request signature we calculated does not match ' - 'the signature you provided. Check your AWS Secret Access Key ' - 'and signing method. For more information, see REST ' - 'Authentication and SOAP Authentication for details.' - "'PUT\n\n\n\n/iacli-test-item60/test-replace.txt'" - '18a9c5ea-088f-42f5-9fcf-70651cc085ca' - '') + s3_error = ( + '' + 'SignatureDoesNotMatch' + 'The request signature we calculated does not match ' + 'the signature you provided. Check your AWS Secret Access Key ' + 'and signing method. For more information, see REST ' + 'Authentication and SOAP Authentication for details.' + "'PUT\n\n\n\n/iacli-test-item60/test-replace.txt'" + '18a9c5ea-088f-42f5-9fcf-70651cc085ca' + '' + ) with IaRequestsMock() as rsps: rsps.add_metadata_mock('nasa') - rsps.add(responses.PUT, - f'{PROTOCOL}//s3.us.archive.org/nasa/test_ia_upload.py', - body=s3_error, - status=403, - content_type='text/plain') + rsps.add( + responses.PUT, + f'{PROTOCOL}//s3.us.archive.org/nasa/test_ia_upload.py', + body=s3_error, + status=403, + content_type='text/plain', + ) ia_call(['ia', 'upload', 'nasa', __file__], expected_exit_code=1) out, err = capsys.readouterr() @@ -131,16 +150,29 @@ def test_ia_upload_unicode(tmpdir_ch, caplog): efname = '%E0%AE%A4%E0%AE%AE%E0%AE%BF%E0%AE%B4%E0%AF%8D%20-%20baz%20%E2%88%86.txt' with IaRequestsMock(assert_all_requests_are_fired=False) as rsps: rsps.add_metadata_mock('nasa') - rsps.add(responses.PUT, - f'{PROTOCOL}//s3.us.archive.org/nasa/{efname}', - body='', - content_type='text/plain') - ia_call(['ia', '--log', 'upload', 'nasa', 'தமிழ் - baz ∆.txt', - '--metadata', 'foo:∆']) - - assert (f'uploaded தமிழ் - baz ∆.txt to {PROTOCOL}//s3.us.archive.org/nasa/' - '%E0%AE%A4%E0%AE%AE%E0%AE%BF%E0%AE%B4%E0%AF%8D%20-%20' - 'baz%20%E2%88%86.txt') in caplog.text + rsps.add( + responses.PUT, + f'{PROTOCOL}//s3.us.archive.org/nasa/{efname}', + body='', + content_type='text/plain', + ) + ia_call( + [ + 'ia', + '--log', + 'upload', + 'nasa', + 'தமிழ் - baz ∆.txt', + '--metadata', + 'foo:∆', + ] + ) + + assert ( + f'uploaded தமிழ் - baz ∆.txt to {PROTOCOL}//s3.us.archive.org/nasa/' + '%E0%AE%A4%E0%AE%AE%E0%AE%BF%E0%AE%B4%E0%AF%8D%20-%20' + 'baz%20%E2%88%86.txt' + ) in caplog.text def test_ia_upload_remote_name(tmpdir_ch, caplog): @@ -149,13 +181,19 @@ def test_ia_upload_remote_name(tmpdir_ch, caplog): with IaRequestsMock() as rsps: rsps.add_metadata_mock('nasa') - rsps.add(responses.PUT, f'{PROTOCOL}//s3.us.archive.org/nasa/hi.txt', - body='', - content_type='text/plain') - ia_call(['ia', '--log', 'upload', 'nasa', 'test.txt', '--remote-name', - 'hi.txt']) + rsps.add( + responses.PUT, + f'{PROTOCOL}//s3.us.archive.org/nasa/hi.txt', + body='', + content_type='text/plain', + ) + ia_call( + ['ia', '--log', 'upload', 'nasa', 'test.txt', '--remote-name', 'hi.txt'] + ) - assert f'uploaded hi.txt to {PROTOCOL}//s3.us.archive.org/nasa/hi.txt' in caplog.text + assert ( + f'uploaded hi.txt to {PROTOCOL}//s3.us.archive.org/nasa/hi.txt' in caplog.text + ) def test_ia_upload_stdin(tmpdir_ch, caplog): @@ -170,13 +208,18 @@ def replace_stdin(f): with IaRequestsMock() as rsps: rsps.add_metadata_mock('nasa') - rsps.add(responses.PUT, 
f'{PROTOCOL}//s3.us.archive.org/nasa/hi.txt', - body='', - content_type='text/plain') + rsps.add( + responses.PUT, + f'{PROTOCOL}//s3.us.archive.org/nasa/hi.txt', + body='', + content_type='text/plain', + ) with replace_stdin(StringIO('foo')): ia_call(['ia', '--log', 'upload', 'nasa', '-', '--remote-name', 'hi.txt']) - assert f'uploaded hi.txt to {PROTOCOL}//s3.us.archive.org/nasa/hi.txt' in caplog.text + assert ( + f'uploaded hi.txt to {PROTOCOL}//s3.us.archive.org/nasa/hi.txt' in caplog.text + ) def test_ia_upload_inexistent_file(tmpdir_ch, capsys, caplog): @@ -197,16 +240,26 @@ def test_ia_upload_spreadsheet(tmpdir_ch, caplog): with IaRequestsMock() as rsps: rsps.add_metadata_mock('nasa') - rsps.add(responses.PUT, f'{PROTOCOL}//s3.us.archive.org/nasa/foo.txt', - body='', - content_type='text/plain') - rsps.add(responses.PUT, f'{PROTOCOL}//s3.us.archive.org/nasa/bar.txt', - body='', - content_type='text/plain') + rsps.add( + responses.PUT, + f'{PROTOCOL}//s3.us.archive.org/nasa/foo.txt', + body='', + content_type='text/plain', + ) + rsps.add( + responses.PUT, + f'{PROTOCOL}//s3.us.archive.org/nasa/bar.txt', + body='', + content_type='text/plain', + ) ia_call(['ia', 'upload', '--spreadsheet', 'test.csv']) - assert f'uploaded foo.txt to {PROTOCOL}//s3.us.archive.org/nasa/foo.txt' in caplog.text - assert f'uploaded bar.txt to {PROTOCOL}//s3.us.archive.org/nasa/bar.txt' in caplog.text + assert ( + f'uploaded foo.txt to {PROTOCOL}//s3.us.archive.org/nasa/foo.txt' in caplog.text + ) + assert ( + f'uploaded bar.txt to {PROTOCOL}//s3.us.archive.org/nasa/bar.txt' in caplog.text + ) def test_ia_upload_spreadsheet_item_column(tmpdir_ch, caplog): @@ -218,12 +271,18 @@ def test_ia_upload_spreadsheet_item_column(tmpdir_ch, caplog): with IaRequestsMock() as rsps: rsps.add_metadata_mock('nasa') - rsps.add(responses.PUT, f'{PROTOCOL}//s3.us.archive.org/nasa/test.txt', - body='', - content_type='text/plain') + rsps.add( + responses.PUT, + f'{PROTOCOL}//s3.us.archive.org/nasa/test.txt', + body='', + content_type='text/plain', + ) ia_call(['ia', 'upload', '--spreadsheet', 'test.csv']) - assert f'uploaded test.txt to {PROTOCOL}//s3.us.archive.org/nasa/test.txt' in caplog.text + assert ( + f'uploaded test.txt to {PROTOCOL}//s3.us.archive.org/nasa/test.txt' + in caplog.text + ) def test_ia_upload_spreadsheet_item_and_identifier_column(tmpdir_ch, caplog): @@ -236,9 +295,12 @@ def test_ia_upload_spreadsheet_item_and_identifier_column(tmpdir_ch, caplog): with IaRequestsMock() as rsps: rsps.add_metadata_mock('nasa') - rsps.add(responses.PUT, f'{PROTOCOL}//s3.us.archive.org/nasa/test.txt', - body='', - content_type='text/plain') + rsps.add( + responses.PUT, + f'{PROTOCOL}//s3.us.archive.org/nasa/test.txt', + body='', + content_type='text/plain', + ) ia_call(['ia', 'upload', '--spreadsheet', 'test.csv']) @@ -248,7 +310,10 @@ def test_ia_upload_spreadsheet_item_and_identifier_column(tmpdir_ch, caplog): assert 'x-archive-meta00-identifier' not in putCalls[0].request.headers assert 'x-archive-meta00-item' not in putCalls[0].request.headers - assert f'uploaded test.txt to {PROTOCOL}//s3.us.archive.org/nasa/test.txt' in caplog.text + assert ( + f'uploaded test.txt to {PROTOCOL}//s3.us.archive.org/nasa/test.txt' + in caplog.text + ) def test_ia_upload_spreadsheet_missing_identifier(tmpdir_ch, capsys, caplog): @@ -285,12 +350,18 @@ def test_ia_upload_spreadsheet_bom(tmpdir_ch, caplog): with IaRequestsMock() as rsps: rsps.add_metadata_mock('nasa') - rsps.add(responses.PUT, f'{PROTOCOL}//s3.us.archive.org/nasa/test.txt', - 
body='', - content_type='text/plain') + rsps.add( + responses.PUT, + f'{PROTOCOL}//s3.us.archive.org/nasa/test.txt', + body='', + content_type='text/plain', + ) ia_call(['ia', 'upload', '--spreadsheet', 'test.csv']) - assert f'uploaded test.txt to {PROTOCOL}//s3.us.archive.org/nasa/test.txt' in caplog.text + assert ( + f'uploaded test.txt to {PROTOCOL}//s3.us.archive.org/nasa/test.txt' + in caplog.text + ) def test_ia_upload_checksum(tmpdir_ch, caplog): @@ -300,25 +371,39 @@ def test_ia_upload_checksum(tmpdir_ch, caplog): # First upload, file not in metadata yet with IaRequestsMock() as rsps: rsps.add_metadata_mock('nasa') - rsps.add(responses.PUT, f'{PROTOCOL}//s3.us.archive.org/nasa/test.txt', - body='', - content_type='text/plain') + rsps.add( + responses.PUT, + f'{PROTOCOL}//s3.us.archive.org/nasa/test.txt', + body='', + content_type='text/plain', + ) ia_call(['ia', '--log', 'upload', 'nasa', 'test.txt', '--checksum']) - assert f'uploaded test.txt to {PROTOCOL}//s3.us.archive.org/nasa/test.txt' in caplog.text + assert ( + f'uploaded test.txt to {PROTOCOL}//s3.us.archive.org/nasa/test.txt' + in caplog.text + ) caplog.clear() # Second upload with file in metadata def insert_test_txt(body): body = json.loads(body) - body['files'].append({'name': 'test.txt', 'md5': 'acbd18db4cc2f85cedef654fccc4a4d8'}) + body['files'].append( + {'name': 'test.txt', 'md5': 'acbd18db4cc2f85cedef654fccc4a4d8'} + ) return json.dumps(body) with IaRequestsMock() as rsps: rsps.add_metadata_mock('nasa', transform_body=insert_test_txt) - ia_call(['ia', '--log', 'upload', 'nasa', 'test.txt', '--checksum'], expected_exit_code=1) + ia_call( + ['ia', '--log', 'upload', 'nasa', 'test.txt', '--checksum'], + expected_exit_code=1, + ) - assert f'test.txt already exists: {PROTOCOL}//s3.us.archive.org/nasa/test.txt' in caplog.text + assert ( + f'test.txt already exists: {PROTOCOL}//s3.us.archive.org/nasa/test.txt' + in caplog.text + ) caplog.clear() @@ -329,10 +414,15 @@ def insert_test_txt(body): with IaRequestsMock() as rsps: rsps.add_metadata_mock('nasa', transform_body=insert_test_txt) - ia_call(['ia', '--log', 'upload', '--spreadsheet', 'test.csv', '--checksum'], - expected_exit_code=1) + ia_call( + ['ia', '--log', 'upload', '--spreadsheet', 'test.csv', '--checksum'], + expected_exit_code=1, + ) - assert f'test.txt already exists: {PROTOCOL}//s3.us.archive.org/nasa/test.txt' in caplog.text + assert ( + f'test.txt already exists: {PROTOCOL}//s3.us.archive.org/nasa/test.txt' + in caplog.text + ) def test_ia_upload_keep_directories(tmpdir_ch, caplog): @@ -346,36 +436,62 @@ def test_ia_upload_keep_directories(tmpdir_ch, caplog): # Default behaviour with IaRequestsMock() as rsps: rsps.add_metadata_mock('nasa') - rsps.add(responses.PUT, f'{PROTOCOL}//s3.us.archive.org/nasa/test.txt', - body='', - content_type='text/plain') + rsps.add( + responses.PUT, + f'{PROTOCOL}//s3.us.archive.org/nasa/test.txt', + body='', + content_type='text/plain', + ) ia_call(['ia', '--log', 'upload', 'nasa', 'foo/test.txt']) - assert f'uploaded test.txt to {PROTOCOL}//s3.us.archive.org/nasa/test.txt' in caplog.text + assert ( + f'uploaded test.txt to {PROTOCOL}//s3.us.archive.org/nasa/test.txt' + in caplog.text + ) caplog.clear() with IaRequestsMock() as rsps: rsps.add_metadata_mock('nasa') - rsps.add(responses.PUT, f'{PROTOCOL}//s3.us.archive.org/nasa/test.txt', - body='', - content_type='text/plain') + rsps.add( + responses.PUT, + f'{PROTOCOL}//s3.us.archive.org/nasa/test.txt', + body='', + content_type='text/plain', + ) ia_call(['ia', '--log', 
'upload', '--spreadsheet', 'test.csv']) - assert f'uploaded test.txt to {PROTOCOL}//s3.us.archive.org/nasa/test.txt' in caplog.text + assert ( + f'uploaded test.txt to {PROTOCOL}//s3.us.archive.org/nasa/test.txt' + in caplog.text + ) caplog.clear() # With the option with IaRequestsMock() as rsps: rsps.add_metadata_mock('nasa') - rsps.add(responses.PUT, f'{PROTOCOL}//s3.us.archive.org/nasa/foo/test.txt', - body='', - content_type='text/plain') + rsps.add( + responses.PUT, + f'{PROTOCOL}//s3.us.archive.org/nasa/foo/test.txt', + body='', + content_type='text/plain', + ) ia_call(['ia', '--log', 'upload', 'nasa', 'foo/test.txt', '--keep-directories']) - assert f'uploaded foo/test.txt to {PROTOCOL}//s3.us.archive.org/nasa/foo/test.txt' in caplog.text + assert ( + f'uploaded foo/test.txt to {PROTOCOL}//s3.us.archive.org/nasa/foo/test.txt' + in caplog.text + ) caplog.clear() with IaRequestsMock() as rsps: rsps.add_metadata_mock('nasa') - rsps.add(responses.PUT, f'{PROTOCOL}//s3.us.archive.org/nasa/foo/test.txt', - body='', - content_type='text/plain') - ia_call(['ia', '--log', 'upload', '--spreadsheet', 'test.csv', '--keep-directories']) - assert f'uploaded foo/test.txt to {PROTOCOL}//s3.us.archive.org/nasa/foo/test.txt' in caplog.text + rsps.add( + responses.PUT, + f'{PROTOCOL}//s3.us.archive.org/nasa/foo/test.txt', + body='', + content_type='text/plain', + ) + ia_call( + ['ia', '--log', 'upload', '--spreadsheet', 'test.csv', '--keep-directories'] + ) + assert ( + f'uploaded foo/test.txt to {PROTOCOL}//s3.us.archive.org/nasa/foo/test.txt' + in caplog.text + ) diff --git a/tests/conftest.py b/tests/conftest.py index 0b1392a7..99199246 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -20,9 +20,11 @@ try: WindowsError # type: ignore[used-before-def] except NameError: + class WindowsError(Exception): pass + PROTOCOL = 'https:' BASE_URL = 'https://archive.org/' METADATA_URL = f'{BASE_URL}metadata/' @@ -86,8 +88,14 @@ def call_cmd(cmd, expected_exit_code=0): class IaRequestsMock(RequestsMock): - def add_metadata_mock(self, identifier, body=None, method=responses.GET, - protocol='https?', transform_body=None): + def add_metadata_mock( + self, + identifier, + body=None, + method=responses.GET, + protocol='https?', + transform_body=None, + ): url = re.compile(f'{protocol}://archive.org/metadata/{identifier}') if body is None: body = load_test_data_file(f'metadata/{identifier}.json') diff --git a/tests/test_api.py b/tests/test_api.py index 0f81a27c..8ad14b52 100644 --- a/tests/test_api.py +++ b/tests/test_api.py @@ -75,11 +75,12 @@ def test_get_item_with_kwargs(): with IaRequestsMock(assert_all_requests_are_fired=False) as rsps: rsps.add_metadata_mock('nasa') item = get_item('nasa', http_adapter_kwargs={'max_retries': 13}) - assert isinstance(item.session.adapters[f'{PROTOCOL}//'].max_retries, - urllib3.Retry) + assert isinstance( + item.session.adapters[f'{PROTOCOL}//'].max_retries, urllib3.Retry + ) try: - get_item('nasa', request_kwargs={'timeout': .0000000000001}) + get_item('nasa', request_kwargs={'timeout': 0.0000000000001}) except Exception as exc: assert 'timed out' in str(exc) @@ -109,9 +110,9 @@ def test_get_files_with_get_item_kwargs(tmpdir): assert len(files) == 1 assert files[0].name == 'nasa_meta.xml' - files = get_files('nasa', - files='nasa_meta.xml', - config={'logging': {'level': 'INFO'}}) + files = get_files( + 'nasa', files='nasa_meta.xml', config={'logging': {'level': 'INFO'}} + ) files = list(files) assert len(files) == 1 assert files[0].name == 'nasa_meta.xml' @@ -119,21 
+120,19 @@ def test_get_files_with_get_item_kwargs(tmpdir): test_conf = '[s3]\naccess = key2' with open('ia_test.ini', 'w') as fh: fh.write(test_conf) - files = get_files('nasa', files='nasa_meta.xml', - config_file='ia_test.ini') + files = get_files('nasa', files='nasa_meta.xml', config_file='ia_test.ini') files = list(files) assert len(files) == 1 assert files[0].name == 'nasa_meta.xml' - files = get_files('nasa', - files='nasa_meta.xml', - http_adapter_kwargs={'max_retries': 3}) + files = get_files( + 'nasa', files='nasa_meta.xml', http_adapter_kwargs={'max_retries': 3} + ) files = list(files) assert len(files) == 1 assert files[0].name == 'nasa_meta.xml' - files = get_files('nasa', files='nasa_meta.xml', - request_kwargs={'timeout': 4}) + files = get_files('nasa', files='nasa_meta.xml', request_kwargs={'timeout': 4}) files = list(files) assert len(files) == 1 assert files[0].name == 'nasa_meta.xml' @@ -186,17 +185,25 @@ def test_get_files_glob_pattern(): def test_modify_metadata(): with IaRequestsMock(assert_all_requests_are_fired=False) as rsps: - rsps.add(responses.GET, f'{PROTOCOL}//archive.org/metadata/nasa', - body='{"metadata":{"title":"foo"}}') - rsps.add(responses.POST, f'{PROTOCOL}//archive.org/metadata/nasa', - body=('{"success":true,"task_id":423444944,' - '"log":"https://catalogd.archive.org/log/423444944"}')) + rsps.add( + responses.GET, + f'{PROTOCOL}//archive.org/metadata/nasa', + body='{"metadata":{"title":"foo"}}', + ) + rsps.add( + responses.POST, + f'{PROTOCOL}//archive.org/metadata/nasa', + body=( + '{"success":true,"task_id":423444944,' + '"log":"https://catalogd.archive.org/log/423444944"}' + ), + ) r = modify_metadata('nasa', {'foo': 1}) assert r.status_code == 200 assert r.json() == { 'task_id': 423444944, 'success': True, - 'log': 'https://catalogd.archive.org/log/423444944' + 'log': 'https://catalogd.archive.org/log/423444944', } @@ -210,19 +217,25 @@ def test_upload(): 'authorization': 'LOW test_access:test_secret', } with IaRequestsMock(assert_all_requests_are_fired=False) as rsps: - rsps.add(responses.PUT, re.compile(r'.*s3.us.archive.org/.*'), - adding_headers=expected_s3_headers) + rsps.add( + responses.PUT, + re.compile(r'.*s3.us.archive.org/.*'), + adding_headers=expected_s3_headers, + ) rsps.add_metadata_mock('nasa') - rsps.add(responses.GET, f'{PROTOCOL}//archive.org/metadata/nasa', - body='{}') - _responses = upload('nasa', NASA_METADATA_PATH, - access_key='test_access', - secret_key='test_secret') + rsps.add(responses.GET, f'{PROTOCOL}//archive.org/metadata/nasa', body='{}') + _responses = upload( + 'nasa', + NASA_METADATA_PATH, + access_key='test_access', + secret_key='test_secret', + ) for response in _responses: req = response.request headers = {k.lower(): str(v) for k, v in req.headers.items()} scanner_header = '%20'.join( - response.headers['x-archive-meta00-scanner'].split('%20')[:4]) + response.headers['x-archive-meta00-scanner'].split('%20')[:4] + ) headers['x-archive-meta00-scanner'] = scanner_header assert 'user-agent' in headers del headers['accept'] @@ -235,10 +248,13 @@ def test_upload(): def test_upload_validate_identifier(): try: - upload('føø', NASA_METADATA_PATH, - access_key='test_access', - secret_key='test_secret', - validate_identifier=True) + upload( + 'føø', + NASA_METADATA_PATH, + access_key='test_access', + secret_key='test_secret', + validate_identifier=True, + ) raise AssertionError("Given invalid identifier was not correctly validated.") except Exception as exc: assert isinstance(exc, InvalidIdentifierException) @@ 
-252,24 +268,31 @@ def test_upload_validate_identifier(): 'authorization': 'LOW test_access:test_secret', } with IaRequestsMock(assert_all_requests_are_fired=False) as rsps: - rsps.add(responses.PUT, re.compile(r'.*s3.us.archive.org/.*'), - adding_headers=expected_s3_headers) + rsps.add( + responses.PUT, + re.compile(r'.*s3.us.archive.org/.*'), + adding_headers=expected_s3_headers, + ) rsps.add_metadata_mock('nasa') - rsps.add(responses.GET, f'{PROTOCOL}//archive.org/metadata/nasa', - body='{}') - upload('nasa', NASA_METADATA_PATH, - access_key='test_access', - secret_key='test_secret', - validate_identifier=True) + rsps.add(responses.GET, f'{PROTOCOL}//archive.org/metadata/nasa', body='{}') + upload( + 'nasa', + NASA_METADATA_PATH, + access_key='test_access', + secret_key='test_secret', + validate_identifier=True, + ) assert True def test_download(tmpdir): tmpdir.chdir() with IaRequestsMock() as rsps: - rsps.add(responses.GET, - f'{PROTOCOL}//archive.org/download/nasa/nasa_meta.xml', - body='test content') + rsps.add( + responses.GET, + f'{PROTOCOL}//archive.org/download/nasa/nasa_meta.xml', + body='test content', + ) rsps.add_metadata_mock('nasa') download('nasa', 'nasa_meta.xml') p = os.path.join(str(tmpdir), 'nasa') @@ -286,13 +309,19 @@ def test_search_items(session): p2 = p1.copy() p2['total_only'] = 'true' with IaRequestsMock(assert_all_requests_are_fired=False) as rsps: - rsps.add(responses.POST, url, - body=TEST_SCRAPE_RESPONSE, - match=[responses.matchers.query_param_matcher(p1)]) - rsps.add(responses.POST, url, - body='{"items":[],"count":0,"total":1}', - match=[responses.matchers.query_param_matcher(p2)], - content_type='application/json; charset=UTF-8') + rsps.add( + responses.POST, + url, + body=TEST_SCRAPE_RESPONSE, + match=[responses.matchers.query_param_matcher(p1)], + ) + rsps.add( + responses.POST, + url, + body='{"items":[],"count":0,"total":1}', + match=[responses.matchers.query_param_matcher(p2)], + content_type='application/json; charset=UTF-8', + ) r = search_items('identifier:nasa', archive_session=session) expected_results = [{'identifier': 'nasa'}] assert r.num_found == 1 @@ -306,9 +335,7 @@ def test_search_items(session): def test_search_items_with_fields(session): _j = json.loads(TEST_SCRAPE_RESPONSE) - _j['items'] = [ - {'identifier': 'nasa', 'title': 'NASA Images'} - ] + _j['items'] = [{'identifier': 'nasa', 'title': 'NASA Images'}] search_response_str = json.dumps(_j) url = f'{PROTOCOL}//archive.org/services/search/v1/scrape' p1 = { @@ -322,23 +349,32 @@ def test_search_items_with_fields(session): 'count': '10000', } with IaRequestsMock() as rsps: - rsps.add(responses.POST, url, - match=[responses.matchers.query_param_matcher(p1)], - body=search_response_str) - rsps.add(responses.POST, url, - body='{"items":[],"count":0,"total":1}', - match=[responses.matchers.query_param_matcher(p2)], - content_type='application/json; charset=UTF-8') - r = search_items('identifier:nasa', fields=['identifier', 'title'], - archive_session=session) + rsps.add( + responses.POST, + url, + match=[responses.matchers.query_param_matcher(p1)], + body=search_response_str, + ) + rsps.add( + responses.POST, + url, + body='{"items":[],"count":0,"total":1}', + match=[responses.matchers.query_param_matcher(p2)], + content_type='application/json; charset=UTF-8', + ) + r = search_items( + 'identifier:nasa', fields=['identifier', 'title'], archive_session=session + ) assert list(r) == [{'identifier': 'nasa', 'title': 'NASA Images'}] def test_search_items_as_items(session): with 
IaRequestsMock(assert_all_requests_are_fired=False) as rsps: - rsps.add(responses.POST, - f'{PROTOCOL}//archive.org/services/search/v1/scrape', - body=TEST_SCRAPE_RESPONSE) + rsps.add( + responses.POST, + f'{PROTOCOL}//archive.org/services/search/v1/scrape', + body=TEST_SCRAPE_RESPONSE, + ) rsps.add_metadata_mock('nasa') r = search_items('identifier:nasa', archive_session=session) assert [x.identifier for x in r.iter_as_items()] == ['nasa'] @@ -347,31 +383,28 @@ def test_search_items_as_items(session): def test_search_items_fts(session): with IaRequestsMock(assert_all_requests_are_fired=False) as rsps: - rsps.add(responses.POST, - f'{PROTOCOL}//be-api.us.archive.org/ia-pub-fts-api', - body=TEST_SCRAPE_RESPONSE) + rsps.add( + responses.POST, + f'{PROTOCOL}//be-api.us.archive.org/ia-pub-fts-api', + body=TEST_SCRAPE_RESPONSE, + ) rsps.add_metadata_mock('nasa') - r = search_items('nina simone', - full_text_search=True, - archive_session=session) + r = search_items('nina simone', full_text_search=True, archive_session=session) print(r.search_url) assert r.fts == True assert r.dsl_fts == False assert r.query == '!L nina simone' assert r.params == {'count': 10000, 'q': '!L nina simone'} - r = search_items('nina simone', - full_text_search=True, - dsl_fts=True, - archive_session=session) + r = search_items( + 'nina simone', full_text_search=True, dsl_fts=True, archive_session=session + ) assert r.fts == True assert r.dsl_fts == True assert r.query == 'nina simone' assert r.params == {'count': 10000, 'q': 'nina simone'} - r = search_items('nina simone', - dsl_fts=True, - archive_session=session) + r = search_items('nina simone', dsl_fts=True, archive_session=session) assert r.fts == True assert r.dsl_fts == True assert r.query == 'nina simone' @@ -384,14 +417,20 @@ def test_page_row_specification(session): _j['response']['numFound'] = 1 _search_r = json.dumps(_j) with IaRequestsMock(assert_all_requests_are_fired=False) as rsps: - rsps.add(responses.GET, f'{PROTOCOL}//archive.org/advancedsearch.php', - body=_search_r) + rsps.add( + responses.GET, f'{PROTOCOL}//archive.org/advancedsearch.php', body=_search_r + ) rsps.add_metadata_mock('nasa') - rsps.add(responses.POST, - f'{PROTOCOL}//archive.org/services/search/v1/scrape', - body='{"items":[],"count":0,"total":1}', - content_type='application/json; charset=UTF-8') - r = search_items('identifier:nasa', params={'page': '1', 'rows': '1'}, - archive_session=session) + rsps.add( + responses.POST, + f'{PROTOCOL}//archive.org/services/search/v1/scrape', + body='{"items":[],"count":0,"total":1}', + content_type='application/json; charset=UTF-8', + ) + r = search_items( + 'identifier:nasa', + params={'page': '1', 'rows': '1'}, + archive_session=session, + ) assert r.iter_as_items().search == r assert len(r.iter_as_items()) == 1 diff --git a/tests/test_config.py b/tests/test_config.py index f798bf08..e2947c5c 100644 --- a/tests/test_config.py +++ b/tests/test_config.py @@ -30,8 +30,9 @@ def test_get_auth_config(): "screenname":"jakej" }, "version": 1}""" - responses.add(responses.POST, 'https://archive.org/services/xauthn/', - body=test_body) + responses.add( + responses.POST, 'https://archive.org/services/xauthn/', body=test_body + ) r = internetarchive.config.get_auth_config('test@example.com', 'password1') assert r['s3']['access'] == 'Ac3ssK3y' assert r['s3']['secret'] == 'S3cretK3y' @@ -42,14 +43,18 @@ def test_get_auth_config(): @responses.activate def test_get_auth_config_auth_fail(): # No logged-in-sig cookie set raises AuthenticationError. 
- responses.add(responses.POST, 'https://archive.org/services/xauthn/', - body='{"error": "failed"}') + responses.add( + responses.POST, + 'https://archive.org/services/xauthn/', + body='{"error": "failed"}', + ) try: r = internetarchive.config.get_auth_config('test@example.com', 'password1') except AuthenticationError as exc: return - assert str(exc) == ('Authentication failed. Please check your credentials ' - 'and try again.') + assert str(exc) == ( + 'Authentication failed. Please check your credentials and try again.' + ) def test_get_config(): @@ -58,19 +63,22 @@ def test_get_config(): def test_get_config_with_config_file(tmpdir): - test_conf = ('[s3]\n' - 'access = test-access\n' - 'secret = test-secret\n' - '[cookies]\n' - 'logged-in-sig = test-sig\n' - 'logged-in-user = test@archive.org\n') + test_conf = ( + '[s3]\n' + 'access = test-access\n' + 'secret = test-secret\n' + '[cookies]\n' + 'logged-in-sig = test-sig\n' + 'logged-in-user = test@archive.org\n' + ) tmpdir.chdir() with open('ia_test.ini', 'w') as fp: fp.write(test_conf) - config = internetarchive.config.get_config(config_file='ia_test.ini', - config={'custom': 'test'}) + config = internetarchive.config.get_config( + config_file='ia_test.ini', config={'custom': 'test'} + ) assert config['cookies']['logged-in-sig'] == 'test-sig' assert config['cookies']['logged-in-user'] == 'test@archive.org' assert config['s3']['access'] == 'test-access' @@ -125,12 +133,14 @@ def test_get_config_home_not_set_with_config(): def test_get_config_config_and_config_file(tmpdir): - test_conf = ('[s3]\n' - 'access = test-access\n' - 'secret = test-secret\n' - '[cookies]\n' - 'logged-in-sig = test-sig\n' - 'logged-in-user = test@archive.org\n') + test_conf = ( + '[s3]\n' + 'access = test-access\n' + 'secret = test-secret\n' + '[cookies]\n' + 'logged-in-sig = test-sig\n' + 'logged-in-user = test@archive.org\n' + ) tmpdir.chdir() @@ -148,8 +158,9 @@ def test_get_config_config_and_config_file(tmpdir): }, } del test_conf['s3']['access'] - config = internetarchive.config.get_config(config_file='ia_test.ini', - config=test_conf) + config = internetarchive.config.get_config( + config_file='ia_test.ini', config=test_conf + ) assert config['cookies']['logged-in-sig'] == 'test-sig' assert config['cookies']['logged-in-user'] == 'test@archive.org' assert config['s3']['access'] == 'test-access' @@ -175,12 +186,13 @@ def _environ(**kwargs): def _test_parse_config_file( - expected_result, - config_file_contents='', - config_file_paths=None, - home=None, - xdg_config_home=None, - config_file_param=None): + expected_result, + config_file_contents='', + config_file_paths=None, + home=None, + xdg_config_home=None, + config_file_param=None, +): # expected_result: (config_file_path, is_xdg); config isn't compared. 
# config_file_contents: str # config_file_paths: list of filenames to write config_file_contents to @@ -193,6 +205,7 @@ def _test_parse_config_file( config_file_paths = [] with tempfile.TemporaryDirectory() as tmp_test_dir: + def _replace_path(s): if s and s.startswith('$TMPTESTDIR/'): return os.path.join(tmp_test_dir, s.split('/', 1)[1]) @@ -216,7 +229,8 @@ def _replace_path(s): env['XDG_CONFIG_HOME'] = xdg_config_home with _environ(**env): config_file_path, is_xdg, config = internetarchive.config.parse_config_file( - config_file=config_file_param) + config_file=config_file_param + ) assert (config_file_path, is_xdg) == expected_result[0:2] @@ -254,7 +268,7 @@ def test_parse_config_file_existing_all(): config_file_paths=[ '$TMPTESTDIR/.config/internetarchive/ia.ini', '$TMPTESTDIR/.config/ia.ini', - '$TMPTESTDIR/.ia' + '$TMPTESTDIR/.ia', ], ) @@ -288,7 +302,7 @@ def test_parse_config_file_direct_path_overrides_existing_files(): config_file_paths=[ '$TMPTESTDIR/.config/internetarchive/ia.ini', '$TMPTESTDIR/.config/ia.ini', - '$TMPTESTDIR/.ia' + '$TMPTESTDIR/.ia', ], config_file_param='/path/to/ia.ini', ) @@ -310,11 +324,12 @@ def test_parse_config_file_with_environment_variable_and_parameter(): def _test_write_config_file( - expected_config_file, - expected_modes, - dirs=None, - create_expected_file=False, - config_file_param=None): + expected_config_file, + expected_modes, + dirs=None, + create_expected_file=False, + config_file_param=None, +): # expected_config_file: str # expected_modes: list of (path, mode) tuples # dirs: list of str, directories to create before running write_config_file @@ -328,7 +343,9 @@ def _test_write_config_file( expected_config_file = os.path.join(temp_home_dir, expected_config_file) if dirs: dirs = [os.path.join(temp_home_dir, d) for d in dirs] - expected_modes = [(os.path.join(temp_home_dir, p), m) for p, m in expected_modes] + expected_modes = [ + (os.path.join(temp_home_dir, p), m) for p, m in expected_modes + ] if config_file_param: config_file_param = os.path.join(temp_home_dir, config_file_param) with _environ(HOME=temp_home_dir): @@ -342,7 +359,9 @@ def _test_write_config_file( if create_expected_file: with open(expected_config_file, 'w') as fp: os.chmod(expected_config_file, 0o777) - config_file = internetarchive.config.write_config_file({}, config_file_param) + config_file = internetarchive.config.write_config_file( + {}, config_file_param + ) assert config_file == expected_config_file assert os.path.isfile(config_file) for path, mode in expected_modes: diff --git a/tests/test_item.py b/tests/test_item.py index 67bece6b..c3c1e598 100644 --- a/tests/test_item.py +++ b/tests/test_item.py @@ -66,12 +66,14 @@ def test_get_files(nasa_item): files = nasa_item.get_files() assert isinstance(files, types.GeneratorType) - expected_files = {'NASAarchiveLogo.jpg', - 'globe_west_540.jpg', - 'nasa_reviews.xml', - 'nasa_meta.xml', - 'nasa_archive.torrent', - 'nasa_files.xml'} + expected_files = { + 'NASAarchiveLogo.jpg', + 'globe_west_540.jpg', + 'nasa_reviews.xml', + 'nasa_meta.xml', + 'nasa_archive.torrent', + 'nasa_files.xml', + } files = {x.name for x in files} assert files == expected_files @@ -89,22 +91,28 @@ def test_get_files_by_formats(nasa_item): expected_files = {'nasa_archive.torrent'} assert files == expected_files - files = {f.name for f in nasa_item.get_files(formats=['Archive BitTorrent', 'JPEG'])} + files = { + f.name for f in nasa_item.get_files(formats=['Archive BitTorrent', 'JPEG']) + } expected_files = {'nasa_archive.torrent', 
'globe_west_540.jpg'} assert files == expected_files def test_get_files_by_glob(nasa_item): files = {f.name for f in nasa_item.get_files(glob_pattern='*jpg|*torrent')} - expected_files = {'NASAarchiveLogo.jpg', - 'globe_west_540.jpg', - 'nasa_archive.torrent'} + expected_files = { + 'NASAarchiveLogo.jpg', + 'globe_west_540.jpg', + 'nasa_archive.torrent', + } assert files == expected_files files = {f.name for f in nasa_item.get_files(glob_pattern=['*jpg', '*torrent'])} - expected_files = {'NASAarchiveLogo.jpg', - 'globe_west_540.jpg', - 'nasa_archive.torrent'} + expected_files = { + 'NASAarchiveLogo.jpg', + 'globe_west_540.jpg', + 'nasa_archive.torrent', + } assert files == expected_files @@ -130,10 +138,12 @@ def test_get_files_by_glob_with_exclude(nasa_item): def test_get_files_with_multiple_filters(nasa_item): files = {f.name for f in nasa_item.get_files(formats='JPEG', glob_pattern='*xml')} - expected_files = {'globe_west_540.jpg', - 'nasa_reviews.xml', - 'nasa_meta.xml', - 'nasa_files.xml'} + expected_files = { + 'globe_west_540.jpg', + 'nasa_reviews.xml', + 'nasa_meta.xml', + 'nasa_files.xml', + } assert files == expected_files @@ -173,14 +183,11 @@ def test_download_ignore_errors(tmpdir, nasa_item): def test_download_ignore_existing(tmpdir, nasa_item): tmpdir.chdir() - with IaRequestsMock( - assert_all_requests_are_fired=False) as rsps: - rsps.add(responses.GET, DOWNLOAD_URL_RE, - body='test content') + with IaRequestsMock(assert_all_requests_are_fired=False) as rsps: + rsps.add(responses.GET, DOWNLOAD_URL_RE, body='test content') nasa_item.download(files='nasa_meta.xml', ignore_existing=True) - rsps.add(responses.GET, DOWNLOAD_URL_RE, - body='new test content') + rsps.add(responses.GET, DOWNLOAD_URL_RE, body='new test content') nasa_item.download(files='nasa_meta.xml', ignore_existing=True) with open('nasa/nasa_meta.xml') as fh: assert fh.read() == 'test content' @@ -215,8 +222,9 @@ def test_download_checksum(tmpdir, caplog): # test no overwrite based on checksum. 
rsps.reset() - rsps.add(responses.GET, DOWNLOAD_URL_RE, - body=load_test_data_file('nasa_meta.xml')) + rsps.add( + responses.GET, DOWNLOAD_URL_RE, body=load_test_data_file('nasa_meta.xml') + ) nasa_item.download(files='nasa_meta.xml', checksum=True) nasa_item.download(files='nasa_meta.xml', checksum=True) @@ -248,9 +256,12 @@ def test_download_no_directory(tmpdir, nasa_item): def test_download_dry_run(tmpdir, capsys, nasa_item): tmpdir.chdir() with IaRequestsMock(assert_all_requests_are_fired=False) as rsps: - rsps.add(responses.GET, DOWNLOAD_URL_RE, - body='no dest dir', - adding_headers={'content-length': '100'}) + rsps.add( + responses.GET, + DOWNLOAD_URL_RE, + body='no dest dir', + adding_headers={'content-length': '100'}, + ) nasa_item.download(formats='Metadata', dry_run=True) expected = {'nasa_reviews.xml', 'nasa_meta.xml', 'nasa_files.xml'} @@ -262,9 +273,12 @@ def test_download_dry_run(tmpdir, capsys, nasa_item): def test_download_dry_run_on_the_fly_formats(tmpdir, capsys, nasa_item): tmpdir.chdir() with IaRequestsMock(assert_all_requests_are_fired=False) as rsps: - rsps.add(responses.GET, DOWNLOAD_URL_RE, - body='no dest dir', - adding_headers={'content-length': '100'}) + rsps.add( + responses.GET, + DOWNLOAD_URL_RE, + body='no dest dir', + adding_headers={'content-length': '100'}, + ) nasa_item.download(formats='MARCXML', on_the_fly=True, dry_run=True) expected = {'nasa_archive_marc.xml'} @@ -276,9 +290,12 @@ def test_download_dry_run_on_the_fly_formats(tmpdir, capsys, nasa_item): def test_download_verbose(tmpdir, capsys, nasa_item): tmpdir.chdir() with IaRequestsMock(assert_all_requests_are_fired=False) as rsps: - rsps.add(responses.GET, DOWNLOAD_URL_RE, - body='no dest dir', - adding_headers={'content-length': '100'}) + rsps.add( + responses.GET, + DOWNLOAD_URL_RE, + body='no dest dir', + adding_headers={'content-length': '100'}, + ) nasa_item.download(files='nasa_meta.xml', verbose=True) out, err = capsys.readouterr() print(repr(err)) @@ -291,14 +308,20 @@ def test_download_dark_item(tmpdir, capsys, nasa_metadata, session): nasa_metadata['metadata']['identifier'] = 'dark-item' nasa_metadata['is_dark'] = True _item_metadata = json.dumps(nasa_metadata) - rsps.add(responses.GET, f'{PROTOCOL}//archive.org/metadata/dark-item', - body=_item_metadata, - content_type='application/json') + rsps.add( + responses.GET, + f'{PROTOCOL}//archive.org/metadata/dark-item', + body=_item_metadata, + content_type='application/json', + ) _item = session.get_item('dark-item') - rsps.add(responses.GET, DOWNLOAD_URL_RE, - body='no dest dir', - status=403, - adding_headers={'content-length': '100'}) + rsps.add( + responses.GET, + DOWNLOAD_URL_RE, + body='no dest dir', + status=403, + adding_headers={'content-length': '100'}, + ) _item.download(files='nasa_meta.xml', verbose=True) out, err = capsys.readouterr() assert 'skipping dark-item, item is dark' in err @@ -306,16 +329,16 @@ def test_download_dark_item(tmpdir, capsys, nasa_metadata, session): def test_upload(nasa_item): with IaRequestsMock(assert_all_requests_are_fired=False) as rsps: - rsps.add(responses.PUT, S3_URL_RE, - adding_headers=EXPECTED_S3_HEADERS) - _responses = nasa_item.upload(NASA_METADATA_PATH, - access_key='a', - secret_key='b') + rsps.add(responses.PUT, S3_URL_RE, adding_headers=EXPECTED_S3_HEADERS) + _responses = nasa_item.upload( + NASA_METADATA_PATH, access_key='a', secret_key='b' + ) for resp in _responses: request = resp.request headers = {k.lower(): str(v) for k, v in request.headers.items()} scanner_header = '%20'.join( - 
resp.headers['x-archive-meta00-scanner'].split('%20')[:4]) + resp.headers['x-archive-meta00-scanner'].split('%20')[:4] + ) headers['x-archive-meta00-scanner'] = scanner_header assert 'user-agent' in headers del headers['user-agent'] @@ -326,25 +349,26 @@ def test_upload(nasa_item): def test_upload_validate_identifier(session): item = session.get_item('føø') with IaRequestsMock(assert_all_requests_are_fired=False) as rsps: - rsps.add(responses.PUT, S3_URL_RE, - adding_headers=EXPECTED_S3_HEADERS) + rsps.add(responses.PUT, S3_URL_RE, adding_headers=EXPECTED_S3_HEADERS) try: - item.upload(NASA_METADATA_PATH, - access_key='a', - secret_key='b', - validate_identifier=True) - raise AssertionError("Given invalid identifier was not correctly validated.") + item.upload( + NASA_METADATA_PATH, + access_key='a', + secret_key='b', + validate_identifier=True, + ) + raise AssertionError( + "Given invalid identifier was not correctly validated." + ) except Exception as exc: assert isinstance(exc, InvalidIdentifierException) valid_item = session.get_item('foo') with IaRequestsMock(assert_all_requests_are_fired=False) as rsps: - rsps.add(responses.PUT, S3_URL_RE, - adding_headers=EXPECTED_S3_HEADERS) - valid_item.upload(NASA_METADATA_PATH, - access_key='a', - secret_key='b', - validate_identifier=True) + rsps.add(responses.PUT, S3_URL_RE, adding_headers=EXPECTED_S3_HEADERS) + valid_item.upload( + NASA_METADATA_PATH, access_key='a', secret_key='b', validate_identifier=True + ) assert True @@ -370,24 +394,26 @@ def test_upload_metadata(nasa_item): _expected_headers['x-archive-meta00-baz'] = ( 'uri(%D0%9F%D0%BE%D1%87%D0%B5%D0%BC' '%D1%83%20%D0%B1%D1%8B%20%D0%B8%20%' - 'D0%BD%D0%B5%D1%82...)') + 'D0%BD%D0%B5%D1%82...)' + ) _expected_headers['x-archive-meta00-baz2'] = ( 'uri(%D0%9F%D0%BE%D1%87%D0%B5%D0%BC' '%D1%83%20%D0%B1%D1%8B%20%D0%B8%20%' - 'D0%BD%D0%B5%D1%82...)') - rsps.add(responses.PUT, S3_URL_RE, - adding_headers=_expected_headers) + 'D0%BD%D0%B5%D1%82...)' + ) + rsps.add(responses.PUT, S3_URL_RE, adding_headers=_expected_headers) md = { 'foo': 'bar', 'subject': ['first', 'second'], 'baz': 'Почему бы и нет...', - 'baz2': ('\u041f\u043e\u0447\u0435\u043c\u0443 \u0431\u044b \u0438 ' - '\u043d\u0435\u0442...'), + 'baz2': ( + '\u041f\u043e\u0447\u0435\u043c\u0443 \u0431\u044b \u0438 ' + '\u043d\u0435\u0442...' + ), } - _responses = nasa_item.upload(NASA_METADATA_PATH, - metadata=md, - access_key='a', - secret_key='b') + _responses = nasa_item.upload( + NASA_METADATA_PATH, metadata=md, access_key='a', secret_key='b' + ) for resp in _responses: request = resp.request del request.headers['x-archive-meta00-scanner'] @@ -398,26 +424,32 @@ def test_upload_metadata(nasa_item): def test_upload_503(capsys, nasa_item): - body = ("" - 'SlowDownPlease reduce your request rate.' - 'simulated error caused by x-(amz|archive)-simulate-error' - ', try x-archive-simulate-error:helpd36ec445-8d4a-4a64-' - 'a110-f67af6ee2c2a') + body = ( + "" + 'SlowDownPlease reduce your request rate.' 
+ 'simulated error caused by x-(amz|archive)-simulate-error' + ', try x-archive-simulate-error:helpd36ec445-8d4a-4a64-' + 'a110-f67af6ee2c2a' + ) with IaRequestsMock(assert_all_requests_are_fired=False) as rsps: _expected_headers = deepcopy(EXPECTED_S3_HEADERS) - rsps.add(responses.GET, S3_URL_RE, - body='{"over_limit": "1"}') - rsps.add(responses.PUT, S3_URL_RE, - body=body, - adding_headers=_expected_headers, - status=503) + rsps.add(responses.GET, S3_URL_RE, body='{"over_limit": "1"}') + rsps.add( + responses.PUT, + S3_URL_RE, + body=body, + adding_headers=_expected_headers, + status=503, + ) try: - nasa_item.upload(NASA_METADATA_PATH, - access_key='a', - secret_key='b', - retries=1, - retries_sleep=.1, - verbose=True) + nasa_item.upload( + NASA_METADATA_PATH, + access_key='a', + secret_key='b', + retries=1, + retries_sleep=0.1, + verbose=True, + ) except Exception as exc: assert 'Please reduce your request rate' in str(exc) out, err = capsys.readouterr() @@ -439,8 +471,7 @@ def test_upload_file_keys(nasa_item): def test_upload_dir(tmpdir, nasa_item): with IaRequestsMock(assert_all_requests_are_fired=False) as rsps: - rsps.add(responses.PUT, S3_URL_RE, - adding_headers=EXPECTED_S3_HEADERS) + rsps.add(responses.PUT, S3_URL_RE, adding_headers=EXPECTED_S3_HEADERS) tmpdir.mkdir('dir_test') with open(os.path.join(str(tmpdir), 'dir_test', 'foo.txt'), 'w') as fh: @@ -449,9 +480,9 @@ def test_upload_dir(tmpdir, nasa_item): fh.write('hi 2') # Test no-slash upload, dir is not in key name. - _responses = nasa_item.upload(os.path.join(str(tmpdir), 'dir_test') + '/', - access_key='a', - secret_key='b') + _responses = nasa_item.upload( + os.path.join(str(tmpdir), 'dir_test') + '/', access_key='a', secret_key='b' + ) expected_eps = [ f'{S3_URL}nasa/foo.txt', f'{S3_URL}nasa/foo2.txt', @@ -460,9 +491,9 @@ def test_upload_dir(tmpdir, nasa_item): assert resp.request.url in expected_eps # Test slash upload, dir is in key name. 
- _responses = nasa_item.upload(os.path.join(str(tmpdir), 'dir_test'), - access_key='a', - secret_key='b') + _responses = nasa_item.upload( + os.path.join(str(tmpdir), 'dir_test'), access_key='a', secret_key='b' + ) tmp_path = norm_filepath(str(tmpdir)) expected_eps = [ f'{S3_URL}nasa{tmp_path}/dir_test/foo.txt', @@ -478,7 +509,9 @@ def test_upload_queue_derive(nasa_item): _expected_headers['x-archive-queue-derive'] = '1' del _expected_headers['x-archive-meta00-scanner'] rsps.add(responses.PUT, S3_URL_RE, adding_headers=_expected_headers) - _responses = nasa_item.upload(NASA_METADATA_PATH, access_key='a', secret_key='b') + _responses = nasa_item.upload( + NASA_METADATA_PATH, access_key='a', secret_key='b' + ) for resp in _responses: headers = {k.lower(): str(v) for k, v in resp.request.headers.items()} del headers['x-archive-meta00-scanner'] @@ -488,12 +521,14 @@ def test_upload_queue_derive(nasa_item): def test_upload_delete(tmpdir, nasa_item): - body = ("" - 'BadDigestThe Content-MD5 you specified did not ' - 'match what we received.content-md5 submitted with PUT: ' - 'foo != received data md5: 70871f9fce8dd23853d6e42417356b05also not equal to ' - 'base64 version: cIcfn86N0jhT1uQkFzVrBQ==ec03fe7c-e123-' - '4133-a207-3141d4d74096') + body = ( + "" + 'BadDigestThe Content-MD5 you specified did not ' + 'match what we received.content-md5 submitted with PUT: ' + 'foo != received data md5: 70871f9fce8dd23853d6e42417356b05also not equal to ' + 'base64 version: cIcfn86N0jhT1uQkFzVrBQ==ec03fe7c-e123-' + '4133-a207-3141d4d74096' + ) _expected_headers = deepcopy(EXPECTED_S3_HEADERS) del _expected_headers['x-archive-meta00-scanner'] @@ -504,16 +539,21 @@ def test_upload_delete(tmpdir, nasa_item): with IaRequestsMock(assert_all_requests_are_fired=False) as rsps: # Non-matching md5, should not delete. - rsps.add(responses.PUT, S3_URL_RE, - body=body, - adding_headers=_expected_headers, - status=400) + rsps.add( + responses.PUT, + S3_URL_RE, + body=body, + adding_headers=_expected_headers, + status=400, + ) with pytest.raises(HTTPError): - nasa_item.upload(test_file, - access_key='a', - secret_key='b', - delete=True, - queue_derive=True) + nasa_item.upload( + test_file, + access_key='a', + secret_key='b', + delete=True, + queue_derive=True, + ) assert len(tmpdir.listdir()) == 1 @@ -524,13 +564,10 @@ def test_upload_delete(tmpdir, nasa_item): # Matching md5, should delete. with IaRequestsMock(assert_all_requests_are_fired=False) as rsps: - rsps.add(responses.PUT, S3_URL_RE, - adding_headers=_expected_headers) - resp = nasa_item.upload(test_file, - access_key='a', - secret_key='b', - delete=True, - queue_derive=True) + rsps.add(responses.PUT, S3_URL_RE, adding_headers=_expected_headers) + resp = nasa_item.upload( + test_file, access_key='a', secret_key='b', delete=True, queue_derive=True + ) for r in resp: headers = {k.lower(): str(v) for k, v in r.headers.items()} del headers['content-type'] @@ -553,10 +590,9 @@ def test_upload_checksum(tmpdir, nasa_item): # No skip. rsps.add(responses.PUT, S3_URL_RE, adding_headers=_expected_headers) - resp = nasa_item.upload(test_file, - access_key='a', - secret_key='b', - checksum=True) + resp = nasa_item.upload( + test_file, access_key='a', secret_key='b', checksum=True + ) for r in resp: headers = {k.lower(): str(v) for k, v in r.headers.items()} del headers['content-type'] @@ -565,11 +601,11 @@ def test_upload_checksum(tmpdir, nasa_item): # Skip. 
nasa_item.item_metadata['files'].append( - {'name': 'checksum_test.txt', 'md5': '33213e7683c1e6d15b2a658f3c567717'}) - resp = nasa_item.upload(test_file, - access_key='a', - secret_key='b', - checksum=True) + {'name': 'checksum_test.txt', 'md5': '33213e7683c1e6d15b2a658f3c567717'} + ) + resp = nasa_item.upload( + test_file, access_key='a', secret_key='b', checksum=True + ) for r in resp: headers = {k.lower(): str(v) for k, v in r.headers.items()} assert r.status_code is None @@ -582,9 +618,11 @@ def test_modify_metadata(nasa_item, nasa_metadata): # Test simple add. md = {'foo': 'bar'} p = nasa_item.modify_metadata(md, debug=True) - _patch = json.dumps([ - {'add': '/foo', 'value': 'bar'}, - ]) + _patch = json.dumps( + [ + {'add': '/foo', 'value': 'bar'}, + ] + ) expected_data = { 'priority': -5, '-target': 'metadata', @@ -605,7 +643,7 @@ def test_modify_metadata(nasa_item, nasa_metadata): expected_data = { 'priority': -5, '-target': 'metadata', - '-patch': json.dumps([{'remove': '/title'}]) + '-patch': json.dumps([{'remove': '/title'}]), } assert set(p.data.keys()) == set(expected_data.keys()) assert p.data['priority'] == expected_data['priority'] @@ -619,13 +657,16 @@ def test_modify_metadata(nasa_item, nasa_metadata): expected_data = { 'priority': -1, '-target': 'metadata', - '-patch': json.dumps([{'add': '/subject', 'value': ['one', 'two', 'last']}]) + '-patch': json.dumps( + [{'add': '/subject', 'value': ['one', 'two', 'last']}] + ), } assert set(p.data.keys()) == set(expected_data.keys()) assert p.data['priority'] == expected_data['priority'] assert p.data['-target'] == expected_data['-target'] - assert '["one", "two", "last"]' in str(p.data['-patch']) \ - or '["one","two","last"]' in str(p.data['-patch']) + assert '["one", "two", "last"]' in str( + p.data['-patch'] + ) or '["one","two","last"]' in str(p.data['-patch']) # Test indexed mod. nasa_item.item_metadata['metadata']['subject'] = ['first', 'middle', 'last'] @@ -634,7 +675,7 @@ def test_modify_metadata(nasa_item, nasa_metadata): expected_data = { 'priority': -5, '-target': 'metadata', - '-patch': json.dumps([{'value': 'new first', 'replace': '/subject/2'}]) + '-patch': json.dumps([{'value': 'new first', 'replace': '/subject/2'}]), } # Avoid comparing the json strings, because they are not in a canonical form @@ -650,10 +691,7 @@ def test_modify_metadata(nasa_item, nasa_metadata): # Test auth. 
md = {'title': 'NASA Images'} - p = nasa_item.modify_metadata(md, - access_key='a', - secret_key='b', - debug=True) + p = nasa_item.modify_metadata(md, access_key='a', secret_key='b', debug=True) assert 'access=a' in p.body assert 'secret=b' in p.body @@ -661,9 +699,9 @@ def test_modify_metadata(nasa_item, nasa_metadata): md = {'title': 'new title'} nasa_metadata['metadata']['title'] = 'new title' _item_metadata = json.dumps(nasa_metadata) - rsps.add(responses.GET, f'{PROTOCOL}//archive.org/metadata/nasa', body=_item_metadata) - nasa_item.modify_metadata(md, - access_key='a', - secret_key='b') + rsps.add( + responses.GET, f'{PROTOCOL}//archive.org/metadata/nasa', body=_item_metadata + ) + nasa_item.modify_metadata(md, access_key='a', secret_key='b') # Test that item re-initializes assert nasa_item.metadata['title'] == 'new title' diff --git a/tests/test_session.py b/tests/test_session.py index 5574b94b..0fd50554 100644 --- a/tests/test_session.py +++ b/tests/test_session.py @@ -44,9 +44,12 @@ def test_get_item(tmpdir): item_metadata = fh.read().strip() with responses.RequestsMock() as rsps: - rsps.add(responses.GET, f'{PROTOCOL}//archive.org/metadata/nasa', - body=item_metadata, - content_type='application/json') + rsps.add( + responses.GET, + f'{PROTOCOL}//archive.org/metadata/nasa', + body=item_metadata, + content_type='application/json', + ) s = internetarchive.session.ArchiveSession() item = s.get_item('nasa') @@ -54,10 +57,13 @@ def test_get_item(tmpdir): assert item.identifier == 'nasa' with responses.RequestsMock() as rsps: - rsps.add(responses.GET, f'{PROTOCOL}//archive.org/metadata/nasa', - body=item_metadata, - status=400, - content_type='application/json') + rsps.add( + responses.GET, + f'{PROTOCOL}//archive.org/metadata/nasa', + body=item_metadata, + status=400, + content_type='application/json', + ) s = internetarchive.session.ArchiveSession(CONFIG) try: @@ -86,9 +92,12 @@ def test_s3_is_overloaded(): }""" with IaRequestsMock() as rsps: - rsps.add(responses.GET, f'{PROTOCOL}//s3.us.archive.org', - body=test_body, - content_type='application/json') + rsps.add( + responses.GET, + f'{PROTOCOL}//s3.us.archive.org', + body=test_body, + content_type='application/json', + ) s = internetarchive.session.ArchiveSession(CONFIG) r = s.s3_is_overloaded('nasa') assert r is False @@ -111,9 +120,12 @@ def test_s3_is_overloaded(): }""" with responses.RequestsMock() as rsps: - rsps.add(responses.GET, f'{PROTOCOL}//s3.us.archive.org', - body=test_body, - content_type='application/json') + rsps.add( + responses.GET, + f'{PROTOCOL}//s3.us.archive.org', + body=test_body, + content_type='application/json', + ) s = internetarchive.session.ArchiveSession(CONFIG) r = s.s3_is_overloaded('nasa') assert r is True diff --git a/tests/test_utils.py b/tests/test_utils.py index 298305ff..e7118fb7 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -14,8 +14,10 @@ def test_utils(): def test_needs_quote(): - notascii = ('ȧƈƈḗƞŧḗḓ ŧḗẋŧ ƒǿř ŧḗşŧīƞɠ, ℛℯα∂α♭ℓℯ ♭ʊ☂ η☺т Ѧ$☾ℐℐ, ' - '¡ooʇ ןnɟǝsn sı uʍop-ǝpısdn') + notascii = ( + 'ȧƈƈḗƞŧḗḓ ŧḗẋŧ ƒǿř ŧḗşŧīƞɠ, ℛℯα∂α♭ℓℯ ♭ʊ☂ η☺т Ѧ$☾ℐℐ, ' + '¡ooʇ ןnɟǝsn sı uʍop-ǝpısdn' + ) assert internetarchive.utils.needs_quote(notascii) assert internetarchive.utils.needs_quote(string.whitespace) assert not internetarchive.utils.needs_quote(string.ascii_letters + string.digits) @@ -53,22 +55,29 @@ def test_IdentifierListAsItems(session): def test_IdentifierListAsItems_len(session): - assert len(internetarchive.utils.IdentifierListAsItems(['foo', 'bar'], session)) == 2 + assert ( + 
len(internetarchive.utils.IdentifierListAsItems(['foo', 'bar'], session)) == 2 + ) + # TODO: Add test of slice access to IdenfierListAsItems def test_get_s3_xml_text(): - xml_str = ('NoSuchBucket' - 'The specified bucket does not exist.' - '' - 'does-not-exist-! not found by Metadata::get_obj()[server]' - '' - 'd56bdc63-169b-4b4f-8c47-0fac6de39040') + xml_str = ( + 'NoSuchBucket' + 'The specified bucket does not exist.' + '' + 'does-not-exist-! not found by Metadata::get_obj()[server]' + '' + 'd56bdc63-169b-4b4f-8c47-0fac6de39040' + ) expected_txt = internetarchive.utils.get_s3_xml_text(xml_str) - assert expected_txt == ('The specified bucket does not exist. - does-not-exist-! ' - 'not found by Metadata::get_obj()[server]') + assert expected_txt == ( + 'The specified bucket does not exist. - does-not-exist-! ' + 'not found by Metadata::get_obj()[server]' + ) def test_get_file_size(): @@ -85,10 +94,23 @@ def test_is_valid_metadata_key(): # Keys starting with "xml" should also be invalid # due to the XML specification, but are supported # by the Internet Archive. - valid = ('adaptive_ocr', 'bookreader-defaults', 'frames_per_second', - 'identifier', 'possible-copyright-status', 'index[0]') - invalid = ('Analog Format', "Date of transfer (probably today's date)", - '_metadata_key', '58', '_', '', 'a') + valid = ( + 'adaptive_ocr', + 'bookreader-defaults', + 'frames_per_second', + 'identifier', + 'possible-copyright-status', + 'index[0]', + ) + invalid = ( + 'Analog Format', + "Date of transfer (probably today's date)", + '_metadata_key', + '58', + '_', + '', + 'a', + ) for metadata_key in valid: assert internetarchive.utils.is_valid_metadata_key(metadata_key)