diff --git a/pyproject.toml b/pyproject.toml index b61a95e..1ab4871 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -31,11 +31,11 @@ requires-python = ">=3.9" [project.optional-dependencies] dev = [ "poethepoet ~= 0.34.0", - "pytest ~=7.4.4", - "pytest-benchmark", - "flit ~= 3.9.0", - "ruff ~= 0.11.6", - "coverage ~= 7.4.0", + "pytest ~=8.3.5", + "pytest-benchmark ~= 5.1.0", + "flit ~= 3.12.0", + "ruff ~= 0.11.10", + "coverage ~= 7.8.0", ] [build-system] @@ -48,8 +48,11 @@ omit = [ ] [tool.coverage.report] -fail_under=94 +fail_under=99.5 precision=1 +exclude_also = [ + "@abstractmethod", +] [tool.flit.sdist] exclude = [".github", "tests"] diff --git a/rispy/parser.py b/rispy/parser.py index 3c159e2..8417de2 100644 --- a/rispy/parser.py +++ b/rispy/parser.py @@ -199,8 +199,6 @@ def _add_list_value(self, record: dict, name: str, value: Union[str, list[str]]) except KeyError: record[name] = value_list except AttributeError: - if not isinstance(record[name], str): - raise must_exist = record[name] record[name] = [must_exist, *value_list] @@ -267,7 +265,7 @@ def load( *, encoding: Optional[str] = None, newline: Optional[str] = None, - implementation: Optional[RisParser] = None, + implementation: type[RisParser] = RisParser, **kw, ) -> list[dict]: """Load a RIS file and return a list of entries. @@ -279,35 +277,28 @@ def load( of strings. Args: - file (Union[TextIO, Path]): File handle to read ris formatted data. + file (Union[TextIO, Path]): File handle of RIS data. encoding(str, optional): File encoding, only used when a Path is supplied. Consistent with the python standard library, if `None` is supplied, the default system encoding is used. newline(str, optional): File line separator. - implementation (RisImplementation): RIS implementation; base by - default. + implementation (RisParser): RIS implementation; RisParser by default. Returns: list: Returns list of RIS entries. """ - if implementation is None: - parser = RisParser - else: - parser = implementation - - if hasattr(file, "readline"): - return parser(newline=newline, **kw).parse_lines(file) - elif hasattr(file, "open"): + if isinstance(file, Path): with file.open(mode="r", newline=newline, encoding=encoding) as f: - return parser(**kw).parse_lines(f) + return implementation(**kw).parse_lines(f) + if hasattr(file, "readline"): + return implementation(newline=newline, **kw).parse_lines(file) elif hasattr(file, "read"): return loads(file.read(), implementation=implementation, newline=newline, **kw) - else: - raise ValueError("File must be a file-like object or a Path object") + raise ValueError("File must be a file-like object or a Path object") -def loads(text: str, *, implementation: Optional[type[RisParser]] = None, **kw) -> list[dict]: +def loads(text: str, *, implementation: type[RisParser] = RisParser, **kw) -> list[dict]: """Load a RIS file and return a list of entries. Entries are codified as dictionaries whose keys are the @@ -317,16 +308,10 @@ def loads(text: str, *, implementation: Optional[type[RisParser]] = None, **kw) of strings. Args: - text (str): A string version of an RIS file. - implementation (RisImplementation): RIS implementation; base by - default. + text (str): A string version of RIS data + implementation (RisParser): RIS implementation; RisParser by default. Returns: list: Returns list of RIS entries. """ - if implementation is None: - parser = RisParser - else: - parser = implementation - - return parser(**kw).parse(text) + return implementation(**kw).parse(text) diff --git a/rispy/writer.py b/rispy/writer.py index dd04069..785b85d 100644 --- a/rispy/writer.py +++ b/rispy/writer.py @@ -1,7 +1,7 @@ """RIS Writer.""" import warnings -from abc import ABC +from abc import ABC, abstractmethod from pathlib import Path from typing import ClassVar, Optional, TextIO, Union @@ -90,21 +90,15 @@ def __init__( def _get_reference_type(self, ref): if self.REFERENCE_TYPE_KEY in ref: - # TODO add check return ref[self.REFERENCE_TYPE_KEY] - - if self.DEFAULT_REFERENCE_TYPE is not None: - return self.DEFAULT_REFERENCE_TYPE - else: - raise ValueError("Unknown type of reference") + return self.DEFAULT_REFERENCE_TYPE def _format_line(self, tag, value=""): """Format a RIS line.""" return self.PATTERN.format(tag=tag, value=value) def _format_reference(self, ref, count, n): - header = self.set_header(count) - if header is not None: + if header := self.set_header(count): yield header yield self._format_line(self.START_TAG, self._get_reference_type(ref)) @@ -166,9 +160,10 @@ def formats(self, references: list[dict]) -> str: lines = self._yield_lines(references, extra_line=True) return self.NEWLINE.join(lines) - def set_header(self, count: int) -> Optional[str]: - """Create the header for each reference.""" - return None + @abstractmethod + def set_header(self, count: int) -> str: + """Create the header for each reference; if empty string, unused.""" + ... class RisWriter(BaseWriter): @@ -189,7 +184,7 @@ def dump( file: Union[TextIO, Path], *, encoding: Optional[str] = None, - implementation: Optional[BaseWriter] = None, + implementation: type[BaseWriter] = RisWriter, **kw, ): """Write an RIS file to file or file-like object. @@ -204,26 +199,18 @@ def dump( references (list[dict]): List of references. file (TextIO): File handle to store ris formatted data. encoding (str, optional): Encoding to use when opening file. - implementation (RisImplementation): RIS implementation; base by - default. + implementation (BaseWriter): RIS implementation; base by default. """ - if implementation is None: - writer = RisWriter - else: - writer = implementation - - if hasattr(file, "write"): - writer(**kw).format_lines(file, references) - elif hasattr(file, "open"): + if isinstance(file, Path): with file.open(mode="w", encoding=encoding) as f: - writer(**kw).format_lines(f, references) + implementation(**kw).format_lines(f, references) + elif hasattr(file, "write"): + implementation(**kw).format_lines(file, references) else: raise ValueError("File must be a file-like object or a Path object") -def dumps( - references: list[dict], *, implementation: Optional[type[BaseWriter]] = None, **kw -) -> str: +def dumps(references: list[dict], *, implementation: type[BaseWriter] = RisWriter, **kw) -> str: """Return an RIS formatted string. Entries are codified as dictionaries whose keys are the @@ -234,12 +221,6 @@ def dumps( Args: references (list[dict]): List of references. - implementation (RisImplementation): RIS implementation; base by - default. + implementation (BaseWriter): RIS implementation; RisWriter by default. """ - if implementation is None: - writer = RisWriter - else: - writer = implementation - - return writer(**kw).formats(references) + return implementation(**kw).formats(references) diff --git a/tests/test_parser.py b/tests/test_parser.py index 78a2a36..0ad0e60 100644 --- a/tests/test_parser.py +++ b/tests/test_parser.py @@ -1,3 +1,4 @@ +from io import StringIO from pathlib import Path import pytest @@ -7,44 +8,62 @@ DATA_DIR = Path(__file__).parent.resolve() / "data" -def test_load_example_basic_ris(): - filepath = DATA_DIR / "example_basic.ris" - expected = { - "type_of_reference": "JOUR", - "authors": ["Shannon,Claude E."], - "year": "1948/07//", - "title": "A Mathematical Theory of Communication", - "alternate_title3": "Bell System Technical Journal", - "start_page": "379", - "end_page": "423", - "volume": "27", - } +@pytest.fixture +def example_basic(): + # expected output from `example_basic.ris` + return [ + { + "type_of_reference": "JOUR", + "authors": ["Shannon,Claude E."], + "year": "1948/07//", + "title": "A Mathematical Theory of Communication", + "alternate_title3": "Bell System Technical Journal", + "start_page": "379", + "end_page": "423", + "volume": "27", + } + ] + +def test_load_file(example_basic): # test with file object + filepath = DATA_DIR / "example_basic.ris" with open(filepath) as f: entries = rispy.load(f) - assert expected == entries[0] + assert example_basic == entries + + +def test_load_file_noreadline(example_basic): + # test with file object that has no readline - # test with pathlib object + class NoReadline(StringIO): + @property + def readline(self): # type: ignore + raise AttributeError("Not found") + + filepath = DATA_DIR / "example_basic.ris" + f = NoReadline(filepath.read_text()) + assert not hasattr(f, "readline") + entries = rispy.load(f) + assert example_basic == entries + + +def test_load_path(example_basic): + # test with Path object + filepath = DATA_DIR / "example_basic.ris" p = Path(filepath) entries = rispy.load(p) - assert expected == entries[0] + assert example_basic == entries -def test_loads(): - ristext = (DATA_DIR / "example_basic.ris").read_text() - expected = { - "type_of_reference": "JOUR", - "authors": ["Shannon,Claude E."], - "year": "1948/07//", - "title": "A Mathematical Theory of Communication", - "alternate_title3": "Bell System Technical Journal", - "start_page": "379", - "end_page": "423", - "volume": "27", - } +def test_load_bad_file(): + with pytest.raises(ValueError, match="File must be a file-like object or a Path object"): + rispy.load("test") # type: ignore - assert expected == rispy.loads(ristext)[0] + +def test_loads(example_basic): + ristext = (DATA_DIR / "example_basic.ris").read_text() + assert example_basic == rispy.loads(ristext) def test_load_multiline_ris(): diff --git a/tests/test_utils.py b/tests/test_utils.py new file mode 100644 index 0000000..ae5347f --- /dev/null +++ b/tests/test_utils.py @@ -0,0 +1,14 @@ +import pytest + +from rispy.utils import invert_dictionary + + +def test_invert_dictionary(): + d = {"a": "b"} + assert invert_dictionary(d) == {"b": "a"} + + +def test_invert_dictionary_failure(): + d = {"a": "b", "c": "b"} + with pytest.raises(ValueError, match="Dictionary cannot be inverted"): + invert_dictionary(d) diff --git a/tests/test_writer.py b/tests/test_writer.py index 8bb363b..809a965 100644 --- a/tests/test_writer.py +++ b/tests/test_writer.py @@ -9,6 +9,20 @@ DATA_DIR = Path(__file__).parent.resolve() / "data" +@pytest.fixture +def ris_data(): + return [ + { + "type_of_reference": "JOUR", + "authors": ["Shannon, Claude E.", "Doe, John"], + "year": "1948/07//", + "title": "A Mathematical Theory of Communication", + "start_page": "379", + "urls": ["https://example.com", "https://example2.com"], + } + ] + + def test_dump_and_load(): # check that we can write the same file we read source_fp = DATA_DIR / "example_full.ris" @@ -131,81 +145,39 @@ class CustomWriter(rispy.RisWriter): assert reload == entries -def test_write_single_unknown_tag(): - entries = [ - { - "type_of_reference": "JOUR", - "authors": ["Shannon, Claude E."], - "year": "1948/07//", - "title": "A Mathematical Theory of Communication", - "start_page": "379", - "unknown_tag": {"JP": ["CRISPR"]}, - } - ] - - text_output = rispy.dumps(entries) - +def test_write_single_unknown_tag(ris_data): + ris_data[0]["unknown_tag"] = {"JP": ["CRISPR"]} + text_output = rispy.dumps(ris_data) # check output is as expected lines = text_output.splitlines() - assert lines[6] == "JP - CRISPR" - assert len(lines) == 8 + assert lines[9] == "JP - CRISPR" + assert len(lines) == 11 -def test_write_multiple_unknown_tag_same_type(): - entries = [ - { - "type_of_reference": "JOUR", - "authors": ["Shannon, Claude E."], - "year": "1948/07//", - "title": "A Mathematical Theory of Communication", - "start_page": "379", - "unknown_tag": {"JP": ["CRISPR", "PEOPLE"]}, - } - ] - - text_output = rispy.dumps(entries) +def test_write_multiple_unknown_tag_same_type(ris_data): + ris_data[0]["unknown_tag"] = {"JP": ["CRISPR", "PEOPLE"]} + text_output = rispy.dumps(ris_data) # check output is as expected lines = text_output.splitlines() - assert lines[6] == "JP - CRISPR" - assert lines[7] == "JP - PEOPLE" - assert len(lines) == 9 + assert lines[9] == "JP - CRISPR" + assert lines[10] == "JP - PEOPLE" + assert len(lines) == 12 -def test_write_multiple_unknown_tag_diff_type(): - entries = [ - { - "type_of_reference": "JOUR", - "authors": ["Shannon, Claude E."], - "year": "1948/07//", - "title": "A Mathematical Theory of Communication", - "start_page": "379", - "unknown_tag": {"JP": ["CRISPR"], "ED": ["Swinburne, Ricardo"]}, - } - ] - - text_output = rispy.dumps(entries) +def test_write_multiple_unknown_tag_diff_type(ris_data): + ris_data[0]["unknown_tag"] = {"JP": ["CRISPR"], "ED": ["Swinburne, Ricardo"]} + text_output = rispy.dumps(ris_data) # check output is as expected lines = text_output.splitlines() - assert lines[6] == "JP - CRISPR" - assert lines[7] == "ED - Swinburne, Ricardo" - assert len(lines) == 9 + assert lines[9] == "JP - CRISPR" + assert lines[10] == "ED - Swinburne, Ricardo" + assert len(lines) == 12 -def test_default_dump(): - entries = [ - { - "type_of_reference": "JOUR", - "authors": ["Shannon, Claude E.", "Doe, John"], - "year": "1948/07//", - "title": "A Mathematical Theory of Communication", - "start_page": "379", - "urls": ["https://example.com", "https://example2.com"], - } - ] - - text_output = rispy.dumps(entries) +def test_default_dump(ris_data): + text_output = rispy.dumps(ris_data) lines = text_output.splitlines() assert lines[2] == "AU - Shannon, Claude E." assert lines[3] == "AU - Doe, John" @@ -214,20 +186,9 @@ def test_default_dump(): assert len(lines) == 10 -def test_delimited_dump(): - entries = [ - { - "type_of_reference": "JOUR", - "authors": ["Shannon, Claude E.", "Doe, John"], - "year": "1948/07//", - "title": "A Mathematical Theory of Communication", - "start_page": "379", - "urls": ["https://example.com", "https://example2.com"], - } - ] - +def test_delimited_dump(ris_data): # remove URLs from list_tags and give it a custom delimiter - text_output = rispy.dumps(entries, list_tags=["AU"], delimiter_tags_mapping={"UR": ","}) + text_output = rispy.dumps(ris_data, list_tags=["AU"], delimiter_tags_mapping={"UR": ","}) # check output is as expected lines = text_output.splitlines() @@ -235,3 +196,15 @@ def test_delimited_dump(): assert lines[3] == "AU - Doe, John" assert lines[7] == "UR - https://example.com,https://example2.com" assert len(lines) == 9 + + +def test_dump_path(tmp_path, ris_data): + # check that dump works with a Path object + path = tmp_path / "file.ris" + rispy.dump(ris_data, path) + assert len(path.read_text()) > 0 + + +def test_bad_dump(ris_data): + with pytest.raises(ValueError, match="File must be a file-like object or a Path object"): + rispy.dump(ris_data, 123) # type: ignore