diff --git a/docs/developers.rst b/docs/developers.rst
index 5b2bb47cb..814f9f35c 100644
--- a/docs/developers.rst
+++ b/docs/developers.rst
@@ -93,7 +93,7 @@ solely on the basis that it was not discussed upfront.
 RDFLib follows `semantic versioning <https://semver.org/>`_ and
 `trunk-based development <https://trunkbaseddevelopment.com/>`_, so if any breaking
 changes were introduced into the main branch since the last release, then the next release
-will be a major release with an incremented major version. 
+will be a major release with an incremented major version.
 
 Releases of RDFLib will not as a rule be conditioned on specific features, so
 there may be new major releases that contain very few breaking changes, and
@@ -201,6 +201,15 @@ executing the tests.
 
    $ poetry install --all-extras
    $ poetry run pytest
 
+By default, tests of the ``SPARQLStore`` against remote public endpoints are skipped. To enable them, add the ``--public-endpoints`` flag:
+
+.. code-block:: console
+
+   $ poetry run pytest --public-endpoints
+   $ # Or exclusively run the SPARQLStore tests:
+   $ poetry run pytest test/test_store/test_store_sparqlstore_public.py --public-endpoints
+
+
 Writing tests
 ~~~~~~~~~~~~~
@@ -406,7 +415,7 @@ container:
 
    # Inside the repository base directory
    cd ./rdflib/
-   
+
    # Build the development container.
    devcontainer build .
 
@@ -448,7 +457,7 @@ Create a release-preparation pull request with the following changes:
 * Updated version and date in ``CITATION.cff``.
 * Updated copyright year in the ``LICENSE`` file.
 * Updated copyright year in the ``docs/conf.py`` file.
-* Updated main branch version and current version in the ``README.md`` file. 
+* Updated main branch version and current version in the ``README.md`` file.
 * Updated version in the ``pyproject.toml`` file.
 * Updated ``__date__`` in the ``rdflib/__init__.py`` file.
 * Accurate ``CHANGELOG.md`` entry for the release.
@@ -456,7 +465,7 @@ Create a release-preparation pull request with the following changes:
 
 Once the PR is merged, switch to the main branch, build the release and upload it to PyPI:
 
 .. code-block:: bash
-   
+
     # Clean up any previous builds
     \rm -vf dist/*
@@ -468,7 +477,7 @@ Once the PR is merged, switch to the main branch, build the release and upload i
     bsdtar -xvf dist/rdflib-*.tar.gz -O '*/PKG-INFO' | view -
 
     # Check that the built wheel and sdist works correctly:
-    ## Ensure pipx is installed but not within RDFLib's environment 
+    ## Ensure pipx is installed but not within RDFLib's environment
     pipx run --no-cache --spec "$(readlink -f dist/rdflib*.whl)" rdfpipe --version
     pipx run --no-cache --spec "$(readlink -f dist/rdflib*.whl)" rdfpipe https://github.com/RDFLib/rdflib/raw/main/test/data/defined_namespaces/rdfs.ttl
     pipx run --no-cache --spec "$(readlink -f dist/rdflib*.tar.gz)" rdfpipe --version
@@ -485,7 +494,7 @@ Once the PR is merged, switch to the main branch, build the release and upload i
 
     # Publish to PyPI
     poetry publish
     ## poetry publish -u __token__ -p pypi-
-   
+
 
 Once this is done, create a release tag from `GitHub releases <https://github.com/RDFLib/rdflib/releases>`_.
 For a release of version
diff --git a/rdflib/compare.py b/rdflib/compare.py
index 58644ae8f..77bfba65d 100644
--- a/rdflib/compare.py
+++ b/rdflib/compare.py
@@ -442,23 +442,23 @@ def _traces(
             experimental = self._experimental_path(coloring_copy)
             experimental_score = set([c.key() for c in experimental])
             if last_coloring:
-                generator = self._create_generator(  # type: ignore[unreachable]
+                generator = self._create_generator(
                     [last_coloring, experimental], generator
                 )
             last_coloring = experimental
-            if best_score is None or best_score < color_score:  # type: ignore[unreachable]
+            if best_score is None or best_score < color_score:
                 best = [refined_coloring]
                 best_score = color_score
                 best_experimental_score = experimental_score
-            elif best_score > color_score:  # type: ignore[unreachable]
+            elif best_score > color_score:
                 # prune this branch.
-                if stats is not None:
+                if stats is not None and isinstance(stats["prunings"], int):
                     stats["prunings"] += 1
             elif experimental_score != best_experimental_score:
                 best.append(refined_coloring)
             else:
                 # prune this branch.
-                if stats is not None:
+                if stats is not None and isinstance(stats["prunings"], int):
                     stats["prunings"] += 1
         discrete: list[list[Color]] = [x for x in best if self._discrete(x)]
         if len(discrete) == 0:
@@ -468,7 +468,7 @@ def _traces(
             d = [depth[0]]
             new_color = self._traces(coloring, stats=stats, depth=d)
             color_score = tuple([c.key() for c in refined_coloring])
-            if best_score is None or color_score > best_score:  # type: ignore[unreachable]
+            if best_score is None or color_score > best_score:
                 discrete = [new_color]
                 best_score = color_score
                 best_depth = d[0]
diff --git a/rdflib/plugins/parsers/jsonld.py b/rdflib/plugins/parsers/jsonld.py
index 45e696adb..0031c8662 100644
--- a/rdflib/plugins/parsers/jsonld.py
+++ b/rdflib/plugins/parsers/jsonld.py
@@ -663,7 +663,7 @@ def _add_list(
 
         if rest:
             # type error: Statement is unreachable
-            graph.add((subj, RDF.rest, rest))  # type: ignore[unreachable]
+            graph.add((subj, RDF.rest, rest))
             subj = rest
 
         obj = self._to_object(dataset, graph, context, term, node, inlist=True)
diff --git a/rdflib/plugins/stores/sparqlconnector.py b/rdflib/plugins/stores/sparqlconnector.py
index 2fe454003..1b5b580d3 100644
--- a/rdflib/plugins/stores/sparqlconnector.py
+++ b/rdflib/plugins/stores/sparqlconnector.py
@@ -17,6 +17,9 @@
 if TYPE_CHECKING:
     import typing_extensions as te
 
+    SUPPORTED_METHODS = te.Literal["GET", "POST", "POST_FORM"]
+    SUPPORTED_FORMATS = te.Literal["xml", "json", "csv", "tsv", "application/rdf+xml"]
+
 
 class SPARQLConnectorException(Exception):  # noqa: N818
     pass
@@ -41,8 +44,8 @@ def __init__(
         self,
         query_endpoint: str | None = None,
         update_endpoint: str | None = None,
-        returnFormat: str = "xml",  # noqa: N803
-        method: te.Literal["GET", "POST", "POST_FORM"] = "GET",
+        returnFormat: SUPPORTED_FORMATS = "xml",  # noqa: N803
+        method: SUPPORTED_METHODS = "GET",
         auth: tuple[str, str] | None = None,
         **kwargs,
     ):
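The two ``Literal`` aliases above only exist under ``TYPE_CHECKING``, so they constrain static type checkers without adding any runtime cost. A minimal sketch of what they buy callers of ``SPARQLConnector`` (the endpoint URL is illustrative):

.. code-block:: python

    from rdflib.plugins.stores.sparqlconnector import SPARQLConnector

    connector = SPARQLConnector(
        query_endpoint="https://example.org/sparql",  # illustrative endpoint
        returnFormat="json",  # checked against SUPPORTED_FORMATS by mypy/pyright
        method="POST",  # checked against SUPPORTED_METHODS
    )
    # method="PUT" would be rejected by a static type checker before the code runs.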
diff --git a/rdflib/plugins/stores/sparqlstore.py b/rdflib/plugins/stores/sparqlstore.py
index b58e967ac..861b2463e 100644
--- a/rdflib/plugins/stores/sparqlstore.py
+++ b/rdflib/plugins/stores/sparqlstore.py
@@ -45,6 +45,7 @@
     )
     from rdflib.plugins.sparql.sparql import Query, Update
     from rdflib.query import Result, ResultRow
+    from .sparqlconnector import SUPPORTED_FORMATS, SUPPORTED_METHODS
 
 from .sparqlconnector import SPARQLConnector
 
@@ -68,11 +69,37 @@ def _node_to_sparql(node: Node) -> str:
 
 
 class SPARQLStore(SPARQLConnector, Store):
-    """An RDFLib store around a SPARQL endpoint
+    """An RDFLib store around a SPARQL endpoint.
 
     This is context-aware and should work as expected
     when a context is specified.
 
+    Usage example
+    -------------
+
+    .. code-block:: python
+
+        from rdflib import Dataset
+        from rdflib.plugins.stores.sparqlstore import SPARQLStore
+
+        g = Dataset(
+            SPARQLStore("https://query.wikidata.org/sparql", returnFormat="xml"),
+            default_union=True,
+        )
+        res = g.query("SELECT ?s ?p ?o WHERE { ?s ?p ?o } LIMIT 5")
+
+        # Iterate the results
+        for row in res:
+            print(row)
+
+        # Or serialize the results
+        print(res.serialize(format="json").decode())
+
+    .. warning:: Not all SPARQL endpoints support the same features.
+
+        Check out the `test suite on public endpoints `_
+        for more details on how to successfully query different types of endpoints.
+
     For ConjunctiveGraphs, reading is done from the "default graph". Exactly
     what this means depends on your endpoint, because SPARQL does not offer a
     simple way to query the union of all graphs as it would be expected for a
@@ -84,11 +111,11 @@ class SPARQLStore(SPARQLConnector, Store):
 
     .. warning:: By default the SPARQL Store does not support blank-nodes!
 
-                 As blank-nodes act as variables in SPARQL queries, 
-                 there is no way to query for a particular blank node without 
-                 using non-standard SPARQL extensions. 
+                 As blank-nodes act as variables in SPARQL queries,
+                 there is no way to query for a particular blank node without
+                 using non-standard SPARQL extensions.
 
-                 See http://www.w3.org/TR/sparql11-query/#BGPsparqlBNodes 
+                 See http://www.w3.org/TR/sparql11-query/#BGPsparqlBNodes
 
     You can make use of such extensions through the ``node_to_sparql``
     argument. For example if you want to transform BNode('0001') into
@@ -111,12 +138,10 @@ class SPARQLStore(SPARQLConnector, Store):
     urllib when doing HTTP calls. I.e. you have full control of
     cookies/auth/headers.
 
-    Form example:
+    HTTP basic auth is available with:
 
     >>> store = SPARQLStore('...my endpoint ...', auth=('user','pass'))
 
-    will use HTTP basic auth.
-
     """
 
     formula_aware = False
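The ``node_to_sparql`` doctest that this docstring refers to falls outside the hunk context above. For orientation, a function of the shape the docstring describes, mapping ``BNode('0001')`` to a ``bnode:b0001`` IRI, would look roughly like this (``my_bnode_ext`` and the endpoint URL are illustrative):

.. code-block:: python

    from rdflib import BNode
    from rdflib.plugins.stores.sparqlstore import SPARQLStore, _node_to_sparql

    def my_bnode_ext(node):
        # Serialize blank nodes as stable bnode: IRIs instead of SPARQL variables.
        if isinstance(node, BNode):
            return '<bnode:b%s>' % node
        # Defer to the default serializer for URIRefs and Literals.
        return _node_to_sparql(node)

    store = SPARQLStore('https://example.org/sparql', node_to_sparql=my_bnode_ext)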
- """ formula_aware = False @@ -130,13 +155,15 @@ def __init__( sparql11: bool = True, context_aware: bool = True, node_to_sparql: _NodeToSparql = _node_to_sparql, - returnFormat: str = "xml", # noqa: N803 + returnFormat: SUPPORTED_FORMATS = "xml", # noqa: N803 + method: SUPPORTED_METHODS = "GET", auth: tuple[str, str] | None = None, **sparqlconnector_kwargs, ): super(SPARQLStore, self).__init__( query_endpoint=query_endpoint, returnFormat=returnFormat, + method=method, auth=auth, **sparqlconnector_kwargs, ) diff --git a/rdflib/store.py b/rdflib/store.py index 96a16956b..51d6d8422 100644 --- a/rdflib/store.py +++ b/rdflib/store.py @@ -123,7 +123,7 @@ def loads(self, s: bytes) -> Node: up = Unpickler(BytesIO(s)) # NOTE on type error: https://github.com/python/mypy/issues/2427 # type error: Cannot assign to a method - up.persistent_load = self._get_object # type: ignore[assignment] + up.persistent_load = self._get_object try: return up.load() except KeyError as e: @@ -134,7 +134,7 @@ def dumps(self, obj: Node, protocol: Any | None = None, bin: Any | None = None): p = Pickler(src) # NOTE on type error: https://github.com/python/mypy/issues/2427 # type error: Cannot assign to a method - p.persistent_id = self._get_ids # type: ignore[assignment] + p.persistent_id = self._get_ids p.dump(obj) return src.getvalue() diff --git a/test/conftest.py b/test/conftest.py index 6cc7c2ca3..8cdb5936c 100644 --- a/test/conftest.py +++ b/test/conftest.py @@ -104,8 +104,15 @@ def exit_stack() -> Generator[ExitStack, None, None]: @pytest.hookimpl(tryfirst=True) -def pytest_collection_modifyitems(items: Iterable[pytest.Item]): +def pytest_collection_modifyitems(config: pytest.Config, items: Iterable[pytest.Item]): for item in items: + if config and not config.getoption("--public-endpoints", False): + # Skip tests marked with public_endpoints if the option is not provided + if "public_endpoints" in item.keywords: + item.add_marker( + pytest.mark.skip(reason="need --public-endpoints option to run") + ) + parent_name = ( str(Path(item.parent.module.__file__).relative_to(PROJECT_ROOT)) if item.parent is not None @@ -117,3 +124,19 @@ def pytest_collection_modifyitems(items: Iterable[pytest.Item]): extra_markers = EXTRA_MARKERS[(parent_name, item.name)] for extra_marker in extra_markers: item.add_marker(extra_marker) + + +def pytest_addoption(parser): + """Add optional pytest markers to run tests on public endpoints""" + parser.addoption( + "--public-endpoints", + action="store_true", + default=False, + help="run tests that require public SPARQL endpoints", + ) + + +def pytest_configure(config): + config.addinivalue_line( + "markers", "public_endpoints: mark tests that require public SPARQL endpoints" + ) diff --git a/test/test_store/test_store_sparqlstore_public.py b/test/test_store/test_store_sparqlstore_public.py new file mode 100644 index 000000000..2e1430e33 --- /dev/null +++ b/test/test_store/test_store_sparqlstore_public.py @@ -0,0 +1,208 @@ +import json + +import pytest + +from rdflib import Dataset +from rdflib.plugins.sparql.results.jsonresults import JSONResult +from rdflib.plugins.sparql.results.xmlresults import XMLResult +from rdflib.plugins.stores.sparqlstore import SPARQLStore +from rdflib.query import Result + +# Mark all tests in this module as public_endpoints +# They will be skipped by default, unless the --public-endpoints flag is passed to pytest +pytestmark = pytest.mark.public_endpoints + + +# NOTE: dbpedia virtuoso can be unstable, sometimes everything working as expected +# But it has phases 
diff --git a/test/test_store/test_store_sparqlstore_public.py b/test/test_store/test_store_sparqlstore_public.py
new file mode 100644
index 000000000..2e1430e33
--- /dev/null
+++ b/test/test_store/test_store_sparqlstore_public.py
@@ -0,0 +1,208 @@
+import json
+
+import pytest
+
+from rdflib import Dataset
+from rdflib.plugins.sparql.results.jsonresults import JSONResult
+from rdflib.plugins.sparql.results.xmlresults import XMLResult
+from rdflib.plugins.stores.sparqlstore import SPARQLStore
+from rdflib.query import Result
+
+# Mark all tests in this module as public_endpoints.
+# They will be skipped by default, unless the --public-endpoints flag is passed to pytest.
+pytestmark = pytest.mark.public_endpoints
+
+
+# NOTE: the DBpedia Virtuoso endpoint can be unstable: sometimes everything works as
+# expected, but it has phases where it sends (405, 'HTTP Error 405: Not Allowed', None)
+# or raises urllib.error.HTTPError: HTTP Error 502: Bad Gateway, while accessing the
+# endpoint directly in the browser works fine (so this is not a network issue or the
+# endpoint being down) -- classic Virtuoso.
+VIRTUOSO_8_DBPEDIA = "https://dbpedia.org/sparql"
+BLAZEGRAPH_WIKIDATA = "https://query.wikidata.org/sparql"
+GRAPHDB_FF = "http://factforge.net/repositories/ff-news"  # http://factforge.net/
+RDF4J_GEOSCIML = "http://vocabs.ands.org.au/repository/api/sparql/csiro_international-chronostratigraphic-chart_2018-revised-corrected"
+ALLEGROGRAPH_AGROVOC = "https://agrovoc.fao.org/sparql"
+ALLEGROGRAPH_4_MMI = "https://mmisw.org/sparql"  # AllegroServe/1.3.28 http://mmisw.org:10035/doc/release-notes.html
+FUSEKI_LOV = "https://lov.linkeddata.es/dataset/lov/sparql"  # Fuseki - version 1.1.1 (Build date: 2014-10-02T16:36:17+0100)
+FUSEKI2_STW = "http://zbw.eu/beta/sparql/stw/query"  # Fuseki 3.8.0 (Fuseki2)
+STARDOG_LINDAS = (
+    "https://lindas.admin.ch/query"  # human UI https://lindas.admin.ch/sparql/
+)
+STORE4_CHISE = "http://rdf.chise.org/sparql"  # 4store SPARQL server v1.1.4
+QLEVER_WIKIDATA = "https://qlever.cs.uni-freiburg.de/api/wikidata"  # https://qlever.cs.uni-freiburg.de/wikidata
+
+
+@pytest.fixture(
+    params=[
+        VIRTUOSO_8_DBPEDIA,
+        GRAPHDB_FF,
+        ALLEGROGRAPH_AGROVOC,
+        ALLEGROGRAPH_4_MMI,
+        BLAZEGRAPH_WIKIDATA,
+        FUSEKI_LOV,
+        RDF4J_GEOSCIML,
+        STARDOG_LINDAS,
+        STORE4_CHISE,
+        FUSEKI2_STW,
+        QLEVER_WIKIDATA,
+    ]
+)
+def endpoint(request):
+    return request.param
+
+
+def query_sparql(query, endpoint, return_format, method):
+    """Generic function to make a SPARQL request and return the result"""
+    g = Dataset(
+        SPARQLStore(endpoint, returnFormat=return_format, method=method),
+        default_union=True,
+    )
+    return g.query(query)
+
+
+METHODS_SUPPORTED = ["GET", "POST", "POST_FORM"]
+
+## SELECT and ASK
+
+ROWS_TYPES_MAP = {
+    "xml": XMLResult,
+    "json": JSONResult,
+    "csv": Result,
+    "tsv": Result,
+}
+
+
+@pytest.mark.parametrize("return_format", ROWS_TYPES_MAP.keys())
+@pytest.mark.parametrize("method", METHODS_SUPPORTED)
+def test_select_query(endpoint, return_format, method):
+    """Test SELECT queries with various return formats and methods"""
+    if endpoint in [STORE4_CHISE, FUSEKI2_STW] and method in ["POST", "POST_FORM"]:
+        pytest.skip(f"{endpoint} does not support POST requests")
+    if endpoint in [FUSEKI_LOV] and method == "POST":
+        pytest.skip("Return type issue with POST requests")
+        # NOTE: getting rdflib.plugin.PluginException: No plugin registered for (text/plain, )
+
+    query = "SELECT ?s ?p ?o WHERE { ?s ?p ?o } LIMIT 5"
+    res = query_sparql(query, endpoint, return_format, method)
+    assert isinstance(res, ROWS_TYPES_MAP[return_format])
+    assert len(res) > 3
+
+    res_json = json.loads(res.serialize(format="json"))
+    assert len(res_json["results"]["bindings"]) > 3
+
+
+# NOTE: an erroring request usually returns a tuple, e.g. (404, 'HTTP Error 404: Not Found', None),
+# but sometimes the error is raised instead.
+
+
+@pytest.mark.parametrize("return_format", ROWS_TYPES_MAP.keys())
+@pytest.mark.parametrize("method", METHODS_SUPPORTED)
+def test_ask_query(endpoint, return_format, method):
+    """Test ASK queries with various return formats and methods"""
+    if endpoint in [STORE4_CHISE, FUSEKI2_STW] and method in ["POST", "POST_FORM"]:
+        pytest.skip("POST requests not supported")
+    if endpoint in [STORE4_CHISE] and method == "GET" and return_format == "tsv":
+        pytest.skip("TSV not supported with GET requests")
+    if endpoint in [
+        QLEVER_WIKIDATA,
+        ALLEGROGRAPH_4_MMI,
+        GRAPHDB_FF,
+        RDF4J_GEOSCIML,
+        STARDOG_LINDAS,
+    ] and return_format in ["csv", "tsv"]:
+        pytest.skip("CSV/TSV not supported for ASK query type")
+    if endpoint in [VIRTUOSO_8_DBPEDIA] and return_format == "tsv":
+        pytest.skip("TSV not supported for ASK query type")
+    if endpoint in [FUSEKI_LOV] and method == "POST":
+        pytest.skip("Return type issue with POST requests")
+        # NOTE: getting rdflib.plugin.PluginException: No plugin registered for (text/plain, )
+
+    query = "ASK WHERE { ?s ?p ?o }"
+    res = query_sparql(query, endpoint, return_format, method)
+    assert isinstance(res, ROWS_TYPES_MAP[return_format])
+    for row in res:
+        if return_format in ["csv", "tsv"] and not isinstance(row, bool):
+            # NOTE: CSV/TSV with ASK sometimes returns a tuple,
+            # but sometimes it returns a boolean (e.g. Wikidata)
+            assert len(row) == 1
+            assert row[0].toPython() in [True, "true", "1"]
+            # And yes, the content of the tuple can be any one of the 3 above,
+        else:
+            # so it is highly recommended to use XML or JSON for consistency
+            assert row is True
+
+
+## CONSTRUCT and DESCRIBE
+
+RDF_TYPES_MAP = {
+    "xml": Result,
+    "application/rdf+xml": Result,
+    # "turtle": Result,
+    # NOTE: Turtle is not in SPARQLConnector's list of _response_mime_types;
+    # only XML is available for RDF results.
+}
+
+
+@pytest.mark.parametrize("return_format", RDF_TYPES_MAP.keys())
+@pytest.mark.parametrize("method", METHODS_SUPPORTED)
+def test_construct_query(endpoint, return_format, method):
+    """Test CONSTRUCT queries with various return formats and methods"""
+    if endpoint in [STORE4_CHISE, FUSEKI2_STW] and method in ["POST", "POST_FORM"]:
+        pytest.skip(f"{endpoint} does not support POST requests")
+    if endpoint in [FUSEKI_LOV] and method == "POST":
+        pytest.skip("Return type issue with POST requests")
+        # NOTE: getting rdflib.plugin.PluginException: No plugin registered for (text/plain, )
+    if endpoint in [QLEVER_WIKIDATA]:
+        pytest.skip("Qlever does not support application/rdf+xml")
+
+    query = "CONSTRUCT { ?s ?p ?o } WHERE { ?s ?p ?o } LIMIT 5"
+    res = query_sparql(query, endpoint, return_format, method)
+    assert isinstance(res, RDF_TYPES_MAP[return_format])
+    assert len(res) > 3
+
+    # Test if serialization works
+    res_g = Dataset()
+    res_g.parse(res.serialize(format="ttl"), format="ttl")
+    assert len(res_g) > 3
+
+
+@pytest.mark.parametrize("return_format", RDF_TYPES_MAP.keys())
+@pytest.mark.parametrize("method", METHODS_SUPPORTED)
+def test_describe_query(endpoint, return_format, method):
+    """Test DESCRIBE queries with various return formats and methods"""
+    if endpoint in [STORE4_CHISE, FUSEKI2_STW] and method in ["POST", "POST_FORM"]:
+        pytest.skip(f"{endpoint} does not support POST requests")
+    if endpoint in [FUSEKI_LOV] and method == "POST":
+        pytest.skip("Return type issue with POST requests")
+        # NOTE: getting rdflib.plugin.PluginException: No plugin registered for (text/plain, )
+    if endpoint in
[QLEVER_WIKIDATA] and return_format in [
+        "xml",
+        "application/rdf+xml",
+    ]:
+        pytest.skip("Qlever does not support application/rdf+xml")
+
+    query = "DESCRIBE "
+    res = query_sparql(query, endpoint, return_format, method)
+    assert isinstance(res, RDF_TYPES_MAP[return_format])
+    # We would need a valid URI for each endpoint to properly test this. But is it worth the pain?
+    # assert len(res) > 0
+
+
+@pytest.mark.parametrize("return_format", ROWS_TYPES_MAP.keys())
+@pytest.mark.parametrize("method", METHODS_SUPPORTED)
+def test_query_invalid(endpoint, return_format, method):
+    """Test an invalid query with various return formats and methods"""
+    if endpoint in [STORE4_CHISE, FUSEKI2_STW] and method in ["POST", "POST_FORM"]:
+        pytest.skip(f"{endpoint} does not support POST requests")
+    if endpoint in [FUSEKI_LOV]:
+        pytest.skip("Unsupported text/plain type returned by LOV Fuseki on error")
+        # rdflib.plugin.PluginException: No plugin registered for (text/plain, )
+
+    query = "SELECT ?s ?p ?o WHERE { ?s } LIMIT 5"
+    try:
+        res = query_sparql(query, endpoint, return_format, method)
+        # NOTE: some endpoints return a tuple describing the error,
+        # others raise it instead (handled by the except clause below).
+        assert "HTTP Error 400" in res[1], f"Unexpected results for invalid query: {res}"
+    except ValueError:
+        # The endpoint raised an error for the invalid query, which is also acceptable.
+        pass
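As the comments in these tests note, CSV/TSV results for ASK queries vary wildly across endpoints, so application code should prefer JSON or XML. A minimal sketch of that recommended pattern (the endpoint URL is just one of those tested above):

.. code-block:: python

    from rdflib import Dataset
    from rdflib.plugins.stores.sparqlstore import SPARQLStore

    def ask(endpoint: str, query: str) -> bool:
        """Run an ASK query with JSON results for consistent boolean handling."""
        g = Dataset(SPARQLStore(endpoint, returnFormat="json"), default_union=True)
        result = g.query(query)
        # Result exposes the boolean answer of an ASK query via `askAnswer`.
        return bool(result.askAnswer)

    print(ask("https://query.wikidata.org/sparql", "ASK WHERE { ?s ?p ?o }"))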