diff --git a/fern/docs/pages/manual/knowledge-graph.mdx b/fern/docs/pages/manual/knowledge-graph.mdx
new file mode 100644
index 0000000000..73ded12893
--- /dev/null
+++ b/fern/docs/pages/manual/knowledge-graph.mdx
@@ -0,0 +1,33 @@
+# GraphStore Providers
+PrivateGPT supports [Neo4j](https://neo4j.com/) as a graph store provider.
+
+To enable it, set the `graphstore.database` property in the `settings.yaml` file to `neo4j`.
+
+```yaml
+graphstore:
+  database: neo4j
+```
+
+## Neo4j
+
+Neo4j is a graph database management system that provides an efficient and scalable solution for storing and querying graph data.
+
+### Configuration
+
+To configure Neo4j as the graph store provider, specify the following parameters in the `settings.yaml` file:
+
+```yaml
+graphstore:
+  database: neo4j
+
+neo4j:
+  url: neo4j://localhost:7687
+  username: neo4j
+  password: password
+  database: neo4j
+```
+
+- **url**: The URL of the Neo4j server.
+- **username**: The username for accessing the Neo4j database.
+- **password**: The password for accessing the Neo4j database.
+- **database**: The name of the Neo4j database.
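+
+### Installation
+
+The Neo4j graph store is an optional dependency. Install it with the extra declared in `pyproject.toml` (this mirrors the hint given by the component's ImportError message):
+
+```bash
+poetry install --extras graph-stores-neo4j
+```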
diff --git a/private_gpt/components/graph_store/__init__.py b/private_gpt/components/graph_store/__init__.py
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/private_gpt/components/graph_store/graph_store_component.py b/private_gpt/components/graph_store/graph_store_component.py
new file mode 100644
index 0000000000..2c646d8d90
--- /dev/null
+++ b/private_gpt/components/graph_store/graph_store_component.py
@@ -0,0 +1,77 @@
+import logging
+import typing
+
+from injector import inject, singleton
+from llama_index.core.graph_stores.types import (
+    GraphStore,
+)
+from llama_index.core.indices.knowledge_graph import (
+    KnowledgeGraphRAGRetriever,
+)
+from llama_index.core.llms.llm import LLM
+from llama_index.core.storage import StorageContext
+
+from private_gpt.settings.settings import Settings
+
+logger = logging.getLogger(__name__)
+
+
+@singleton
+class GraphStoreComponent:
+    settings: Settings
+    graph_store: GraphStore | None = None
+
+    @inject
+    def __init__(self, settings: Settings) -> None:
+        self.settings = settings
+
+        # If no graphstore is defined, return, making the graphstore optional
+        if settings.graphstore is None:
+            return
+
+        match settings.graphstore.database:
+            case "neo4j":
+                try:
+                    from llama_index.graph_stores.neo4j import (  # type: ignore
+                        Neo4jGraphStore,
+                    )
+                except ImportError as e:
+                    raise ImportError(
+                        "Neo4j dependencies not found, install with `poetry install --extras graph-stores-neo4j`"
+                    ) from e
+
+                if settings.neo4j is None:
+                    raise ValueError(
+                        "Neo4j settings not found. Please configure the `neo4j` section in settings.yaml."
+                    )
+
+                self.graph_store = typing.cast(
+                    GraphStore,
+                    Neo4jGraphStore(
+                        **settings.neo4j.model_dump(exclude_none=True),
+                    ),
+                )
+            case _:
+                # Should be unreachable
+                # The settings validator should have caught this
+                raise ValueError(
+                    f"Graphstore database {settings.graphstore.database} not supported"
+                )
+
+    def get_knowledge_graph(
+        self,
+        storage_context: StorageContext,
+        llm: LLM,
+    ) -> KnowledgeGraphRAGRetriever:
+        if self.graph_store is None:
+            raise ValueError("GraphStore not defined in settings")
+
+        return KnowledgeGraphRAGRetriever(
+            storage_context=storage_context,
+            llm=llm,
+            verbose=True,
+        )
+
+    def close(self) -> None:
+        if self.graph_store and hasattr(self.graph_store.client, "close"):
+            self.graph_store.client.close()
diff --git a/private_gpt/components/ingest/ingest_component.py b/private_gpt/components/ingest/ingest_component.py
index 5ed0395906..54010d9240 100644
--- a/private_gpt/components/ingest/ingest_component.py
+++ b/private_gpt/components/ingest/ingest_component.py
@@ -9,11 +9,16 @@
 from queue import Queue
 from typing import Any
 
+from llama_index.core import KnowledgeGraphIndex
 from llama_index.core.data_structs import IndexDict
 from llama_index.core.embeddings.utils import EmbedType
-from llama_index.core.indices import VectorStoreIndex, load_index_from_storage
+from llama_index.core.indices import (
+    VectorStoreIndex,
+    load_index_from_storage,
+)
 from llama_index.core.indices.base import BaseIndex
 from llama_index.core.ingestion import run_transformations
+from llama_index.core.llms.llm import LLM
 from llama_index.core.schema import BaseNode, Document, TransformComponent
 from llama_index.core.storage import StorageContext
 
@@ -67,9 +72,13 @@ def __init__(
         self._index_thread_lock = (
             threading.Lock()
         )  # Thread lock! Not Multiprocessing lock
-        self._index = self._initialize_index()
+        self._index = self._initialize_index(**kwargs)
+        self._knowledge_graph = self._initialize_knowledge_graph(**kwargs)
 
-    def _initialize_index(self) -> BaseIndex[IndexDict]:
+    def _initialize_index(
+        self,
+        llm: LLM,
+    ) -> BaseIndex[IndexDict]:
         """Initialize the index from the storage context."""
         try:
             # Load the index with store_nodes_override=True to be able to delete them
@@ -79,6 +88,7 @@ def _initialize_index(self) -> BaseIndex[IndexDict]:
                 show_progress=self.show_progress,
                 embed_model=self.embed_model,
                 transformations=self.transformations,
+                llm=llm,
             )
         except ValueError:
             # There are no index in the storage context, creating a new one
@@ -94,9 +104,34 @@ def _initialize_index(self) -> BaseIndex[IndexDict]:
             index.storage_context.persist(persist_dir=local_data_path)
         return index
 
+    def _initialize_knowledge_graph(
+        self,
+        llm: LLM,
+        max_triplets_per_chunk: int = 10,
+        include_embeddings: bool = True,
+    ) -> KnowledgeGraphIndex:
+        """Initialize the knowledge graph index from the storage context."""
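+        # Triplet extraction is delegated to the LLM: max_triplets_per_chunk
+        # caps how many (subject, predicate, object) triples are pulled from
+        # each chunk, and include_embeddings stores embeddings for the
+        # extracted triples alongside the graph.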
+        index = KnowledgeGraphIndex.from_documents(
+            [],
+            storage_context=self.storage_context,
+            show_progress=self.show_progress,
+            embed_model=self.embed_model,
+            transformations=self.transformations,
+            llm=llm,
+            max_triplets_per_chunk=max_triplets_per_chunk,
+            include_embeddings=include_embeddings,
+        )
+        index.storage_context.persist(persist_dir=local_data_path)
+        return index
+
     def _save_index(self) -> None:
+        logger.debug("Persisting the index")
         self._index.storage_context.persist(persist_dir=local_data_path)
 
+    def _save_knowledge_graph(self) -> None:
+        logger.debug("Persisting the knowledge graph")
+        self._knowledge_graph.storage_context.persist(persist_dir=local_data_path)
+
     def delete(self, doc_id: str) -> None:
         with self._index_thread_lock:
             # Delete the document from the index
@@ -105,6 +140,12 @@ def delete(self, doc_id: str) -> None:
             # Save the index
             self._save_index()
 
+            # Delete the document from the knowledge graph
+            self._knowledge_graph.delete_ref_doc(doc_id, delete_from_docstore=True)
+
+            # Save the knowledge graph
+            self._save_knowledge_graph()
+
 
 class SimpleIngestComponent(BaseIngestComponentWithIndex):
     def __init__(
@@ -138,14 +179,35 @@ def bulk_ingest(self, files: list[tuple[str, Path]]) -> list[Document]:
     def _save_docs(self, documents: list[Document]) -> list[Document]:
         logger.debug("Transforming count=%s documents into nodes", len(documents))
         with self._index_thread_lock:
-            for document in documents:
-                self._index.insert(document, show_progress=True)
-            logger.debug("Persisting the index and nodes")
-            # persist the index and nodes
-            self._save_index()
+            logger.debug("Persisting the index and nodes in the vector store")
+            self._save_to_index(documents)
+
+            logger.debug("Persisting the index and nodes in the knowledge graph")
+            self._save_to_knowledge_graph(documents)
+
             logger.debug("Persisted the index and nodes")
         return documents
 
+    def _save_to_index(self, documents: list[Document]) -> None:
+        logger.debug("Inserting count=%s documents in the index", len(documents))
+        for document in documents:
+            logger.info("Inserting document=%s in the index", document)
+            self._index.insert(document, show_progress=True)
+        self._save_index()
+
+    def _save_to_knowledge_graph(self, documents: list[Document]) -> None:
+        logger.debug(
+            "Inserting count=%s documents in the knowledge graph", len(documents)
+        )
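+        # Only documents flagged by their reader with extra_info["graph_type"]
+        # (e.g. "rdf", set by RDFReader) are inserted into the knowledge graph.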
+        for document in [
+            d for d in documents if d.extra_info.get("graph_type", None) is not None
+        ]:
+            logger.info("Inserting document=%s in the knowledge graph", document)
+            logger.info("Document extra_info=%s", document.extra_info)
+            self._knowledge_graph.insert(document, show_progress=True)
+        self._save_knowledge_graph()
+
 
 class BatchIngestComponent(BaseIngestComponentWithIndex):
     """Parallelize the file reading and parsing on multiple CPU core.
@@ -485,6 +547,8 @@ def get_ingestion_component(
     embed_model: EmbedType,
     transformations: list[TransformComponent],
     settings: Settings,
+    *args: Any,
+    **kwargs: Any,
 ) -> BaseIngestComponent:
     """Get the ingestion component for the given configuration."""
     ingest_mode = settings.embedding.ingest_mode
@@ -494,6 +558,7 @@ def get_ingestion_component(
             embed_model=embed_model,
             transformations=transformations,
             count_workers=settings.embedding.count_workers,
+            llm=kwargs.get("llm"),
         )
     elif ingest_mode == "parallel":
         return ParallelizedIngestComponent(
             storage_context=storage_context,
             embed_model=embed_model,
             transformations=transformations,
             count_workers=settings.embedding.count_workers,
+            llm=kwargs.get("llm"),
         )
     elif ingest_mode == "pipeline":
         return PipelineIngestComponent(
             storage_context=storage_context,
             embed_model=embed_model,
             transformations=transformations,
             count_workers=settings.embedding.count_workers,
+            llm=kwargs.get("llm"),
         )
     else:
         return SimpleIngestComponent(
             storage_context=storage_context,
             embed_model=embed_model,
             transformations=transformations,
+            llm=kwargs.get("llm"),
         )
diff --git a/private_gpt/components/ingest/ingest_helper.py b/private_gpt/components/ingest/ingest_helper.py
index a110907022..46666d4393 100644
--- a/private_gpt/components/ingest/ingest_helper.py
+++ b/private_gpt/components/ingest/ingest_helper.py
@@ -27,6 +27,10 @@ def _try_loading_included_file_formats() -> dict[str, type[BaseReader]]:
         from llama_index.readers.file.video_audio import (  # type: ignore
             VideoAudioReader,
         )
+
+        from private_gpt.components.ingest.readers.rdfreader import (  # type: ignore
+            RDFReader,
+        )
     except ImportError as e:
         raise ImportError("`llama-index-readers-file` package not found") from e
 
@@ -48,7 +52,10 @@ def _try_loading_included_file_formats() -> dict[str, type[BaseReader]]:
         ".mbox": MboxReader,
         ".ipynb": IPYNBReader,
     }
-    return default_file_reader_cls
+    optional_file_reader_cls: dict[str, type[BaseReader]] = {
+        ".ttl": RDFReader,
+    }
+    return {**default_file_reader_cls, **optional_file_reader_cls}
 
 
 # Patching the default file reader to support other file types
diff --git a/private_gpt/components/ingest/readers/__init__.py b/private_gpt/components/ingest/readers/__init__.py
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/private_gpt/components/ingest/readers/rdfreader.py b/private_gpt/components/ingest/readers/rdfreader.py
new file mode 100644
index 0000000000..eda34d3a40
--- /dev/null
+++ b/private_gpt/components/ingest/readers/rdfreader.py
@@ -0,0 +1,92 @@
+# mypy: ignore-errors
+
+"""Read RDF files.
+
+This module is used to read RDF files.
+It originally comes from llama-hub, which has not been ported to
+llama-index==0.1.0, so it is vendored here with multiple changes to fix the code.
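+
+Each parsed file is flagged with extra_info["graph_type"] = "rdf" and returned
+as a single Document whose text lists the file's triples, one per line.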
+ +Original code: +https://github.com/run-llama/llama-hub +""" + +import logging +from pathlib import Path +from typing import Any + +from llama_index.core.readers.base import BaseReader +from llama_index.core.schema import Document +from rdflib import Graph, URIRef +from rdflib.namespace import RDF, RDFS + +logger = logging.getLogger(__name__) + + +class RDFReader(BaseReader): + """RDF reader.""" + + def __init__( + self, + *args: Any, + **kwargs: Any, + ) -> None: + """Initialize loader.""" + super().__init__(*args, **kwargs) + + def fetch_labels(self, uri: URIRef, graph: Graph, lang: str): + """Fetch all labels of a URI by language.""" + return list( + filter(lambda x: x.language in [lang, None], graph.objects(uri, RDFS.label)) + ) + + def fetch_label_in_graphs(self, uri: URIRef, lang: str = "en"): + """Fetch one label of a URI by language from the local or global graph.""" + labels = self.fetch_labels(uri, self.g_local, lang) + if len(labels) > 0: + return labels[0].value + + labels = self.fetch_labels(uri, self.g_global, lang) + if len(labels) > 0: + return labels[0].value + + return str(uri) + + def load_data(self, file: Path, extra_info: dict | None = None) -> list[Document]: + """Parse file.""" + extra_info = extra_info or {} + extra_info["graph_type"] = "rdf" + lang = ( + extra_info["lang"] + if extra_info is not None and "lang" in extra_info + else "en" + ) + + self.g_local = Graph() + self.g_local.parse(file) + + self.g_global = Graph() + self.g_global.parse(str(RDF)) + self.g_global.parse(str(RDFS)) + + text_list = [] + + for s, p, o in self.g_local: + logger.debug("s=%s, p=%s, o=%s", s, p, o) + if p == RDFS.label: + continue + + subj_label = self.fetch_label_in_graphs(s, lang=lang) + pred_label = self.fetch_label_in_graphs(p, lang=lang) + obj_label = self.fetch_label_in_graphs(o, lang=lang) + + if subj_label is None or pred_label is None or obj_label is None: + continue + + triple = f"<{subj_label}> " f"<{pred_label}> " f"<{obj_label}>" + text_list.append(triple) + + text = "\n".join(text_list) + return [self._text_to_document(text, extra_info)] + + def _text_to_document(self, text: str, extra_info: dict | None = None) -> Document: + return Document(text=text, extra_info=extra_info or {}) diff --git a/private_gpt/server/chat/chat_service.py b/private_gpt/server/chat/chat_service.py index ea57f2c0d0..584073b9b0 100644 --- a/private_gpt/server/chat/chat_service.py +++ b/private_gpt/server/chat/chat_service.py @@ -11,11 +11,15 @@ from llama_index.core.postprocessor import ( SimilarityPostprocessor, ) +from llama_index.core.retrievers.router_retriever import RouterRetriever +from llama_index.core.selectors import LLMSingleSelector from llama_index.core.storage import StorageContext +from llama_index.core.tools.retriever_tool import RetrieverTool from llama_index.core.types import TokenGen from pydantic import BaseModel from private_gpt.components.embedding.embedding_component import EmbeddingComponent +from private_gpt.components.graph_store.graph_store_component import GraphStoreComponent from private_gpt.components.llm.llm_component import LLMComponent from private_gpt.components.node_store.node_store_component import NodeStoreComponent from private_gpt.components.vector_store.vector_store_component import ( @@ -82,6 +86,7 @@ def __init__( vector_store_component: VectorStoreComponent, embedding_component: EmbeddingComponent, node_store_component: NodeStoreComponent, + graph_store_component: GraphStoreComponent, ) -> None: self.settings = settings self.llm_component = 
llm_component
@@ -89,6 +94,9 @@ def __init__(
         self.vector_store_component = vector_store_component
         self.storage_context = StorageContext.from_defaults(
             vector_store=vector_store_component.vector_store,
+            graph_store=graph_store_component.graph_store
+            if graph_store_component and graph_store_component.graph_store
+            else None,
             docstore=node_store_component.doc_store,
             index_store=node_store_component.index_store,
         )
@@ -99,6 +107,8 @@ def __init__(
             embed_model=embedding_component.embedding_model,
             show_progress=True,
         )
+        self.graph_store_component = graph_store_component
+        self.knowledge_graph_index = graph_store_component.graph_store
 
     def _chat_engine(
         self,
@@ -113,9 +123,28 @@ def _chat_engine(
             context_filter=context_filter,
             similarity_top_k=self.settings.rag.similarity_top_k,
         )
+        graph_knowledge_retriever = self.graph_store_component.get_knowledge_graph(
+            llm=self.llm_component.llm,
+            storage_context=self.storage_context,
+        )
+
+        retrievers = [
+            r for r in [vector_index_retriever, graph_knowledge_retriever] if r
+        ]
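+        # Route each query through an LLM-based selector that picks the
+        # retriever (vector store or knowledge graph) best suited to answer it.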
+        retriever = RouterRetriever.from_defaults(
+            retriever_tools=[
+                RetrieverTool.from_defaults(retriever) for retriever in retrievers
+            ],
+            llm=self.llm_component.llm,
+            selector=LLMSingleSelector.from_defaults(
+                llm=self.llm_component.llm
+            ),  # TODO: Could be LLMMultiSelector if needed
+            select_multi=len(retrievers) > 1,
+        )
+
         return ContextChatEngine.from_defaults(
             system_prompt=system_prompt,
-            retriever=vector_index_retriever,
+            retriever=retriever,
             llm=self.llm_component.llm,  # Takes no effect at the moment
             node_postprocessors=[
                 MetadataReplacementPostProcessor(target_metadata_key="window"),
diff --git a/private_gpt/server/chunks/chunks_service.py b/private_gpt/server/chunks/chunks_service.py
index 7bda5d9046..26a1e7a022 100644
--- a/private_gpt/server/chunks/chunks_service.py
+++ b/private_gpt/server/chunks/chunks_service.py
@@ -7,6 +7,7 @@
 from pydantic import BaseModel, Field
 
 from private_gpt.components.embedding.embedding_component import EmbeddingComponent
+from private_gpt.components.graph_store.graph_store_component import GraphStoreComponent
 from private_gpt.components.llm.llm_component import LLMComponent
 from private_gpt.components.node_store.node_store_component import NodeStoreComponent
 from private_gpt.components.vector_store.vector_store_component import (
@@ -60,6 +61,7 @@ def __init__(
         self,
         llm_component: LLMComponent,
         vector_store_component: VectorStoreComponent,
+        graph_store_component: GraphStoreComponent,
         embedding_component: EmbeddingComponent,
         node_store_component: NodeStoreComponent,
     ) -> None:
@@ -68,6 +70,9 @@ def __init__(
         self.embedding_component = embedding_component
         self.storage_context = StorageContext.from_defaults(
             vector_store=vector_store_component.vector_store,
+            graph_store=graph_store_component.graph_store
+            if graph_store_component and graph_store_component.graph_store
+            else None,
             docstore=node_store_component.doc_store,
             index_store=node_store_component.index_store,
         )
diff --git a/private_gpt/server/ingest/ingest_service.py b/private_gpt/server/ingest/ingest_service.py
index f9ae4728f1..0243c0e917 100644
--- a/private_gpt/server/ingest/ingest_service.py
+++ b/private_gpt/server/ingest/ingest_service.py
@@ -8,6 +8,7 @@
 from llama_index.core.storage import StorageContext
 
 from private_gpt.components.embedding.embedding_component import EmbeddingComponent
+from private_gpt.components.graph_store.graph_store_component import GraphStoreComponent
 from private_gpt.components.ingest.ingest_component import get_ingestion_component
 from private_gpt.components.llm.llm_component import LLMComponent
 from private_gpt.components.node_store.node_store_component import NodeStoreComponent
@@ -30,12 +31,16 @@ def __init__(
         self,
         llm_component: LLMComponent,
         vector_store_component: VectorStoreComponent,
+        graph_store_component: GraphStoreComponent,
         embedding_component: EmbeddingComponent,
         node_store_component: NodeStoreComponent,
     ) -> None:
         self.llm_service = llm_component
         self.storage_context = StorageContext.from_defaults(
             vector_store=vector_store_component.vector_store,
+            graph_store=graph_store_component.graph_store
+            if graph_store_component and graph_store_component.graph_store
+            else None,
             docstore=node_store_component.doc_store,
             index_store=node_store_component.index_store,
         )
@@ -46,6 +51,7 @@ def __init__(
             embed_model=embedding_component.embedding_model,
             transformations=[node_parser, embedding_component.embedding_model],
             settings=settings(),
+            llm=self.llm_service.llm,
         )
 
     def _ingest_data(self, file_name: str, file_data: AnyStr) -> list[IngestedDoc]:
diff --git a/private_gpt/settings/settings.py b/private_gpt/settings/settings.py
index 5896f00d6d..fc9be51898 100644
--- a/private_gpt/settings/settings.py
+++ b/private_gpt/settings/settings.py
@@ -114,6 +114,10 @@ class NodeStoreSettings(BaseModel):
     database: Literal["simple", "postgres"]
 
 
+class GraphStoreSettings(BaseModel):
+    database: Literal["neo4j"]
+
+
 class LlamaCPPSettings(BaseModel):
     llm_hf_repo_id: str
     llm_hf_model_file: str
@@ -376,6 +380,25 @@ class QdrantSettings(BaseModel):
     )
 
 
+class Neo4jSettings(BaseModel):
+    url: str | None = Field(
+        "bolt://localhost:7687",
+        description="URL of the Neo4j database.",
+    )
+    username: str | None = Field(
+        "neo4j",
+        description="Username to connect to the Neo4j database.",
+    )
+    password: str | None = Field(
+        "password",
+        description="Password to connect to the Neo4j database.",
+    )
+    database: str | None = Field(
+        "neo4j",
+        description="Database name to connect to the Neo4j database.",
+    )
+
+
 class Settings(BaseModel):
     server: ServerSettings
     data: DataSettings
@@ -389,10 +412,12 @@ class Settings(BaseModel):
     ollama: OllamaSettings
     azopenai: AzureOpenAISettings
     vectorstore: VectorstoreSettings
+    graphstore: GraphStoreSettings | None = None
     nodestore: NodeStoreSettings
     rag: RagSettings
     qdrant: QdrantSettings | None = None
     postgres: PostgresSettings | None = None
+    neo4j: Neo4jSettings | None = None
 
 """
diff --git a/pyproject.toml b/pyproject.toml
index d568999879..342193dd5b 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -57,6 +57,7 @@ vector-stores-qdrant = ["llama-index-vector-stores-qdrant"]
 vector-stores-chroma = ["llama-index-vector-stores-chroma"]
 vector-stores-postgres = ["llama-index-vector-stores-postgres"]
 storage-nodestore-postgres = ["llama-index-storage-docstore-postgres","llama-index-storage-index-store-postgres","psycopg2-binary","asyncpg"]
+graph-stores-neo4j = ["llama-index-graph-stores-neo4j"]
 
 [tool.poetry.group.dev.dependencies]
 black = "^22"
diff --git a/settings-ollama.yaml b/settings-ollama.yaml
index d7e1a12ca0..fabaf5dfa0 100644
--- a/settings-ollama.yaml
+++ b/settings-ollama.yaml
@@ -9,6 +9,7 @@ llm:
 
 embedding:
   mode: ollama
+  ingest_mode: simple # Use the "simple" ingest mode, which also stores the embeddings and extracted triples in the graph store.
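+  # The knowledge graph requires a reachable Neo4j instance; its connection
+  # settings are defined in the neo4j section at the bottom of this file.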
ollama: llm_model: mistral @@ -24,5 +25,14 @@ ollama: vectorstore: database: qdrant +graphstore: + database: neo4j + qdrant: path: local_data/private_gpt/qdrant + +neo4j: + url: neo4j://localhost:7687 + username: neo4j + password: password + database: neo4j diff --git a/tests/server/ingest/test.ttl b/tests/server/ingest/test.ttl new file mode 100644 index 0000000000..99a7433888 --- /dev/null +++ b/tests/server/ingest/test.ttl @@ -0,0 +1,358 @@ +@prefix ns1: . +@prefix rdfs: . + +ns1:Q1044339 rdfs:label "Valeriano Balloni" ; + ns1:hasTeam ns1:Q13385, + ns1:Q289029, + ns1:Q297430, + ns1:Q650365, + ns1:Q6767 . + +ns1:Q110992321 rdfs:label "Tim Karius" ; + ns1:hasTeam ns1:Q1387210, + ns1:Q655591 . + +ns1:Q12402730 rdfs:label "Xoaquín Álvarez Corbacho" ; + ns1:hasTeam ns1:Q8749 . + +ns1:Q12813965 rdfs:label "József Cserháti" ; + ns1:hasTeam ns1:Q732885 . + +ns1:Q13101502 rdfs:label "Alphonse Weicker" ; + ns1:hasTeam ns1:Q184266, + ns1:Q693092 . + +ns1:Q1497593 rdfs:label "Gediminas Budnikas" ; + ns1:hasTeam ns1:Q393357 . + +ns1:Q1531063 rdfs:label "Glenn W. Harrison" ; + ns1:hasTeam ns1:Q1034556 . + +ns1:Q16081110 rdfs:label "Moon Hyung-pyo" ; + ns1:hasTeam ns1:Q39988 . + +ns1:Q16091117 rdfs:label "Lee Ju-yeol" ; + ns1:hasTeam ns1:Q39988 . + +ns1:Q16299411 rdfs:label "Juha Joenväärä" ; + ns1:hasTeam ns1:Q1130636, + ns1:Q1232297 . + +ns1:Q1686485 rdfs:label "Jeff Immelt" ; + ns1:hasTeam ns1:Q5225674 . + +ns1:Q16942062 rdfs:label "Tito Montaño" ; + ns1:hasTeam ns1:Q127925 . + +ns1:Q1776728 rdfs:label "Svein Gjedrem" ; + ns1:hasTeam ns1:Q737937 . + +ns1:Q17917747 rdfs:label "Noel Newton Nethersole" ; + ns1:hasTeam ns1:Q3590248 . + +ns1:Q18541191 rdfs:label "Adalbert Kassai" ; + ns1:hasTeam ns1:Q1135735, + ns1:Q1195647, + ns1:Q1386940, + ns1:Q1689705, + ns1:Q841245, + ns1:Q842134 . + +ns1:Q18562973 rdfs:label "István Hagelmayer" ; + ns1:hasTeam ns1:Q606773 . + +ns1:Q192533 rdfs:label "Mark Carney" ; + ns1:hasTeam ns1:Q5676342 . + +ns1:Q1930105 rdfs:label "Michaela Vosbeck" ; + ns1:hasTeam ns1:Q1715018, + ns1:Q1792079, + ns1:Q2931573, + ns1:Q300032 . + +ns1:Q202693 rdfs:label "Jo Nesbø" ; + ns1:hasTeam ns1:Q208552 . + +ns1:Q2055385 rdfs:label "Alexandre Baptista" ; + ns1:hasTeam ns1:Q267245, + ns1:Q75729 . + +ns1:Q22003558 rdfs:label "Colin Cannonier" ; + ns1:hasTeam ns1:Q3590581 . + +ns1:Q2535499 rdfs:label "Tadao Horie" ; + ns1:hasTeam ns1:Q170566 . + +ns1:Q27491470 rdfs:label "Telesfor Banaszkiewicz" ; + ns1:hasTeam ns1:Q11821053, + ns1:Q1198772 . + +ns1:Q30308976 rdfs:label "Thomas Howden Fraser" ; + ns1:hasTeam ns1:Q117467 . + +ns1:Q311025 rdfs:label "Henry Paulson" ; + ns1:hasTeam ns1:Q5225674 . + +ns1:Q3132658 rdfs:label "Henry Braddon" ; + ns1:hasTeam ns1:Q55801 . + +ns1:Q313682 rdfs:label "Oleguer Presas" ; + ns1:hasTeam ns1:Q10467, + ns1:Q17228, + ns1:Q2220788, + ns1:Q7156, + ns1:Q81888 . + +ns1:Q3470333 rdfs:label "Salvador Servià i Costa" ; + ns1:hasTeam ns1:Q188217, + ns1:Q35896 . + +ns1:Q354317 rdfs:label "Vebjørn Rodal" ; + ns1:hasTeam ns1:Q11993950 . + +ns1:Q3592042 rdfs:label "Étienne Antonelli" ; + ns1:hasTeam ns1:Q132885 . + +ns1:Q3808555 rdfs:label "Joan Trayter" ; + ns1:hasTeam ns1:Q3091261 . + +ns1:Q4011129 rdfs:label "Vicente Locaso" ; + ns1:hasTeam ns1:Q15799, + ns1:Q18640, + ns1:Q327172, + ns1:Q79800 . + +ns1:Q457755 rdfs:label "Alfred Lawson" ; + ns1:hasTeam ns1:Q461595, + ns1:Q653772 . + +ns1:Q4908745 rdfs:label "Bill Demory" ; + ns1:hasTeam ns1:Q219602, + ns1:Q4791461 . + +ns1:Q4939229 rdfs:label "Bolesław Banaś" ; + ns1:hasTeam ns1:Q3593958 . 
+ +ns1:Q4961008 rdfs:label "Brendan Menton, Sr." ; + ns1:hasTeam ns1:Q629300 . + +ns1:Q4968933 rdfs:label "Rune Gerhardsen" ; + ns1:hasTeam ns1:Q2042878 . + +ns1:Q5405396 rdfs:label "Alejandro Brand" ; + ns1:hasTeam ns1:Q212564, + ns1:Q391984 . + +ns1:Q559712 rdfs:label "Magomedsalam Magomedov" ; + ns1:hasTeam ns1:Q2494171 . + +ns1:Q60735037 rdfs:label "Peter Morgan" ; + ns1:hasTeam ns1:Q18516 . + +ns1:Q6148645 rdfs:label "Tomás Soley Güell" ; + ns1:hasTeam ns1:Q7156 . + +ns1:Q65624037 rdfs:label "Thomas Staub" ; + ns1:hasTeam ns1:Q201969 . + +ns1:Q6708659 rdfs:label "Lyndhurst Falkiner Giblin" ; + ns1:hasTeam ns1:Q378628 . + +ns1:Q7172847 rdfs:label "Peter Henry" ; + ns1:hasTeam ns1:Q7054630 . + +ns1:Q7193582 rdfs:label "Pike Curtin" ; + ns1:hasTeam ns1:Q3589750 . + +ns1:Q732476 rdfs:label "Xavier Sala-i-Martin" ; + ns1:hasTeam ns1:Q3091261 . + +ns1:Q7436183 rdfs:label "Scott Cowen" ; + ns1:hasTeam ns1:Q16959086 . + +ns1:Q75748 rdfs:label "Hans Tietmeyer" ; + ns1:hasTeam ns1:Q2385504 . + +ns1:Q769073 rdfs:label "W. Morrissey" ; + ns1:hasTeam ns1:Q2367373 . + +ns1:Q84218605 rdfs:label "José María Echevarría Arteche" ; + ns1:hasTeam ns1:Q1103198 . + +ns1:Q8667562 rdfs:label "Valerijonas Balčiūnas" ; + ns1:hasTeam ns1:Q186276 . + +ns1:Q89141301 rdfs:label "Anna Potok" ; + ns1:hasTeam ns1:Q4841 . + +ns1:Q9199508 rdfs:label "Czesława Pilarska" ; + ns1:hasTeam ns1:Q11733016 . + +ns1:Q947814 rdfs:label "Steinar Hoen" ; + ns1:hasTeam ns1:Q4573629 . + +ns1:Q963421 rdfs:label "Carl-Henric Svanberg" ; + ns1:hasTeam ns1:Q1653574 . + +ns1:Q98072140 rdfs:label "Q98072140" ; + ns1:hasTeam ns1:Q28214543 . + +ns1:Q1034556 rdfs:label "Hawthorn Football Club" . + +ns1:Q10467 rdfs:label "FC Barcelona Atlètic" . + +ns1:Q1103198 rdfs:label "Club de Campo Villa de Madrid" . + +ns1:Q1130636 rdfs:label "Oulun Kärpät" . + +ns1:Q1135735 rdfs:label "CS Corvinul Hunedoara" . + +ns1:Q11733016 rdfs:label "Stilon Gorzów Wielkopolski" . + +ns1:Q117467 rdfs:label "Royal Society of Edinburgh" . + +ns1:Q11821053 rdfs:label "Q11821053" . + +ns1:Q1195647 rdfs:label "FC Progresul București" . + +ns1:Q1198772 rdfs:label "Warta Poznań" . + +ns1:Q11993950 rdfs:label "Oppdal IL" . + +ns1:Q1232297 rdfs:label "Djurgårdens IF Hockey" . + +ns1:Q127925 rdfs:label "Club Aurora" . + +ns1:Q132885 rdfs:label "Olympique de Marseille" . + +ns1:Q13385 rdfs:label "Società Polisportiva Ars et Labor" . + +ns1:Q1386940 rdfs:label "FC Bihor Oradea" . + +ns1:Q1387210 rdfs:label "FC Jeunesse Canach" . + +ns1:Q15799 rdfs:label "Club Atlético River Plate" . + +ns1:Q1653574 rdfs:label "IF Björklöven" . + +ns1:Q1689705 rdfs:label "FC Jiul Petroșani" . + +ns1:Q16959086 rdfs:label "UConn Huskies football" . + +ns1:Q170566 rdfs:label "Japan national football team" . + +ns1:Q1715018 rdfs:label "TV Hörde" . + +ns1:Q17228 rdfs:label "Catalonia national football team" . + +ns1:Q1792079 rdfs:label "VC Schwerte" . + +ns1:Q184266 rdfs:label "Luxembourg national football team" . + +ns1:Q18516 rdfs:label "Hereford United F.C." . + +ns1:Q186276 rdfs:label "Lithuania national football team" . + +ns1:Q18640 rdfs:label "Gimnasia y Esgrima La Plata" . + +ns1:Q188217 rdfs:label "SEAT" . + +ns1:Q201969 rdfs:label "FC Winterthur" . + +ns1:Q2042878 rdfs:label "Aktiv SK" . + +ns1:Q208552 rdfs:label "Molde FK" . + +ns1:Q212564 rdfs:label "Colombia national football team" . + +ns1:Q219602 rdfs:label "New York Jets" . + +ns1:Q2220788 rdfs:label "UDA Gramenet" . + +ns1:Q2367373 rdfs:label "NYU Violets" . + +ns1:Q2385504 rdfs:label "Q2385504" . 
+
+ns1:Q2494171 rdfs:label "FC Dynamo Makhachkala" .
+
+ns1:Q267245 rdfs:label "Portugal national association football team" .
+
+ns1:Q28214543 rdfs:label "Trabzonspor" .
+
+ns1:Q289029 rdfs:label "U.S. Ancona" .
+
+ns1:Q2931573 rdfs:label "CJD Feuerbach" .
+
+ns1:Q297430 rdfs:label "S.S. Arezzo" .
+
+ns1:Q300032 rdfs:label "Germany women's national volleyball team" .
+
+ns1:Q327172 rdfs:label "Club Atlético Huracán" .
+
+ns1:Q35896 rdfs:label "Lancia" .
+
+ns1:Q3589750 rdfs:label "Western Australia cricket team" .
+
+ns1:Q3590248 rdfs:label "Jamaica national cricket team" .
+
+ns1:Q3590581 rdfs:label "Leeward Islands cricket team" .
+
+ns1:Q3593958 rdfs:label "ŁKS Łódź" .
+
+ns1:Q378628 rdfs:label "England national rugby union team" .
+
+ns1:Q391984 rdfs:label "Millonarios" .
+
+ns1:Q393357 rdfs:label "BC Žalgiris" .
+
+ns1:Q4573629 rdfs:label "IK Tjalve" .
+
+ns1:Q461595 rdfs:label "Atlanta Braves" .
+
+ns1:Q4791461 rdfs:label "Arizona Wildcats football" .
+
+ns1:Q4841 rdfs:label "Lech Poznań" .
+
+ns1:Q55801 rdfs:label "New Zealand national rugby union team" .
+
+ns1:Q5676342 rdfs:label "Harvard Crimson men's ice hockey" .
+
+ns1:Q606773 rdfs:label "Dorogi FC" .
+
+ns1:Q629300 rdfs:label "Home Farm F.C." .
+
+ns1:Q650365 rdfs:label "Carrarese Calcio" .
+
+ns1:Q653772 rdfs:label "Pittsburgh Pirates" .
+
+ns1:Q655591 rdfs:label "FC Koeppchen Wormeldange" .
+
+ns1:Q6767 rdfs:label "U.S. Livorno 1915" .
+
+ns1:Q693092 rdfs:label "Racing FC Union Luxembourg" .
+
+ns1:Q7054630 rdfs:label "North Carolina Tar Heels football" .
+
+ns1:Q732885 rdfs:label "Salgótarjáni BTC" .
+
+ns1:Q737937 rdfs:label "Lyn 1896 FK" .
+
+ns1:Q75729 rdfs:label "Sporting CP" .
+
+ns1:Q79800 rdfs:label "Argentina national association football team" .
+
+ns1:Q81888 rdfs:label "AFC Ajax" .
+
+ns1:Q841245 rdfs:label "FC Argeș" .
+
+ns1:Q842134 rdfs:label "FC Sportul Studențesc București" .
+
+ns1:Q8749 rdfs:label "RC Celta de Vigo" .
+
+ns1:Q3091261 rdfs:label "FC Barcelona" .
+
+ns1:Q39988 rdfs:label "Yonsei University" .
+
+ns1:Q5225674 rdfs:label "Dartmouth Big Green football" .
+
+ns1:Q7156 rdfs:label "FC Barcelona" .
diff --git a/tests/server/ingest/test_ingest_routes.py b/tests/server/ingest/test_ingest_routes.py
index 896410a17f..3bdba834c7 100644
--- a/tests/server/ingest/test_ingest_routes.py
+++ b/tests/server/ingest/test_ingest_routes.py
@@ -19,6 +19,12 @@ def test_ingest_accepts_pdf_files(ingest_helper: IngestHelper) -> None:
     assert len(ingest_result.data) == 1
 
 
+def test_ingest_accepts_ttl_files(ingest_helper: IngestHelper) -> None:
+    path = Path(__file__).parents[0] / "test.ttl"
+    ingest_result = ingest_helper.ingest_file(path)
+    assert len(ingest_result.data) == 1
+
+
 def test_ingest_list_returns_something_after_ingestion(
     test_client: TestClient, ingest_helper: IngestHelper
 ) -> None: