diff --git a/fern/docs/pages/manual/knowledge-graph.mdx b/fern/docs/pages/manual/knowledge-graph.mdx
new file mode 100644
index 0000000000..73ded12893
--- /dev/null
+++ b/fern/docs/pages/manual/knowledge-graph.mdx
@@ -0,0 +1,33 @@
+# GraphStore Providers
+PrivateGPT supports [Neo4j](https://neo4j.com/).
+
+To enable it, set the `graphstore.database` property in the `settings.yaml` file to `neo4j`:
+
+```yaml
+graphstore:
+ database: neo4j
+```
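+
+The graph store is optional: when the `graphstore` section is omitted from the settings, no graph store is initialized.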
+
+## Neo4j
+
+Neo4j is a graph database management system that provides an efficient and scalable solution for storing and querying graph data.
+
+### Configuration
+
+To configure Neo4j as the graph store provider, specify the following parameters in the `settings.yaml` file:
+
+```yaml
+graphstore:
+ database: neo4j
+
+neo4j:
+ url: neo4j://localhost:7687
+ username: neo4j
+ password: password
+ database: neo4j
+```
+
+- **url**: The URL of the Neo4j server.
+- **username**: The username for accessing the Neo4j database.
+- **password**: The password for accessing the Neo4j database.
+- **database**: The name of the Neo4j database.
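+
+Before starting PrivateGPT, you may want to check that the configured instance is reachable. Below is a minimal sketch using the official `neo4j` Python driver (not installed by PrivateGPT itself; `pip install neo4j` if needed), assuming the example credentials above:
+
+```python
+from neo4j import GraphDatabase
+
+# Credentials assumed to match the settings.yaml example above.
+driver = GraphDatabase.driver("neo4j://localhost:7687", auth=("neo4j", "password"))
+driver.verify_connectivity()  # Raises an exception if the server or credentials are wrong.
+driver.close()
+```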
diff --git a/private_gpt/components/graph_store/__init__.py b/private_gpt/components/graph_store/__init__.py
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/private_gpt/components/graph_store/graph_store_component.py b/private_gpt/components/graph_store/graph_store_component.py
new file mode 100644
index 0000000000..2c646d8d90
--- /dev/null
+++ b/private_gpt/components/graph_store/graph_store_component.py
@@ -0,0 +1,77 @@
+import logging
+import typing
+
+from injector import inject, singleton
+from llama_index.core.graph_stores.types import (
+ GraphStore,
+)
+from llama_index.core.indices.knowledge_graph import (
+ KnowledgeGraphRAGRetriever,
+)
+from llama_index.core.llms.llm import LLM
+from llama_index.core.storage import StorageContext
+
+from private_gpt.settings.settings import Settings
+
+logger = logging.getLogger(__name__)
+
+
+@singleton
+class GraphStoreComponent:
+ settings: Settings
+ graph_store: GraphStore | None = None
+
+ @inject
+ def __init__(self, settings: Settings) -> None:
+ self.settings = settings
+
+ # If no graphstore is defined, return, making the graphstore optional
+ if settings.graphstore is None:
+ return
+
+ match settings.graphstore.database:
+ case "neo4j":
+ try:
+ from llama_index.graph_stores.neo4j import ( # type: ignore
+ Neo4jGraphStore,
+ )
+ except ImportError as e:
+ raise ImportError(
+ "Neo4j dependencies not found, install with `poetry install --extras graph-stores-neo4j`"
+ ) from e
+
+ if settings.neo4j is None:
+ raise ValueError(
+ "Neo4j settings not found. Please provide settings."
+ )
+
+ self.graph_store = typing.cast(
+ GraphStore,
+ Neo4jGraphStore(
+ **settings.neo4j.model_dump(exclude_none=True),
+ ), # TODO
+ )
+ case _:
+ # Should be unreachable
+ # The settings validator should have caught this
+ raise ValueError(
+ f"Vectorstore database {settings.vectorstore.database} not supported"
+ )
+
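+    # NOTE: KnowledgeGraphRAGRetriever extracts entities from the user query
+    # and retrieves the triples related to them from the graph store that is
+    # registered in the given storage context.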
+ def get_knowledge_graph(
+ self,
+ storage_context: StorageContext,
+ llm: LLM,
+ ) -> KnowledgeGraphRAGRetriever:
+ if self.graph_store is None:
+ raise ValueError("GraphStore not defined in settings")
+
+ return KnowledgeGraphRAGRetriever(
+ storage_context=storage_context,
+ llm=llm,
+ verbose=True,
+ )
+
+ def close(self) -> None:
+ if self.graph_store and hasattr(self.graph_store.client, "close"):
+ self.graph_store.client.close()
diff --git a/private_gpt/components/ingest/ingest_component.py b/private_gpt/components/ingest/ingest_component.py
index 5ed0395906..54010d9240 100644
--- a/private_gpt/components/ingest/ingest_component.py
+++ b/private_gpt/components/ingest/ingest_component.py
@@ -9,11 +9,16 @@
from queue import Queue
from typing import Any
+from llama_index.core import KnowledgeGraphIndex
from llama_index.core.data_structs import IndexDict
from llama_index.core.embeddings.utils import EmbedType
-from llama_index.core.indices import VectorStoreIndex, load_index_from_storage
+from llama_index.core.indices import (
+ VectorStoreIndex,
+ load_index_from_storage,
+)
from llama_index.core.indices.base import BaseIndex
from llama_index.core.ingestion import run_transformations
+from llama_index.core.llms.llm import LLM
from llama_index.core.schema import BaseNode, Document, TransformComponent
from llama_index.core.storage import StorageContext
@@ -67,9 +72,13 @@ def __init__(
self._index_thread_lock = (
threading.Lock()
) # Thread lock! Not Multiprocessing lock
- self._index = self._initialize_index()
+ self._index = self._initialize_index(**kwargs)
+ self._knowledge_graph = self._initialize_knowledge_graph(**kwargs)
- def _initialize_index(self) -> BaseIndex[IndexDict]:
+ def _initialize_index(
+ self,
+ llm: LLM,
+ ) -> BaseIndex[IndexDict]:
"""Initialize the index from the storage context."""
try:
# Load the index with store_nodes_override=True to be able to delete them
@@ -79,6 +88,7 @@ def _initialize_index(self) -> BaseIndex[IndexDict]:
show_progress=self.show_progress,
embed_model=self.embed_model,
transformations=self.transformations,
+ llm=llm,
)
except ValueError:
# There are no index in the storage context, creating a new one
@@ -94,9 +104,34 @@ def _initialize_index(self) -> BaseIndex[IndexDict]:
index.storage_context.persist(persist_dir=local_data_path)
return index
+ def _initialize_knowledge_graph(
+ self,
+ llm: LLM,
+ max_triplets_per_chunk: int = 10,
+ include_embeddings: bool = True,
+ ) -> KnowledgeGraphIndex:
+ """Initialize the index from the storage context."""
+ index = KnowledgeGraphIndex.from_documents(
+ [],
+ storage_context=self.storage_context,
+ show_progress=self.show_progress,
+ embed_model=self.embed_model,
+ transformations=self.transformations,
+ llm=llm,
+ max_triplets_per_chunk=max_triplets_per_chunk,
+ include_embeddings=include_embeddings,
+ )
+ index.storage_context.persist(persist_dir=local_data_path)
+ return index
+
def _save_index(self) -> None:
+ logger.debug("Persisting the index")
self._index.storage_context.persist(persist_dir=local_data_path)
+ def _save_knowledge_graph(self) -> None:
+ logger.debug("Persisting the knowledge graph")
+ self._knowledge_graph.storage_context.persist(persist_dir=local_data_path)
+
def delete(self, doc_id: str) -> None:
with self._index_thread_lock:
# Delete the document from the index
@@ -105,6 +140,12 @@ def delete(self, doc_id: str) -> None:
# Save the index
self._save_index()
+ # Delete the document from the knowledge graph
+ self._knowledge_graph.delete_ref_doc(doc_id, delete_from_docstore=True)
+
+ # Save the knowledge graph
+ self._save_knowledge_graph()
+
class SimpleIngestComponent(BaseIngestComponentWithIndex):
def __init__(
@@ -138,14 +179,35 @@ def bulk_ingest(self, files: list[tuple[str, Path]]) -> list[Document]:
def _save_docs(self, documents: list[Document]) -> list[Document]:
logger.debug("Transforming count=%s documents into nodes", len(documents))
with self._index_thread_lock:
- for document in documents:
- self._index.insert(document, show_progress=True)
- logger.debug("Persisting the index and nodes")
- # persist the index and nodes
- self._save_index()
+ logger.debug("Persisting the index and nodes in the vector store")
+ self._save_to_index(documents)
+
+ logger.debug("Persisting the index and nodes in the knowledge graph")
+ self._save_to_knowledge_graph(documents)
+
logger.debug("Persisted the index and nodes")
return documents
+ def _save_to_index(self, documents: list[Document]) -> None:
+ logger.debug("Inserting count=%s documents in the index", len(documents))
+ for document in documents:
+ logger.info("Inserting document=%s in the index", document)
+ self._index.insert(document, show_progress=True)
+ self._save_index()
+
+ def _save_to_knowledge_graph(self, documents: list[Document]) -> None:
+ logger.debug(
+ "Inserting count=%s documents in the knowledge graph", len(documents)
+ )
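+        # Only documents flagged by a graph-aware reader end up in the graph,
+        # e.g. RDFReader sets extra_info["graph_type"] = "rdf" on its output.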
+ for document in [
+ d for d in documents if d.extra_info.get("graph_type", None) is not None
+ ]:
+ logger.info("Inserting document=%s in the knowledge graph", document)
+ logger.info("Document=%s", document.extra_info)
+ self._knowledge_graph.insert(document, show_progress=True)
+ self._save_knowledge_graph()
+
class BatchIngestComponent(BaseIngestComponentWithIndex):
"""Parallelize the file reading and parsing on multiple CPU core.
@@ -485,6 +547,8 @@ def get_ingestion_component(
embed_model: EmbedType,
transformations: list[TransformComponent],
settings: Settings,
+ *args: Any,
+ **kwargs: Any,
) -> BaseIngestComponent:
"""Get the ingestion component for the given configuration."""
ingest_mode = settings.embedding.ingest_mode
@@ -494,6 +558,7 @@ def get_ingestion_component(
embed_model=embed_model,
transformations=transformations,
count_workers=settings.embedding.count_workers,
+ llm=kwargs.get("llm"),
)
elif ingest_mode == "parallel":
return ParallelizedIngestComponent(
@@ -501,6 +566,7 @@ def get_ingestion_component(
embed_model=embed_model,
transformations=transformations,
count_workers=settings.embedding.count_workers,
+ llm=kwargs.get("llm"),
)
elif ingest_mode == "pipeline":
return PipelineIngestComponent(
@@ -508,10 +574,12 @@ def get_ingestion_component(
embed_model=embed_model,
transformations=transformations,
count_workers=settings.embedding.count_workers,
+ llm=kwargs.get("llm"),
)
else:
return SimpleIngestComponent(
storage_context=storage_context,
embed_model=embed_model,
transformations=transformations,
+ llm=kwargs.get("llm"),
)
diff --git a/private_gpt/components/ingest/ingest_helper.py b/private_gpt/components/ingest/ingest_helper.py
index a110907022..46666d4393 100644
--- a/private_gpt/components/ingest/ingest_helper.py
+++ b/private_gpt/components/ingest/ingest_helper.py
@@ -27,6 +27,10 @@ def _try_loading_included_file_formats() -> dict[str, type[BaseReader]]:
from llama_index.readers.file.video_audio import ( # type: ignore
VideoAudioReader,
)
+
+ from private_gpt.components.ingest.readers.rdfreader import ( # type: ignore
+ RDFReader,
+ )
except ImportError as e:
raise ImportError("`llama-index-readers-file` package not found") from e
@@ -48,7 +52,10 @@ def _try_loading_included_file_formats() -> dict[str, type[BaseReader]]:
".mbox": MboxReader,
".ipynb": IPYNBReader,
}
- return default_file_reader_cls
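+    # Extra readers registered by PrivateGPT on top of llama-index's defaults.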
+ optional_file_reader_cls: dict[str, type[BaseReader]] = {
+ ".ttl": RDFReader,
+ }
+ return {**default_file_reader_cls, **optional_file_reader_cls}
# Patching the default file reader to support other file types
diff --git a/private_gpt/components/ingest/readers/__init__.py b/private_gpt/components/ingest/readers/__init__.py
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/private_gpt/components/ingest/readers/rdfreader.py b/private_gpt/components/ingest/readers/rdfreader.py
new file mode 100644
index 0000000000..eda34d3a40
--- /dev/null
+++ b/private_gpt/components/ingest/readers/rdfreader.py
@@ -0,0 +1,92 @@
+# mypy: ignore-errors
+
+"""Read RDF files.
+
+This module is used to read RDF files.
+It was originally created for llama-hub but was never ported
+to llama-index==0.1.0, so it is vendored here with multiple
+changes to fix the code.
+
+Original code:
+https://github.com/run-llama/llama-hub
+"""
+
+import logging
+from pathlib import Path
+from typing import Any
+
+from llama_index.core.readers.base import BaseReader
+from llama_index.core.schema import Document
+from rdflib import Graph, URIRef
+from rdflib.namespace import RDF, RDFS
+
+logger = logging.getLogger(__name__)
+
+
+class RDFReader(BaseReader):
+ """RDF reader."""
+
+ def __init__(
+ self,
+ *args: Any,
+ **kwargs: Any,
+ ) -> None:
+ """Initialize loader."""
+ super().__init__(*args, **kwargs)
+
+ def fetch_labels(self, uri: URIRef, graph: Graph, lang: str):
+ """Fetch all labels of a URI by language."""
+ return list(
+ filter(lambda x: x.language in [lang, None], graph.objects(uri, RDFS.label))
+ )
+
+ def fetch_label_in_graphs(self, uri: URIRef, lang: str = "en"):
+ """Fetch one label of a URI by language from the local or global graph."""
+ labels = self.fetch_labels(uri, self.g_local, lang)
+ if len(labels) > 0:
+ return labels[0].value
+
+ labels = self.fetch_labels(uri, self.g_global, lang)
+ if len(labels) > 0:
+ return labels[0].value
+
+ return str(uri)
+
+ def load_data(self, file: Path, extra_info: dict | None = None) -> list[Document]:
+ """Parse file."""
+ extra_info = extra_info or {}
+ extra_info["graph_type"] = "rdf"
+        lang = extra_info.get("lang", "en")
+
+ self.g_local = Graph()
+ self.g_local.parse(file)
+
+ self.g_global = Graph()
+ self.g_global.parse(str(RDF))
+ self.g_global.parse(str(RDFS))
+
+ text_list = []
+
+ for s, p, o in self.g_local:
+ logger.debug("s=%s, p=%s, o=%s", s, p, o)
+ if p == RDFS.label:
+ continue
+
+ subj_label = self.fetch_label_in_graphs(s, lang=lang)
+ pred_label = self.fetch_label_in_graphs(p, lang=lang)
+ obj_label = self.fetch_label_in_graphs(o, lang=lang)
+
+ if subj_label is None or pred_label is None or obj_label is None:
+ continue
+
+            triple = f"<{subj_label}> <{pred_label}> <{obj_label}>"
+ text_list.append(triple)
+
+ text = "\n".join(text_list)
+ return [self._text_to_document(text, extra_info)]
+
+ def _text_to_document(self, text: str, extra_info: dict | None = None) -> Document:
+ return Document(text=text, extra_info=extra_info or {})
diff --git a/private_gpt/server/chat/chat_service.py b/private_gpt/server/chat/chat_service.py
index ea57f2c0d0..584073b9b0 100644
--- a/private_gpt/server/chat/chat_service.py
+++ b/private_gpt/server/chat/chat_service.py
@@ -11,11 +11,15 @@
from llama_index.core.postprocessor import (
SimilarityPostprocessor,
)
+from llama_index.core.retrievers.router_retriever import RouterRetriever
+from llama_index.core.selectors import LLMSingleSelector
from llama_index.core.storage import StorageContext
+from llama_index.core.tools.retriever_tool import RetrieverTool
from llama_index.core.types import TokenGen
from pydantic import BaseModel
from private_gpt.components.embedding.embedding_component import EmbeddingComponent
+from private_gpt.components.graph_store.graph_store_component import GraphStoreComponent
from private_gpt.components.llm.llm_component import LLMComponent
from private_gpt.components.node_store.node_store_component import NodeStoreComponent
from private_gpt.components.vector_store.vector_store_component import (
@@ -82,6 +86,7 @@ def __init__(
vector_store_component: VectorStoreComponent,
embedding_component: EmbeddingComponent,
node_store_component: NodeStoreComponent,
+ graph_store_component: GraphStoreComponent,
) -> None:
self.settings = settings
self.llm_component = llm_component
@@ -89,6 +94,9 @@ def __init__(
self.vector_store_component = vector_store_component
self.storage_context = StorageContext.from_defaults(
vector_store=vector_store_component.vector_store,
+ graph_store=graph_store_component.graph_store
+ if graph_store_component and graph_store_component.graph_store
+ else None,
docstore=node_store_component.doc_store,
index_store=node_store_component.index_store,
)
@@ -99,6 +107,8 @@ def __init__(
embed_model=embedding_component.embedding_model,
show_progress=True,
)
+ self.graph_store_component = graph_store_component
+ self.knowledge_graph_index = graph_store_component.graph_store
def _chat_engine(
self,
@@ -113,9 +123,28 @@ def _chat_engine(
context_filter=context_filter,
similarity_top_k=self.settings.rag.similarity_top_k,
)
+        graph_knowledge_retriever = self.graph_store_component.get_knowledge_graph(
+ llm=self.llm_component.llm,
+ storage_context=self.storage_context,
+ )
+
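+        # Route each query through an LLM-based selector that picks the most
+        # appropriate retriever (vector index or knowledge graph).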
+ retrievers = [
+            r for r in [vector_index_retriever, graph_knowledge_retriever] if r
+ ]
+ retriever = RouterRetriever.from_defaults(
+ retriever_tools=[
+ RetrieverTool.from_defaults(retriever) for retriever in retrievers
+ ],
+ llm=self.llm_component.llm,
+ selector=LLMSingleSelector.from_defaults(
+ llm=self.llm_component.llm
+ ), # TODO: Could be LLMMultiSelector if needed
+ select_multi=len(retrievers) > 1,
+ )
+
return ContextChatEngine.from_defaults(
system_prompt=system_prompt,
- retriever=vector_index_retriever,
+ retriever=retriever,
llm=self.llm_component.llm, # Takes no effect at the moment
node_postprocessors=[
MetadataReplacementPostProcessor(target_metadata_key="window"),
diff --git a/private_gpt/server/chunks/chunks_service.py b/private_gpt/server/chunks/chunks_service.py
index 7bda5d9046..26a1e7a022 100644
--- a/private_gpt/server/chunks/chunks_service.py
+++ b/private_gpt/server/chunks/chunks_service.py
@@ -7,6 +7,7 @@
from pydantic import BaseModel, Field
from private_gpt.components.embedding.embedding_component import EmbeddingComponent
+from private_gpt.components.graph_store.graph_store_component import GraphStoreComponent
from private_gpt.components.llm.llm_component import LLMComponent
from private_gpt.components.node_store.node_store_component import NodeStoreComponent
from private_gpt.components.vector_store.vector_store_component import (
@@ -60,6 +61,7 @@ def __init__(
self,
llm_component: LLMComponent,
vector_store_component: VectorStoreComponent,
+ graph_store_component: GraphStoreComponent,
embedding_component: EmbeddingComponent,
node_store_component: NodeStoreComponent,
) -> None:
@@ -68,6 +70,9 @@ def __init__(
self.embedding_component = embedding_component
self.storage_context = StorageContext.from_defaults(
vector_store=vector_store_component.vector_store,
+ graph_store=graph_store_component.graph_store
+ if graph_store_component and graph_store_component.graph_store
+ else None,
docstore=node_store_component.doc_store,
index_store=node_store_component.index_store,
)
diff --git a/private_gpt/server/ingest/ingest_service.py b/private_gpt/server/ingest/ingest_service.py
index f9ae4728f1..0243c0e917 100644
--- a/private_gpt/server/ingest/ingest_service.py
+++ b/private_gpt/server/ingest/ingest_service.py
@@ -8,6 +8,7 @@
from llama_index.core.storage import StorageContext
from private_gpt.components.embedding.embedding_component import EmbeddingComponent
+from private_gpt.components.graph_store.graph_store_component import GraphStoreComponent
from private_gpt.components.ingest.ingest_component import get_ingestion_component
from private_gpt.components.llm.llm_component import LLMComponent
from private_gpt.components.node_store.node_store_component import NodeStoreComponent
@@ -30,12 +31,16 @@ def __init__(
self,
llm_component: LLMComponent,
vector_store_component: VectorStoreComponent,
+ graph_store_component: GraphStoreComponent,
embedding_component: EmbeddingComponent,
node_store_component: NodeStoreComponent,
) -> None:
self.llm_service = llm_component
self.storage_context = StorageContext.from_defaults(
vector_store=vector_store_component.vector_store,
+ graph_store=graph_store_component.graph_store
+ if graph_store_component and graph_store_component.graph_store
+ else None,
docstore=node_store_component.doc_store,
index_store=node_store_component.index_store,
)
@@ -46,6 +51,7 @@ def __init__(
embed_model=embedding_component.embedding_model,
transformations=[node_parser, embedding_component.embedding_model],
settings=settings(),
+ llm=self.llm_service.llm,
)
def _ingest_data(self, file_name: str, file_data: AnyStr) -> list[IngestedDoc]:
diff --git a/private_gpt/settings/settings.py b/private_gpt/settings/settings.py
index 5896f00d6d..fc9be51898 100644
--- a/private_gpt/settings/settings.py
+++ b/private_gpt/settings/settings.py
@@ -114,6 +114,10 @@ class NodeStoreSettings(BaseModel):
database: Literal["simple", "postgres"]
+class GraphStoreSettings(BaseModel):
+ database: Literal["neo4j"]
+
+
class LlamaCPPSettings(BaseModel):
llm_hf_repo_id: str
llm_hf_model_file: str
@@ -376,6 +380,25 @@ class QdrantSettings(BaseModel):
)
+class Neo4jSettings(BaseModel):
+ url: str | None = Field(
+ "bolt://localhost:7687",
+ description="URL of the Neo4j database.",
+ )
+ username: str | None = Field(
+ "neo4j",
+ description="Username to connect to the Neo4j database.",
+ )
+ password: str | None = Field(
+ "password",
+ description="Password to connect to the Neo4j database.",
+ )
+ database: str | None = Field(
+ "neo4j",
+ description="Database name to connect to the Neo4j database.",
+ )
+
+
class Settings(BaseModel):
server: ServerSettings
data: DataSettings
@@ -389,10 +412,12 @@ class Settings(BaseModel):
ollama: OllamaSettings
azopenai: AzureOpenAISettings
vectorstore: VectorstoreSettings
+ graphstore: GraphStoreSettings | None = None
nodestore: NodeStoreSettings
rag: RagSettings
qdrant: QdrantSettings | None = None
postgres: PostgresSettings | None = None
+ neo4j: Neo4jSettings | None = None
"""
diff --git a/pyproject.toml b/pyproject.toml
index d568999879..342193dd5b 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -57,6 +57,7 @@ vector-stores-qdrant = ["llama-index-vector-stores-qdrant"]
vector-stores-chroma = ["llama-index-vector-stores-chroma"]
vector-stores-postgres = ["llama-index-vector-stores-postgres"]
storage-nodestore-postgres = ["llama-index-storage-docstore-postgres","llama-index-storage-index-store-postgres","psycopg2-binary","asyncpg"]
+graph-stores-neo4j = ["llama-index-graph-stores-neo4j"]
[tool.poetry.group.dev.dependencies]
black = "^22"
diff --git a/settings-ollama.yaml b/settings-ollama.yaml
index d7e1a12ca0..fabaf5dfa0 100644
--- a/settings-ollama.yaml
+++ b/settings-ollama.yaml
@@ -9,6 +9,7 @@ llm:
embedding:
mode: ollama
+  ingest_mode: simple # Use the simple ingest mode, which stores the embeddings and triples in the graph store.
ollama:
llm_model: mistral
@@ -24,5 +25,14 @@ ollama:
vectorstore:
database: qdrant
+graphstore:
+ database: neo4j
+
qdrant:
path: local_data/private_gpt/qdrant
+
+neo4j:
+ url: neo4j://localhost:7687
+ username: neo4j
+ password: password
+ database: neo4j
diff --git a/tests/server/ingest/test.ttl b/tests/server/ingest/test.ttl
new file mode 100644
index 0000000000..99a7433888
--- /dev/null
+++ b/tests/server/ingest/test.ttl
@@ -0,0 +1,358 @@
+@prefix ns1: <http://example.org/> .  # placeholder IRI; original elided
+@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
+
+ns1:Q1044339 rdfs:label "Valeriano Balloni" ;
+ ns1:hasTeam ns1:Q13385,
+ ns1:Q289029,
+ ns1:Q297430,
+ ns1:Q650365,
+ ns1:Q6767 .
+
+ns1:Q110992321 rdfs:label "Tim Karius" ;
+ ns1:hasTeam ns1:Q1387210,
+ ns1:Q655591 .
+
+ns1:Q12402730 rdfs:label "Xoaquín Álvarez Corbacho" ;
+ ns1:hasTeam ns1:Q8749 .
+
+ns1:Q12813965 rdfs:label "József Cserháti" ;
+ ns1:hasTeam ns1:Q732885 .
+
+ns1:Q13101502 rdfs:label "Alphonse Weicker" ;
+ ns1:hasTeam ns1:Q184266,
+ ns1:Q693092 .
+
+ns1:Q1497593 rdfs:label "Gediminas Budnikas" ;
+ ns1:hasTeam ns1:Q393357 .
+
+ns1:Q1531063 rdfs:label "Glenn W. Harrison" ;
+ ns1:hasTeam ns1:Q1034556 .
+
+ns1:Q16081110 rdfs:label "Moon Hyung-pyo" ;
+ ns1:hasTeam ns1:Q39988 .
+
+ns1:Q16091117 rdfs:label "Lee Ju-yeol" ;
+ ns1:hasTeam ns1:Q39988 .
+
+ns1:Q16299411 rdfs:label "Juha Joenväärä" ;
+ ns1:hasTeam ns1:Q1130636,
+ ns1:Q1232297 .
+
+ns1:Q1686485 rdfs:label "Jeff Immelt" ;
+ ns1:hasTeam ns1:Q5225674 .
+
+ns1:Q16942062 rdfs:label "Tito Montaño" ;
+ ns1:hasTeam ns1:Q127925 .
+
+ns1:Q1776728 rdfs:label "Svein Gjedrem" ;
+ ns1:hasTeam ns1:Q737937 .
+
+ns1:Q17917747 rdfs:label "Noel Newton Nethersole" ;
+ ns1:hasTeam ns1:Q3590248 .
+
+ns1:Q18541191 rdfs:label "Adalbert Kassai" ;
+ ns1:hasTeam ns1:Q1135735,
+ ns1:Q1195647,
+ ns1:Q1386940,
+ ns1:Q1689705,
+ ns1:Q841245,
+ ns1:Q842134 .
+
+ns1:Q18562973 rdfs:label "István Hagelmayer" ;
+ ns1:hasTeam ns1:Q606773 .
+
+ns1:Q192533 rdfs:label "Mark Carney" ;
+ ns1:hasTeam ns1:Q5676342 .
+
+ns1:Q1930105 rdfs:label "Michaela Vosbeck" ;
+ ns1:hasTeam ns1:Q1715018,
+ ns1:Q1792079,
+ ns1:Q2931573,
+ ns1:Q300032 .
+
+ns1:Q202693 rdfs:label "Jo Nesbø" ;
+ ns1:hasTeam ns1:Q208552 .
+
+ns1:Q2055385 rdfs:label "Alexandre Baptista" ;
+ ns1:hasTeam ns1:Q267245,
+ ns1:Q75729 .
+
+ns1:Q22003558 rdfs:label "Colin Cannonier" ;
+ ns1:hasTeam ns1:Q3590581 .
+
+ns1:Q2535499 rdfs:label "Tadao Horie" ;
+ ns1:hasTeam ns1:Q170566 .
+
+ns1:Q27491470 rdfs:label "Telesfor Banaszkiewicz" ;
+ ns1:hasTeam ns1:Q11821053,
+ ns1:Q1198772 .
+
+ns1:Q30308976 rdfs:label "Thomas Howden Fraser" ;
+ ns1:hasTeam ns1:Q117467 .
+
+ns1:Q311025 rdfs:label "Henry Paulson" ;
+ ns1:hasTeam ns1:Q5225674 .
+
+ns1:Q3132658 rdfs:label "Henry Braddon" ;
+ ns1:hasTeam ns1:Q55801 .
+
+ns1:Q313682 rdfs:label "Oleguer Presas" ;
+ ns1:hasTeam ns1:Q10467,
+ ns1:Q17228,
+ ns1:Q2220788,
+ ns1:Q7156,
+ ns1:Q81888 .
+
+ns1:Q3470333 rdfs:label "Salvador Servià i Costa" ;
+ ns1:hasTeam ns1:Q188217,
+ ns1:Q35896 .
+
+ns1:Q354317 rdfs:label "Vebjørn Rodal" ;
+ ns1:hasTeam ns1:Q11993950 .
+
+ns1:Q3592042 rdfs:label "Étienne Antonelli" ;
+ ns1:hasTeam ns1:Q132885 .
+
+ns1:Q3808555 rdfs:label "Joan Trayter" ;
+ ns1:hasTeam ns1:Q3091261 .
+
+ns1:Q4011129 rdfs:label "Vicente Locaso" ;
+ ns1:hasTeam ns1:Q15799,
+ ns1:Q18640,
+ ns1:Q327172,
+ ns1:Q79800 .
+
+ns1:Q457755 rdfs:label "Alfred Lawson" ;
+ ns1:hasTeam ns1:Q461595,
+ ns1:Q653772 .
+
+ns1:Q4908745 rdfs:label "Bill Demory" ;
+ ns1:hasTeam ns1:Q219602,
+ ns1:Q4791461 .
+
+ns1:Q4939229 rdfs:label "Bolesław Banaś" ;
+ ns1:hasTeam ns1:Q3593958 .
+
+ns1:Q4961008 rdfs:label "Brendan Menton, Sr." ;
+ ns1:hasTeam ns1:Q629300 .
+
+ns1:Q4968933 rdfs:label "Rune Gerhardsen" ;
+ ns1:hasTeam ns1:Q2042878 .
+
+ns1:Q5405396 rdfs:label "Alejandro Brand" ;
+ ns1:hasTeam ns1:Q212564,
+ ns1:Q391984 .
+
+ns1:Q559712 rdfs:label "Magomedsalam Magomedov" ;
+ ns1:hasTeam ns1:Q2494171 .
+
+ns1:Q60735037 rdfs:label "Peter Morgan" ;
+ ns1:hasTeam ns1:Q18516 .
+
+ns1:Q6148645 rdfs:label "Tomás Soley Güell" ;
+ ns1:hasTeam ns1:Q7156 .
+
+ns1:Q65624037 rdfs:label "Thomas Staub" ;
+ ns1:hasTeam ns1:Q201969 .
+
+ns1:Q6708659 rdfs:label "Lyndhurst Falkiner Giblin" ;
+ ns1:hasTeam ns1:Q378628 .
+
+ns1:Q7172847 rdfs:label "Peter Henry" ;
+ ns1:hasTeam ns1:Q7054630 .
+
+ns1:Q7193582 rdfs:label "Pike Curtin" ;
+ ns1:hasTeam ns1:Q3589750 .
+
+ns1:Q732476 rdfs:label "Xavier Sala-i-Martin" ;
+ ns1:hasTeam ns1:Q3091261 .
+
+ns1:Q7436183 rdfs:label "Scott Cowen" ;
+ ns1:hasTeam ns1:Q16959086 .
+
+ns1:Q75748 rdfs:label "Hans Tietmeyer" ;
+ ns1:hasTeam ns1:Q2385504 .
+
+ns1:Q769073 rdfs:label "W. Morrissey" ;
+ ns1:hasTeam ns1:Q2367373 .
+
+ns1:Q84218605 rdfs:label "José María Echevarría Arteche" ;
+ ns1:hasTeam ns1:Q1103198 .
+
+ns1:Q8667562 rdfs:label "Valerijonas Balčiūnas" ;
+ ns1:hasTeam ns1:Q186276 .
+
+ns1:Q89141301 rdfs:label "Anna Potok" ;
+ ns1:hasTeam ns1:Q4841 .
+
+ns1:Q9199508 rdfs:label "Czesława Pilarska" ;
+ ns1:hasTeam ns1:Q11733016 .
+
+ns1:Q947814 rdfs:label "Steinar Hoen" ;
+ ns1:hasTeam ns1:Q4573629 .
+
+ns1:Q963421 rdfs:label "Carl-Henric Svanberg" ;
+ ns1:hasTeam ns1:Q1653574 .
+
+ns1:Q98072140 rdfs:label "Q98072140" ;
+ ns1:hasTeam ns1:Q28214543 .
+
+ns1:Q1034556 rdfs:label "Hawthorn Football Club" .
+
+ns1:Q10467 rdfs:label "FC Barcelona Atlètic" .
+
+ns1:Q1103198 rdfs:label "Club de Campo Villa de Madrid" .
+
+ns1:Q1130636 rdfs:label "Oulun Kärpät" .
+
+ns1:Q1135735 rdfs:label "CS Corvinul Hunedoara" .
+
+ns1:Q11733016 rdfs:label "Stilon Gorzów Wielkopolski" .
+
+ns1:Q117467 rdfs:label "Royal Society of Edinburgh" .
+
+ns1:Q11821053 rdfs:label "Q11821053" .
+
+ns1:Q1195647 rdfs:label "FC Progresul București" .
+
+ns1:Q1198772 rdfs:label "Warta Poznań" .
+
+ns1:Q11993950 rdfs:label "Oppdal IL" .
+
+ns1:Q1232297 rdfs:label "Djurgårdens IF Hockey" .
+
+ns1:Q127925 rdfs:label "Club Aurora" .
+
+ns1:Q132885 rdfs:label "Olympique de Marseille" .
+
+ns1:Q13385 rdfs:label "Società Polisportiva Ars et Labor" .
+
+ns1:Q1386940 rdfs:label "FC Bihor Oradea" .
+
+ns1:Q1387210 rdfs:label "FC Jeunesse Canach" .
+
+ns1:Q15799 rdfs:label "Club Atlético River Plate" .
+
+ns1:Q1653574 rdfs:label "IF Björklöven" .
+
+ns1:Q1689705 rdfs:label "FC Jiul Petroșani" .
+
+ns1:Q16959086 rdfs:label "UConn Huskies football" .
+
+ns1:Q170566 rdfs:label "Japan national football team" .
+
+ns1:Q1715018 rdfs:label "TV Hörde" .
+
+ns1:Q17228 rdfs:label "Catalonia national football team" .
+
+ns1:Q1792079 rdfs:label "VC Schwerte" .
+
+ns1:Q184266 rdfs:label "Luxembourg national football team" .
+
+ns1:Q18516 rdfs:label "Hereford United F.C." .
+
+ns1:Q186276 rdfs:label "Lithuania national football team" .
+
+ns1:Q18640 rdfs:label "Gimnasia y Esgrima La Plata" .
+
+ns1:Q188217 rdfs:label "SEAT" .
+
+ns1:Q201969 rdfs:label "FC Winterthur" .
+
+ns1:Q2042878 rdfs:label "Aktiv SK" .
+
+ns1:Q208552 rdfs:label "Molde FK" .
+
+ns1:Q212564 rdfs:label "Colombia national football team" .
+
+ns1:Q219602 rdfs:label "New York Jets" .
+
+ns1:Q2220788 rdfs:label "UDA Gramenet" .
+
+ns1:Q2367373 rdfs:label "NYU Violets" .
+
+ns1:Q2385504 rdfs:label "Q2385504" .
+
+ns1:Q2494171 rdfs:label "FC Dynamo Makhachkala" .
+
+ns1:Q267245 rdfs:label "Portugal national association football team" .
+
+ns1:Q28214543 rdfs:label "Trabzonspor" .
+
+ns1:Q289029 rdfs:label "U.S. Ancona" .
+
+ns1:Q2931573 rdfs:label "CJD Feuerbach" .
+
+ns1:Q297430 rdfs:label "S.S. Arezzo" .
+
+ns1:Q300032 rdfs:label "Germany women's national volleyball team" .
+
+ns1:Q327172 rdfs:label "Club Atlético Huracán" .
+
+ns1:Q35896 rdfs:label "Lancia" .
+
+ns1:Q3589750 rdfs:label "Western Australia cricket team" .
+
+ns1:Q3590248 rdfs:label "Jamaica national cricket team" .
+
+ns1:Q3590581 rdfs:label "Leeward Islands cricket team" .
+
+ns1:Q3593958 rdfs:label "ŁKS Łódź" .
+
+ns1:Q378628 rdfs:label "England national rugby union team" .
+
+ns1:Q391984 rdfs:label "Millonarios" .
+
+ns1:Q393357 rdfs:label "BC Žalgiris" .
+
+ns1:Q4573629 rdfs:label "IK Tjalve" .
+
+ns1:Q461595 rdfs:label "Atlanta Braves" .
+
+ns1:Q4791461 rdfs:label "Arizona Wildcats football" .
+
+ns1:Q4841 rdfs:label "Lech Poznań" .
+
+ns1:Q55801 rdfs:label "New Zealand national rugby union team" .
+
+ns1:Q5676342 rdfs:label "Harvard Crimson men's ice hockey" .
+
+ns1:Q606773 rdfs:label "Dorogi FC" .
+
+ns1:Q629300 rdfs:label "Home Farm F.C." .
+
+ns1:Q650365 rdfs:label "Carrarese Calcio" .
+
+ns1:Q653772 rdfs:label "Pittsburgh Pirates" .
+
+ns1:Q655591 rdfs:label "FC Koeppchen Wormeldange" .
+
+ns1:Q6767 rdfs:label "U.S. Livorno 1915" .
+
+ns1:Q693092 rdfs:label "Racing FC Union Luxembourg" .
+
+ns1:Q7054630 rdfs:label "North Carolina Tar Heels football" .
+
+ns1:Q732885 rdfs:label "Salgótarjáni BTC" .
+
+ns1:Q737937 rdfs:label "Lyn 1896 FK" .
+
+ns1:Q75729 rdfs:label "Sporting CP" .
+
+ns1:Q79800 rdfs:label "Argentina national association football team" .
+
+ns1:Q81888 rdfs:label "AFC Ajax" .
+
+ns1:Q841245 rdfs:label "FC Argeș" .
+
+ns1:Q842134 rdfs:label "FC Sportul Studențesc București" .
+
+ns1:Q8749 rdfs:label "RC Celta de Vigo" .
+
+ns1:Q3091261 rdfs:label "FC Barcelona" .
+
+ns1:Q39988 rdfs:label "Yonsei University" .
+
+ns1:Q5225674 rdfs:label "Dartmouth Big Green football" .
+
+ns1:Q7156 rdfs:label "FC Barcelona" .
diff --git a/tests/server/ingest/test_ingest_routes.py b/tests/server/ingest/test_ingest_routes.py
index 896410a17f..3bdba834c7 100644
--- a/tests/server/ingest/test_ingest_routes.py
+++ b/tests/server/ingest/test_ingest_routes.py
@@ -19,6 +19,12 @@ def test_ingest_accepts_pdf_files(ingest_helper: IngestHelper) -> None:
assert len(ingest_result.data) == 1
+def test_ingest_accepts_ttl_files(ingest_helper: IngestHelper) -> None:
+ path = Path(__file__).parents[0] / "test.ttl"
+ ingest_result = ingest_helper.ingest_file(path)
+ assert len(ingest_result.data) == 1
+
+
def test_ingest_list_returns_something_after_ingestion(
test_client: TestClient, ingest_helper: IngestHelper
) -> None: