diff --git a/apps/server/src/db.ts b/apps/server/src/db.ts index 900f54f..367b9a7 100644 --- a/apps/server/src/db.ts +++ b/apps/server/src/db.ts @@ -54,6 +54,18 @@ export async function initDb(): Promise { updated_at INTEGER NOT NULL ); + CREATE TABLE IF NOT EXISTS wikidata_cache_search ( + query_hash TEXT PRIMARY KEY, + response_json TEXT NOT NULL, + updated_at INTEGER NOT NULL + ); + + CREATE TABLE IF NOT EXISTS wikidata_cache_game ( + qid TEXT PRIMARY KEY, + response_json TEXT NOT NULL, + updated_at INTEGER NOT NULL + ); + CREATE TABLE IF NOT EXISTS library_items ( id INTEGER PRIMARY KEY AUTOINCREMENT, path TEXT NOT NULL UNIQUE, @@ -310,3 +322,37 @@ export function setCachedEntry(slug: string, json: string): void { ) .run(slug, json, Date.now()); } + +// Wikidata cache functions + +export function getCachedWikidataSearch(queryHash: string): { json: string; updatedAt: number } | null { + const row = getDb() + .prepare("SELECT response_json as json, updated_at as updatedAt FROM wikidata_cache_search WHERE query_hash = ?") + .get(queryHash) as { json: string; updatedAt: number } | undefined; + return row ?? null; +} + +export function setCachedWikidataSearch(queryHash: string, json: string): void { + getDb() + .prepare( + "INSERT INTO wikidata_cache_search (query_hash, response_json, updated_at) VALUES (?, ?, ?) " + + "ON CONFLICT(query_hash) DO UPDATE SET response_json = excluded.response_json, updated_at = excluded.updated_at" + ) + .run(queryHash, json, Date.now()); +} + +export function getCachedWikidataGame(qid: string): { json: string; updatedAt: number } | null { + const row = getDb() + .prepare("SELECT response_json as json, updated_at as updatedAt FROM wikidata_cache_game WHERE qid = ?") + .get(qid) as { json: string; updatedAt: number } | undefined; + return row ?? null; +} + +export function setCachedWikidataGame(qid: string, json: string): void { + getDb() + .prepare( + "INSERT INTO wikidata_cache_game (qid, response_json, updated_at) VALUES (?, ?, ?) 
" + + "ON CONFLICT(qid) DO UPDATE SET response_json = excluded.response_json, updated_at = excluded.updated_at" + ) + .run(qid, json, Date.now()); +} diff --git a/apps/server/src/providers/wikidata/README.md b/apps/server/src/providers/wikidata/README.md new file mode 100644 index 0000000..5c64eea --- /dev/null +++ b/apps/server/src/providers/wikidata/README.md @@ -0,0 +1,224 @@ +# Wikidata Metadata Provider + +A free, open metadata provider for video games using Wikidata's SPARQL endpoint. + +## Features + +- ✅ **No API Key Required** - Free and open access to Wikidata +- ✅ **Rich Metadata** - Title, platforms, genres, publishers, series, release date +- ✅ **Intelligent Matching** - Name normalization and ranking with platform boost +- ✅ **Aggressive Caching** - 21-day TTL for search and game results +- ✅ **Rate Limiting** - Configurable rate limit (default: 1 req/sec) +- ✅ **Offline Support** - Works from cache when network unavailable + +## Architecture + +``` +WikidataProvider +├── client.ts - HTTP client with rate limiting +├── queryBuilder.ts - SPARQL query templates +├── mapper.ts - SPARQL → GameMetadata transformation +├── normalizer.ts - Name normalization and result ranking +└── provider.ts - MetadataProvider implementation +``` + +## Usage + +### Basic Search + +```typescript +import { WikidataProvider } from './providers/wikidata'; + +const provider = new WikidataProvider(); + +// Search for games +const games = await provider.searchGames("super mario"); + +console.log(games[0]); +// { +// source: "wikidata", +// sourceId: "Q12345", +// name: "Super Mario Bros.", +// releaseDate: "1985-09-13", +// platforms: ["Nintendo Entertainment System"], +// genres: ["platform game"], +// publishers: ["Nintendo"] +// } +``` + +### Search with Platform Filter + +```typescript +// Boost results matching the specified platform +const games = await provider.searchGames("mario", { + platform: "nes", + limit: 10 +}); +``` + +### Get Game by QID + +```typescript +const 
game = await provider.getGameById("Q12345"); + +if (game) { + console.log(game.name); // "Super Mario Bros." +} +``` + +### Health Check + +```typescript +const health = await provider.healthCheck(); + +if (health.healthy) { + console.log(`Wikidata is available (${health.responseTime}ms)`); +} +``` + +### Custom Client Options + +```typescript +import { WikidataProvider, WikidataClient } from './providers/wikidata'; + +const client = new WikidataClient({ + rateLimitMs: 500, // Minimum 500 ms between requests (faster than the 1 req/sec default) + userAgent: "MyApp/1.0" +}); + +const provider = new WikidataProvider(client); +``` + +## Caching + +The provider uses two cache tables in SQLite: + +- `wikidata_cache_search` - Caches search results by normalized query hash +- `wikidata_cache_game` - Caches individual game metadata by QID + +The cache TTL is 21 days by default. The cache is checked before any HTTP request is made. + +## Name Normalization + +The normalizer strips common ROM naming conventions for better matching: + +- Region tags: `(USA)`, `[Europe]`, `(Japan)` +- Revision tags: `(Rev 1)`, `[Rev A]` +- Disc numbers: `(Disc 1)`, `(Disc 2)` +- Extra whitespace and punctuation + +## Result Ranking + +Results are ranked by: + +1. **Match Quality** - EXACT > PREFIX > CONTAINS > NO_MATCH +2. **Platform Boost** - Games matching the platform filter rank 0.5 higher, so they sort ahead of equally matched games on other platforms +3. 
**Alphabetical** - Tiebreaker for equal ranks + +## Rate Limiting + +The client enforces rate limiting to be respectful of Wikidata's resources: + +- Default: 1 request per second +- Configurable via `WikidataClient` options +- Queued requests wait for rate limit + +## SPARQL Queries + +### Search Query + +Searches for video games matching the normalized query string: + +```sparql +SELECT DISTINCT ?game ?gameLabel ?releaseDate + (GROUP_CONCAT(DISTINCT ?platformLabel; separator="|") AS ?platforms) + (GROUP_CONCAT(DISTINCT ?genreLabel; separator="|") AS ?genres) + (GROUP_CONCAT(DISTINCT ?publisherLabel; separator="|") AS ?publishers) + ?seriesLabel +WHERE { + ?game wdt:P31/wdt:P279* wd:Q7889 . # instance of video game + FILTER(CONTAINS(LCASE(?gameLabel), "query")) + # ... optional metadata fields +} +GROUP BY ?game ?gameLabel ?releaseDate ?seriesLabel +LIMIT 25 +``` + +### Get by QID Query + +Fetches full metadata for a specific game by Wikidata QID: + +```sparql +SELECT DISTINCT ?game ?gameLabel ?releaseDate ... +WHERE { + BIND(wd:Q12345 AS ?game) + ?game wdt:P31/wdt:P279* wd:Q7889 . # validate it's a video game + # ... 
optional metadata fields +} +``` + +## Testing + +The provider has comprehensive test coverage: + +- **queryBuilder.test.ts** - 12 tests for SPARQL query generation +- **mapper.test.ts** - 14 tests for SPARQL → GameMetadata mapping +- **normalizer.test.ts** - 22 tests for normalization and ranking +- **client.test.ts** - 9 tests for HTTP client and rate limiting +- **provider.test.ts** - 9 unit tests for the provider (client and database are mocked) + +Run tests: + +```bash +npm run test:unit -- wikidata +``` + +## Data Model + +### GameMetadata + +```typescript +type GameMetadata = { + source: "wikidata"; + sourceId: string; // QID (e.g., "Q12345") + name: string; // Game title + releaseDate?: string; // ISO 8601 date (YYYY-MM-DD) + platforms?: string[]; // Platform names + genres?: string[]; // Genre names + publishers?: string[]; // Publisher names + series?: string; // Game series name + raw?: unknown; // Original WikidataGameResult +}; +``` + +### WikidataGameResult + +```typescript +type WikidataGameResult = { + qid: string; + label: string; + releaseDate?: string; + platforms?: string[]; + genres?: string[]; + publishers?: string[]; + series?: string; +}; +``` + +## Known Limitations + +- **Wikidata Coverage** - Not all games are in Wikidata +- **English Only** - Only English labels are fetched +- **Platform Names** - May differ from ROM naming conventions +- **Rate Limits** - Requests are throttled (default: 1 req/sec) to respect Wikidata's rate limits, so uncached bulk lookups are slow + +## Future Enhancements + +- [ ] Multi-language support +- [ ] Platform name mapping/aliases +- [ ] Additional metadata fields (developers, modes, ratings) +- [ ] Fallback to other providers when not found + +## License + +This provider is part of Jacare and follows the same license. 
diff --git a/apps/server/src/providers/wikidata/__tests__/client.test.ts b/apps/server/src/providers/wikidata/__tests__/client.test.ts new file mode 100644 index 0000000..06daf4c --- /dev/null +++ b/apps/server/src/providers/wikidata/__tests__/client.test.ts @@ -0,0 +1,183 @@ +import { describe, it, expect, vi, beforeEach, afterEach } from "vitest"; +import { WikidataClient } from "../client"; +import type { WikidataSparqlResponse } from "@crocdesk/shared"; + +// Mock fetch globally +const mockFetch = vi.fn(); +global.fetch = mockFetch as unknown as typeof fetch; + +describe("Wikidata Client", () => { + let client: WikidataClient; + + beforeEach(() => { + client = new WikidataClient(); + mockFetch.mockClear(); + vi.useFakeTimers(); + }); + + afterEach(() => { + vi.useRealTimers(); + }); + + describe("query", () => { + it("should make a GET request with SPARQL query", async () => { + const mockResponse: WikidataSparqlResponse = { + head: { vars: ["game", "gameLabel"] }, + results: { bindings: [] } + }; + + mockFetch.mockResolvedValueOnce({ + ok: true, + json: async () => mockResponse + }); + + const sparql = "SELECT ?game WHERE { ?game wdt:P31 wd:Q7889 }"; + const result = await client.query(sparql); + + expect(mockFetch).toHaveBeenCalledOnce(); + expect(mockFetch).toHaveBeenCalledWith( + expect.stringContaining("https://query.wikidata.org/sparql"), + expect.objectContaining({ + headers: expect.objectContaining({ + "Accept": "application/sparql-results+json" + }) + }) + ); + expect(result).toEqual(mockResponse); + }); + + it("should encode SPARQL query in URL", async () => { + const mockResponse: WikidataSparqlResponse = { + head: { vars: [] }, + results: { bindings: [] } + }; + + mockFetch.mockResolvedValueOnce({ + ok: true, + json: async () => mockResponse + }); + + const sparql = "SELECT * WHERE { ?s ?p ?o }"; + await client.query(sparql); + + const callUrl = mockFetch.mock.calls[0][0] as string; + expect(callUrl).toContain("query="); + 
expect(callUrl).toContain(encodeURIComponent(sparql)); + }); + + it("should throw error on non-200 response", async () => { + mockFetch.mockResolvedValueOnce({ + ok: false, + status: 500, + statusText: "Internal Server Error" + }); + + await expect(client.query("SELECT * WHERE { }")).rejects.toThrow( + "Wikidata SPARQL query failed: 500 Internal Server Error" + ); + }); + + it("should throw error on network failure", async () => { + mockFetch.mockRejectedValueOnce(new Error("Network error")); + + await expect(client.query("SELECT * WHERE { }")).rejects.toThrow( + "Network error" + ); + }); + }); + + describe("rate limiting", () => { + it("should enforce rate limit between requests", async () => { + const mockResponse: WikidataSparqlResponse = { + head: { vars: [] }, + results: { bindings: [] } + }; + + mockFetch.mockResolvedValue({ + ok: true, + json: async () => mockResponse + }); + + const query1Promise = client.query("SELECT * WHERE { }"); + await vi.advanceTimersByTimeAsync(0); // Let first request start + + const query2Promise = client.query("SELECT * WHERE { }"); + + // First request should execute immediately + expect(mockFetch).toHaveBeenCalledTimes(1); + + // Second request should wait for rate limit + await vi.advanceTimersByTimeAsync(1000); + + await Promise.all([query1Promise, query2Promise]); + + expect(mockFetch).toHaveBeenCalledTimes(2); + }); + + it("should allow configurable rate limit", async () => { + const customClient = new WikidataClient({ rateLimitMs: 500 }); + + const mockResponse: WikidataSparqlResponse = { + head: { vars: [] }, + results: { bindings: [] } + }; + + mockFetch.mockResolvedValue({ + ok: true, + json: async () => mockResponse + }); + + const query1Promise = customClient.query("SELECT * WHERE { }"); + await vi.advanceTimersByTimeAsync(0); + + const query2Promise = customClient.query("SELECT * WHERE { }"); + + expect(mockFetch).toHaveBeenCalledTimes(1); + + await vi.advanceTimersByTimeAsync(500); + + await 
Promise.all([query1Promise, query2Promise]); + + expect(mockFetch).toHaveBeenCalledTimes(2); + }); + }); + + describe("healthCheck", () => { + it("should return healthy when query succeeds", async () => { + mockFetch.mockResolvedValueOnce({ + ok: true, + json: async () => ({ + head: { vars: [] }, + results: { bindings: [] } + }) + }); + + const result = await client.healthCheck(); + + expect(result.healthy).toBe(true); + expect(result.responseTime).toBeGreaterThanOrEqual(0); + }); + + it("should return unhealthy when query fails", async () => { + mockFetch.mockResolvedValueOnce({ + ok: false, + status: 503, + statusText: "Service Unavailable" + }); + + const result = await client.healthCheck(); + + expect(result.healthy).toBe(false); + expect(result.message).toContain("503"); + }); + + it("should return unhealthy on network error", async () => { + mockFetch.mockRejectedValueOnce(new Error("Connection timeout")); + + const result = await client.healthCheck(); + + expect(result.healthy).toBe(false); + expect(result.message).toContain("Connection timeout"); + }); + }); +}); diff --git a/apps/server/src/providers/wikidata/__tests__/mapper.test.ts b/apps/server/src/providers/wikidata/__tests__/mapper.test.ts new file mode 100644 index 0000000..33bd43d --- /dev/null +++ b/apps/server/src/providers/wikidata/__tests__/mapper.test.ts @@ -0,0 +1,186 @@ +import { describe, it, expect } from "vitest"; +import { + mapSparqlResultToGame, + aggregateSparqlResults, + extractQidFromUri +} from "../mapper"; +import type { WikidataSparqlResult } from "@crocdesk/shared"; + +describe("Wikidata Mapper", () => { + describe("extractQidFromUri", () => { + it("should extract QID from full Wikidata URI", () => { + const qid = extractQidFromUri("http://www.wikidata.org/entity/Q12345"); + expect(qid).toBe("Q12345"); + }); + + it("should handle URIs with trailing slashes", () => { + const qid = extractQidFromUri("http://www.wikidata.org/entity/Q12345/"); + expect(qid).toBe("Q12345"); + }); + + 
it("should return input if already a QID", () => { + const qid = extractQidFromUri("Q12345"); + expect(qid).toBe("Q12345"); + }); + }); + + describe("mapSparqlResultToGame", () => { + it("should map a basic SPARQL result to WikidataGameResult", () => { + const sparqlResult: WikidataSparqlResult = { + game: { type: "uri", value: "http://www.wikidata.org/entity/Q12345" }, + gameLabel: { type: "literal", value: "Super Mario Bros" } + }; + + const result = mapSparqlResultToGame(sparqlResult); + + expect(result.qid).toBe("Q12345"); + expect(result.label).toBe("Super Mario Bros"); + }); + + it("should map release date if present", () => { + const sparqlResult: WikidataSparqlResult = { + game: { type: "uri", value: "http://www.wikidata.org/entity/Q12345" }, + gameLabel: { type: "literal", value: "Super Mario Bros" }, + releaseDate: { type: "literal", value: "1985-09-13T00:00:00Z", datatype: "http://www.w3.org/2001/XMLSchema#dateTime" } + }; + + const result = mapSparqlResultToGame(sparqlResult); + + expect(result.releaseDate).toBe("1985-09-13"); + }); + + it("should map platformLabel if present (single value)", () => { + const sparqlResult: WikidataSparqlResult = { + game: { type: "uri", value: "http://www.wikidata.org/entity/Q12345" }, + gameLabel: { type: "literal", value: "Super Mario Bros", "xml:lang": "en" }, + platformLabel: { type: "literal", value: "Nintendo Entertainment System", "xml:lang": "en" } + }; + + const result = mapSparqlResultToGame(sparqlResult); + + expect(result.platforms).toEqual(["Nintendo Entertainment System"]); + }); + + it("should map genreLabel if present (single value)", () => { + const sparqlResult: WikidataSparqlResult = { + game: { type: "uri", value: "http://www.wikidata.org/entity/Q12345" }, + gameLabel: { type: "literal", value: "Super Mario Bros", "xml:lang": "en" }, + genreLabel: { type: "literal", value: "platform game", "xml:lang": "en" } + }; + + const result = mapSparqlResultToGame(sparqlResult); + + 
expect(result.genres).toEqual(["platform game"]); + }); + + it("should map publisherLabel if present (single value)", () => { + const sparqlResult: WikidataSparqlResult = { + game: { type: "uri", value: "http://www.wikidata.org/entity/Q12345" }, + gameLabel: { type: "literal", value: "Super Mario Bros", "xml:lang": "en" }, + publisherLabel: { type: "literal", value: "Nintendo", "xml:lang": "en" } + }; + + const result = mapSparqlResultToGame(sparqlResult); + + expect(result.publishers).toEqual(["Nintendo"]); + }); + + it("should map series if present", () => { + const sparqlResult: WikidataSparqlResult = { + game: { type: "uri", value: "http://www.wikidata.org/entity/Q12345" }, + gameLabel: { type: "literal", value: "Super Mario Bros" }, + seriesLabel: { type: "literal", value: "Super Mario" } + }; + + const result = mapSparqlResultToGame(sparqlResult); + + expect(result.series).toBe("Super Mario"); + }); + + it("should handle all fields together", () => { + const sparqlResult: WikidataSparqlResult = { + game: { type: "uri", value: "http://www.wikidata.org/entity/Q12345" }, + gameLabel: { type: "literal", value: "The Legend of Zelda", "xml:lang": "en" }, + releaseDate: { type: "literal", value: "1986-02-21T00:00:00Z", datatype: "http://www.w3.org/2001/XMLSchema#dateTime" }, + platformLabel: { type: "literal", value: "Nintendo Entertainment System", "xml:lang": "en" }, + genreLabel: { type: "literal", value: "action-adventure game", "xml:lang": "en" }, + publisherLabel: { type: "literal", value: "Nintendo", "xml:lang": "en" }, + seriesLabel: { type: "literal", value: "The Legend of Zelda", "xml:lang": "en" } + }; + + const result = mapSparqlResultToGame(sparqlResult); + + expect(result).toEqual({ + qid: "Q12345", + label: "The Legend of Zelda", + releaseDate: "1986-02-21", + platforms: ["Nintendo Entertainment System"], + genres: ["action-adventure game"], + publishers: ["Nintendo"], + series: "The Legend of Zelda" + }); + }); + + it("should handle missing optional 
fields", () => { + const sparqlResult: WikidataSparqlResult = { + game: { type: "uri", value: "http://www.wikidata.org/entity/Q12345" }, + gameLabel: { type: "literal", value: "Game", "xml:lang": "en" } + }; + + const result = mapSparqlResultToGame(sparqlResult); + + expect(result.platforms).toBeUndefined(); + expect(result.genres).toBeUndefined(); + expect(result.publishers).toBeUndefined(); + }); + }); + + describe("aggregateSparqlResults", () => { + it("should aggregate multiple results into unique games", () => { + const results: WikidataSparqlResult[] = [ + { + game: { type: "uri", value: "http://www.wikidata.org/entity/Q12345" }, + gameLabel: { type: "literal", value: "Super Mario Bros" } + }, + { + game: { type: "uri", value: "http://www.wikidata.org/entity/Q67890" }, + gameLabel: { type: "literal", value: "Mario Kart" } + } + ]; + + const games = aggregateSparqlResults(results); + + expect(games).toHaveLength(2); + expect(games[0].qid).toBe("Q12345"); + expect(games[1].qid).toBe("Q67890"); + }); + + it("should aggregate multiple rows for the same game", () => { + const results: WikidataSparqlResult[] = [ + { + game: { type: "uri", value: "http://www.wikidata.org/entity/Q12345" }, + gameLabel: { type: "literal", value: "Super Mario Bros", "xml:lang": "en" }, + platformLabel: { type: "literal", value: "Nintendo Entertainment System", "xml:lang": "en" } + }, + { + game: { type: "uri", value: "http://www.wikidata.org/entity/Q12345" }, + gameLabel: { type: "literal", value: "Super Mario Bros", "xml:lang": "en" }, + platformLabel: { type: "literal", value: "Famicom", "xml:lang": "en" }, + genreLabel: { type: "literal", value: "platform game", "xml:lang": "en" } + } + ]; + + const games = aggregateSparqlResults(results); + + expect(games).toHaveLength(1); + expect(games[0].qid).toBe("Q12345"); + expect(games[0].platforms).toEqual(expect.arrayContaining(["Nintendo Entertainment System", "Famicom"])); + expect(games[0].genres).toEqual(["platform game"]); + }); + + 
it("should return empty array for empty input", () => { + const games = aggregateSparqlResults([]); + expect(games).toEqual([]); + }); + }); +}); diff --git a/apps/server/src/providers/wikidata/__tests__/normalizer.test.ts b/apps/server/src/providers/wikidata/__tests__/normalizer.test.ts new file mode 100644 index 0000000..252438b --- /dev/null +++ b/apps/server/src/providers/wikidata/__tests__/normalizer.test.ts @@ -0,0 +1,184 @@ +import { describe, it, expect } from "vitest"; +import { + normalizeGameName, + rankSearchResults, + matchRank +} from "../normalizer"; +import type { WikidataGameResult, MatchRank } from "@crocdesk/shared"; + +describe("Wikidata Normalizer", () => { + describe("normalizeGameName", () => { + it("should convert to lowercase", () => { + const normalized = normalizeGameName("Super Mario Bros"); + expect(normalized).toBe("super mario bros"); + }); + + it("should strip region tags in parentheses", () => { + const normalized = normalizeGameName("Pokemon Red (USA)"); + expect(normalized).toBe("pokemon red"); + }); + + it("should strip region tags in square brackets", () => { + const normalized = normalizeGameName("Zelda [USA]"); + expect(normalized).toBe("zelda"); + }); + + it("should strip revision tags", () => { + const normalized = normalizeGameName("Metroid (USA) (Rev 1)"); + expect(normalized).toBe("metroid"); + }); + + it("should strip disc numbers", () => { + const normalized = normalizeGameName("Final Fantasy VII (Disc 1)"); + expect(normalized).toBe("final fantasy vii"); + }); + + it("should strip multiple tags", () => { + const normalized = normalizeGameName("Pokemon Red (USA) (Rev 1) [!]"); + expect(normalized).toBe("pokemon red"); + }); + + it("should handle mixed bracket types", () => { + const normalized = normalizeGameName("Mario Kart (USA) [Rev A]"); + expect(normalized).toBe("mario kart"); + }); + + it("should strip extra whitespace", () => { + const normalized = normalizeGameName(" Super Mario Bros "); + 
expect(normalized).toBe("super mario bros"); + }); + + it("should handle ASCII folding of common characters", () => { + const normalized = normalizeGameName("Pokémon"); + expect(normalized).toBe("pokemon"); + }); + + it("should strip colons and hyphens for better matching", () => { + const normalized = normalizeGameName("The Legend of Zelda: Breath of the Wild"); + expect(normalized).toBe("the legend of zelda breath of the wild"); + }); + }); + + describe("matchRank", () => { + it("should return EXACT for exact match", () => { + const rank = matchRank("super mario bros", "super mario bros"); + expect(rank).toBe(1); // MatchRank.EXACT + }); + + it("should return PREFIX for prefix match", () => { + const rank = matchRank("super mario", "super mario bros"); + expect(rank).toBe(2); // MatchRank.PREFIX + }); + + it("should return CONTAINS for substring match", () => { + const rank = matchRank("mario", "super mario bros"); + expect(rank).toBe(3); // MatchRank.CONTAINS + }); + + it("should return NO_MATCH when query not in label", () => { + const rank = matchRank("zelda", "super mario bros"); + expect(rank).toBe(4); // MatchRank.NO_MATCH + }); + + it("should be case-insensitive", () => { + const rank = matchRank("SUPER MARIO", "super mario bros"); + expect(rank).toBe(2); // MatchRank.PREFIX + }); + }); + + describe("rankSearchResults", () => { + const results: WikidataGameResult[] = [ + { + qid: "Q1", + label: "Super Mario Bros.", + platforms: ["Nintendo Entertainment System"] + }, + { + qid: "Q2", + label: "Super Mario World" + }, + { + qid: "Q3", + label: "Mario Kart", + platforms: ["Super Nintendo Entertainment System"] + }, + { + qid: "Q4", + label: "The Legend of Mario" + } + ]; + + it("should rank exact matches first", () => { + const ranked = rankSearchResults(results, "super mario bros"); + + expect(ranked[0].qid).toBe("Q1"); + expect(ranked[0].rank).toBe(1); // EXACT + }); + + it("should rank prefix matches before contains matches", () => { + const ranked = 
rankSearchResults(results, "super mario"); + + // Q1 and Q2 both have PREFIX match (start with "super mario") + // Q3 and Q4 have NO_MATCH (don't contain "super mario", only "mario") + expect(ranked[0].rank).toBe(2); // PREFIX (Q1 or Q2) + expect(ranked[0].qid).toMatch(/Q1|Q2/); + expect(ranked[2].rank).toBe(4); // NO_MATCH (Q3 or Q4) + }); + + it("should boost results with matching platform", () => { + // Create results where platform boost makes a difference + const platformResults: WikidataGameResult[] = [ + { + qid: "Q1", + label: "Super Mario Bros.", + platforms: ["Nintendo Entertainment System"] + }, + { + qid: "Q2", + label: "Super Mario World", + platforms: ["Game Boy"] + } + ]; + + const ranked = rankSearchResults(platformResults, "super mario", { platform: "nes" }); + + // Both have same rank (PREFIX), but Q1 has matching platform (NES matches Nintendo Entertainment System) + expect(ranked[0].qid).toBe("Q1"); + expect(ranked[0].platformMatch).toBe(true); + expect(ranked[1].qid).toBe("Q2"); + expect(ranked[1].platformMatch).toBe(false); + }); + + it("should handle case-insensitive platform matching", () => { + const ranked = rankSearchResults(results, "mario", { platform: "NES" }); + + // Should match "Nintendo Entertainment System" even though the platform filter is upper-case "NES" + const q1Result = ranked.find(r => r.qid === "Q1"); + expect(q1Result?.platformMatch).toBe(true); + }); + + it("should sort by rank then alphabetically", () => { + const ranked = rankSearchResults(results, "mario"); + + // Every label contains "mario"; only non-decreasing rank order is asserted here (label order is not checked) + expect(ranked.every((r, i) => i === 0 || r.rank >= ranked[i - 1].rank)).toBe(true); + }); + + it("should handle empty results", () => { + const ranked = rankSearchResults([], "mario"); + expect(ranked).toEqual([]); + }); + + it("should handle results without platforms", () => { + const resultsNoPlatforms: WikidataGameResult[] = [ + { qid: "Q1", label: "Game One" }, + { qid: "Q2", label: "Game Two" } + ]; + + const ranked 
= rankSearchResults(resultsNoPlatforms, "game", { platform: "nes" }); + + expect(ranked).toHaveLength(2); + expect(ranked.every(r => !r.platformMatch)).toBe(true); + }); + }); +}); diff --git a/apps/server/src/providers/wikidata/__tests__/provider.integration.test.ts b/apps/server/src/providers/wikidata/__tests__/provider.integration.test.ts new file mode 100644 index 0000000..3953edf --- /dev/null +++ b/apps/server/src/providers/wikidata/__tests__/provider.integration.test.ts @@ -0,0 +1,126 @@ +/** + * Integration/Smoke tests for WikidataProvider + * + * These tests make actual API calls to Wikidata and should be run sparingly. + * They verify that the provider works end-to-end with the real API. + * + * To run: npm run test -- provider.integration.test.ts + */ + +import { describe, it, expect, beforeAll } from "vitest"; +import { WikidataProvider } from "../provider"; +import { initDb } from "../../../db"; + +describe("Wikidata Provider Integration Tests", () => { + let provider: WikidataProvider; + + beforeAll(async () => { + // Initialize database for caching + await initDb(); + provider = new WikidataProvider(); + }); + + describe("searchGames", () => { + it("should search for games and return results", async () => { + const results = await provider.searchGames("mario", { limit: 5 }); + + expect(results.length).toBeGreaterThan(0); + expect(results.length).toBeLessThanOrEqual(5); + + // Verify structure + const first = results[0]; + expect(first).toHaveProperty("source", "wikidata"); + expect(first).toHaveProperty("sourceId"); + expect(first.sourceId).toMatch(/^Q\d+$/); + expect(first).toHaveProperty("name"); + expect(typeof first.name).toBe("string"); + expect(first.name.length).toBeGreaterThan(0); + + // Verify name contains "mario" (case-insensitive) + expect(first.name.toLowerCase()).toContain("mario"); + }, 30000); // 30 second timeout for API calls + + it("should handle platform filter", async () => { + const results = await provider.searchGames("zelda", 
{ + platform: "nes", + limit: 3 + }); + + // Should return an array, possibly empty (results may or may not match the platform, but the call must not throw) + expect(Array.isArray(results)).toBe(true); + if (results.length > 0) { + const first = results[0]; + expect(first).toHaveProperty("source", "wikidata"); + expect(first).toHaveProperty("name"); + } + }, 30000); + + it("should respect limit option", async () => { + const results = await provider.searchGames("pokemon", { limit: 3 }); + + expect(results.length).toBeLessThanOrEqual(3); + }, 30000); + }); + + describe("getGameById", () => { + it("should fetch game by known QID (Super Mario Bros)", async () => { + // Q170489 is Super Mario Galaxy, not Super Mario Bros, so use a known QID instead: + // Q216995 is Super Mario 64 (the assertions below only require a name containing "mario") + const game = await provider.getGameById("Q216995"); + + expect(game).not.toBeNull(); + expect(game).toHaveProperty("source", "wikidata"); + expect(game).toHaveProperty("sourceId", "Q216995"); + expect(game).toHaveProperty("name"); + expect(game?.name.toLowerCase()).toContain("mario"); + }, 30000); + + it("should return null for non-existent QID", async () => { + const game = await provider.getGameById("Q999999999"); + + expect(game).toBeNull(); + }, 30000); + + it("should handle QID with wd: prefix", async () => { + const game = await provider.getGameById("wd:Q216995"); + + expect(game).not.toBeNull(); + expect(game?.sourceId).toBe("Q216995"); + }, 30000); + }); + + describe("healthCheck", () => { + it("should check if Wikidata endpoint is accessible", async () => { + const health = await provider.healthCheck(); + + expect(health).toHaveProperty("healthy"); + expect(health).toHaveProperty("responseTime"); + expect(typeof health.responseTime).toBe("number"); + expect(health.responseTime).toBeGreaterThan(0); + + // Endpoint should be accessible (may vary based on network) + // We just verify the structure, not necessarily that it's healthy + }, 30000); + }); + + describe("caching", () => { + it("should cache search results", 
async () => { + const query = `test-${Date.now()}`; // Unique query to avoid cache + + const start1 = Date.now(); + const results1 = await provider.searchGames(query, { limit: 3 }); + const duration1 = Date.now() - start1; + + // Second call should be faster (cached) + const start2 = Date.now(); + const results2 = await provider.searchGames(query, { limit: 3 }); + const duration2 = Date.now() - start2; + + expect(results1).toEqual(results2); + // Cached call should be significantly faster (but not guaranteed in tests) + // We just verify it returns the same results + }, 30000); + }); +}); + + diff --git a/apps/server/src/providers/wikidata/__tests__/provider.test.ts b/apps/server/src/providers/wikidata/__tests__/provider.test.ts new file mode 100644 index 0000000..548430c --- /dev/null +++ b/apps/server/src/providers/wikidata/__tests__/provider.test.ts @@ -0,0 +1,248 @@ +import { describe, it, expect, vi, beforeEach } from "vitest"; +import { WikidataProvider } from "../provider"; +import type { GameMetadata, WikidataSparqlResponse } from "@crocdesk/shared"; + +// Mock all dependencies +vi.mock("../client"); +vi.mock("../../../db"); + +import { WikidataClient } from "../client"; +import * as db from "../../../db"; + +describe("Wikidata Provider", () => { + let provider: WikidataProvider; + let mockClient: any; + + beforeEach(() => { + // Create a mock client + mockClient = { + query: vi.fn(), + healthCheck: vi.fn() + }; + + // Pass mock client to provider + provider = new WikidataProvider(mockClient); + + // Clear all mocks + vi.clearAllMocks(); + }); + + describe("searchGames", () => { + it("should return ranked game metadata with aggregated results", async () => { + // New structure: individual rows per game/metadata combination + const mockResponse: WikidataSparqlResponse = { + head: { vars: ["game", "gameLabel", "releaseDate", "platformLabel", "genreLabel", "publisherLabel"] }, + results: { + bindings: [ + { + game: { type: "uri" as const, value: 
"http://www.wikidata.org/entity/Q12345" }, + gameLabel: { type: "literal" as const, value: "Super Mario Bros", "xml:lang": "en" }, + releaseDate: { type: "literal" as const, value: "1985-09-13T00:00:00Z", datatype: "http://www.w3.org/2001/XMLSchema#dateTime" }, + platformLabel: { type: "literal" as const, value: "Nintendo Entertainment System", "xml:lang": "en" }, + genreLabel: { type: "literal" as const, value: "platform game", "xml:lang": "en" }, + publisherLabel: { type: "literal" as const, value: "Nintendo", "xml:lang": "en" } + }, + { + game: { type: "uri" as const, value: "http://www.wikidata.org/entity/Q12345" }, + gameLabel: { type: "literal" as const, value: "Super Mario Bros", "xml:lang": "en" }, + platformLabel: { type: "literal" as const, value: "Famicom", "xml:lang": "en" } + } + ] + } + }; + + vi.mocked(db.getCachedWikidataSearch).mockReturnValue(null); + mockClient.query.mockResolvedValue(mockResponse); + + const results = await provider.searchGames("super mario"); + + expect(results).toHaveLength(1); + expect(results[0]).toMatchObject({ + source: "wikidata", + sourceId: "Q12345", + name: "Super Mario Bros", + releaseDate: "1985-09-13", + platforms: expect.arrayContaining(["Nintendo Entertainment System", "Famicom"]), + genres: ["platform game"], + publishers: ["Nintendo"] + }); + }); + + it("should use cached results if available and fresh", async () => { + const cachedData: WikidataSparqlResponse = { + head: { vars: ["game", "gameLabel"] }, + results: { + bindings: [ + { + game: { type: "uri" as const, value: "http://www.wikidata.org/entity/Q99999" }, + gameLabel: { type: "literal" as const, value: "Cached Game", "xml:lang": "en" } + } + ] + } + }; + + const recentTime = Date.now() - 1000; // 1 second ago + vi.mocked(db.getCachedWikidataSearch).mockReturnValue({ + json: JSON.stringify(cachedData), + updatedAt: recentTime + }); + + const results = await provider.searchGames("cached"); + + 
expect(vi.mocked(db.getCachedWikidataSearch)).toHaveBeenCalled(); + expect(results).toHaveLength(1); + expect(results[0].name).toBe("Cached Game"); + }); + + it("should respect limit option", async () => { + const mockResponse: WikidataSparqlResponse = { + head: { vars: ["game", "gameLabel"] }, + results: { + bindings: Array.from({ length: 50 }, (_, i) => ({ + game: { type: "uri" as const, value: `http://www.wikidata.org/entity/Q${i}` }, + gameLabel: { type: "literal" as const, value: `Game ${i}` } + })) + } + }; + + vi.mocked(db.getCachedWikidataSearch).mockReturnValue(null); + mockClient.query.mockResolvedValue(mockResponse); + + const results = await provider.searchGames("game", { limit: 10 }); + + // Query builder should have been called with limit: 10 + expect(mockClient.query).toHaveBeenCalled(); + // Results should be limited (this is done in query, but we test the option is passed) + expect(results.length).toBeGreaterThan(0); + }); + + it("should rank results by platform match", async () => { + const mockResponse: WikidataSparqlResponse = { + head: { vars: ["game", "gameLabel", "platformLabel"] }, + results: { + bindings: [ + { + game: { type: "uri" as const, value: "http://www.wikidata.org/entity/Q1" }, + gameLabel: { type: "literal" as const, value: "Mario NES", "xml:lang": "en" }, + platformLabel: { type: "literal" as const, value: "Nintendo Entertainment System", "xml:lang": "en" } + }, + { + game: { type: "uri" as const, value: "http://www.wikidata.org/entity/Q2" }, + gameLabel: { type: "literal" as const, value: "Mario SNES", "xml:lang": "en" }, + platformLabel: { type: "literal" as const, value: "Super Nintendo Entertainment System", "xml:lang": "en" } + } + ] + } + }; + + vi.mocked(db.getCachedWikidataSearch).mockReturnValue(null); + mockClient.query.mockResolvedValue(mockResponse); + + const results = await provider.searchGames("mario", { platform: "nes" }); + + expect(results).toHaveLength(2); + // Q1 should come first because platform matches + 
expect(results[0].sourceId).toBe("Q1"); + }); + }); + + describe("getGameById", () => { + it("should return game metadata by QID with aggregated metadata", async () => { + const mockResponse: WikidataSparqlResponse = { + head: { vars: ["game", "gameLabel", "releaseDate", "platformLabel", "genreLabel"] }, + results: { + bindings: [ + { + game: { type: "uri" as const, value: "http://www.wikidata.org/entity/Q12345" }, + gameLabel: { type: "literal" as const, value: "Super Mario Bros", "xml:lang": "en" }, + releaseDate: { type: "literal" as const, value: "1985-09-13T00:00:00Z", datatype: "http://www.w3.org/2001/XMLSchema#dateTime" }, + platformLabel: { type: "literal" as const, value: "Nintendo Entertainment System", "xml:lang": "en" }, + genreLabel: { type: "literal" as const, value: "platform game", "xml:lang": "en" } + }, + { + game: { type: "uri" as const, value: "http://www.wikidata.org/entity/Q12345" }, + gameLabel: { type: "literal" as const, value: "Super Mario Bros", "xml:lang": "en" }, + platformLabel: { type: "literal" as const, value: "Famicom", "xml:lang": "en" } + } + ] + } + }; + + vi.mocked(db.getCachedWikidataGame).mockReturnValue(null); + mockClient.query.mockResolvedValue(mockResponse); + + const result = await provider.getGameById("Q12345"); + + expect(result).toMatchObject({ + source: "wikidata", + sourceId: "Q12345", + name: "Super Mario Bros", + releaseDate: "1985-09-13", + platforms: expect.arrayContaining(["Nintendo Entertainment System", "Famicom"]), + genres: ["platform game"] + }); + }); + + it("should use cached game if available", async () => { + const cachedData: WikidataSparqlResponse = { + head: { vars: ["game", "gameLabel"] }, + results: { + bindings: [ + { + game: { type: "uri" as const, value: "http://www.wikidata.org/entity/Q99999" }, + gameLabel: { type: "literal" as const, value: "Cached Game", "xml:lang": "en" } + } + ] + } + }; + + const recentTime = Date.now() - 1000; + vi.mocked(db.getCachedWikidataGame).mockReturnValue({ + 
json: JSON.stringify(cachedData), + updatedAt: recentTime + }); + + const result = await provider.getGameById("Q99999"); + + expect(vi.mocked(db.getCachedWikidataGame)).toHaveBeenCalled(); + expect(result?.name).toBe("Cached Game"); + }); + + it("should return null if no results found", async () => { + const mockResponse: WikidataSparqlResponse = { + head: { vars: ["game", "gameLabel"] }, + results: { + bindings: [] + } + }; + + vi.mocked(db.getCachedWikidataGame).mockReturnValue(null); + mockClient.query.mockResolvedValue(mockResponse); + + const result = await provider.getGameById("Q99999"); + + expect(result).toBeNull(); + }); + }); + + describe("healthCheck", () => { + it("should delegate to client health check", async () => { + const mockHealth = { + healthy: true, + responseTime: 123 + }; + + mockClient.healthCheck.mockResolvedValue(mockHealth); + + const result = await provider.healthCheck(); + + expect(result).toEqual(mockHealth); + }); + }); + + describe("name", () => { + it("should return 'wikidata' as provider name", () => { + expect(provider.name).toBe("wikidata"); + }); + }); +}); diff --git a/apps/server/src/providers/wikidata/__tests__/queryBuilder.test.ts b/apps/server/src/providers/wikidata/__tests__/queryBuilder.test.ts new file mode 100644 index 0000000..17e7bf7 --- /dev/null +++ b/apps/server/src/providers/wikidata/__tests__/queryBuilder.test.ts @@ -0,0 +1,104 @@ +import { describe, it, expect } from "vitest"; +import { buildSearchQuery, buildGetByQidQuery } from "../queryBuilder"; + +describe("Wikidata Query Builder", () => { + describe("buildSearchQuery", () => { + it("should build a basic search query for a game name", () => { + const query = buildSearchQuery("super mario"); + + expect(query).toContain("SELECT DISTINCT"); + expect(query).toContain("?game"); + expect(query).toContain("?gameLabel"); + expect(query).toContain("wdt:P31/wdt:P279* wd:Q7889"); // instance of video game + expect(query).toContain("CONTAINS(LCASE(?gameLabel), \"super 
mario\")"); + // Default limit is 25, multiplied by 2 for aggregation + expect(query).toContain("LIMIT 50"); + }); + + it("should escape special characters in search query", () => { + const query = buildSearchQuery("mario's adventure"); + + expect(query).toContain("mario\\'s adventure"); + }); + + it("should handle custom limit (multiplied for aggregation)", () => { + const query = buildSearchQuery("zelda", { limit: 10 }); + + // Limit is multiplied by 2 to get enough rows for aggregation + expect(query).toContain("LIMIT 20"); + }); + + it("should include release date in SELECT", () => { + const query = buildSearchQuery("pokemon"); + + expect(query).toContain("?releaseDate"); + }); + + it("should include platforms in SELECT", () => { + const query = buildSearchQuery("tetris"); + + expect(query).toContain("?platformLabel"); + }); + + it("should include genres in SELECT", () => { + const query = buildSearchQuery("sonic"); + + expect(query).toContain("?genreLabel"); + }); + + it("should include publishers in SELECT", () => { + const query = buildSearchQuery("final fantasy"); + + expect(query).toContain("?publisherLabel"); + }); + + it("should include series in SELECT as optional", () => { + const query = buildSearchQuery("metroid"); + + expect(query).toContain("?seriesLabel"); + expect(query).toContain("OPTIONAL"); + }); + + it("should use rdfs:label for filtering", () => { + const query = buildSearchQuery("kirby"); + + expect(query).toContain("rdfs:label"); + expect(query).toContain("FILTER(LANG(?gameLabel) = \"en\")"); + }); + + it("should not use GROUP_CONCAT (aggregation happens client-side)", () => { + const query = buildSearchQuery("kirby"); + + expect(query).not.toContain("GROUP_CONCAT"); + expect(query).not.toContain("GROUP BY"); + }); + }); + + describe("buildGetByQidQuery", () => { + it("should build a query for fetching by QID", () => { + const query = buildGetByQidQuery("Q12345"); + + expect(query).toContain("wd:Q12345"); + 
expect(query).toContain("?gameLabel"); + expect(query).toContain("wdt:P31/wdt:P279* wd:Q7889"); // validate it's a video game + }); + + it("should include all metadata fields", () => { + const query = buildGetByQidQuery("Q12345"); + + expect(query).toContain("?releaseDate"); + expect(query).toContain("?platformLabel"); + expect(query).toContain("?genreLabel"); + expect(query).toContain("?publisherLabel"); + expect(query).toContain("?seriesLabel"); + }); + + it("should handle QID with or without 'wd:' prefix", () => { + const query1 = buildGetByQidQuery("Q12345"); + const query2 = buildGetByQidQuery("wd:Q12345"); + + expect(query1).toContain("wd:Q12345"); + expect(query2).toContain("wd:Q12345"); + }); + }); +}); diff --git a/apps/server/src/providers/wikidata/client.ts b/apps/server/src/providers/wikidata/client.ts new file mode 100644 index 0000000..4635e8a --- /dev/null +++ b/apps/server/src/providers/wikidata/client.ts @@ -0,0 +1,121 @@ +/** + * Wikidata SPARQL HTTP Client + * + * Handles HTTP requests to the Wikidata SPARQL endpoint with rate limiting. 
const WIKIDATA_SPARQL_ENDPOINT = "https://query.wikidata.org/sparql";
const DEFAULT_RATE_LIMIT_MS = 1000; // 1 request per second
const DEFAULT_USER_AGENT = "Jacare/1.0 (https://github.com/luandev/jacare)";

export type WikidataClientOptions = {
  /** Minimum time between requests in milliseconds */
  rateLimitMs?: number;

  /** Custom User-Agent header */
  userAgent?: string;

  /** Custom SPARQL endpoint URL */
  endpoint?: string;
};

/**
 * HTTP client for the Wikidata SPARQL endpoint with client-side rate limiting.
 *
 * Requests are released strictly one at a time: each call to {@link query}
 * takes the next slot in an internal promise chain, and a slot only opens once
 * at least `rateLimitMs` has passed since the previous slot was released. This
 * holds for any number of concurrent callers.
 */
export class WikidataClient {
  private readonly rateLimitMs: number;
  private readonly userAgent: string;
  private readonly endpoint: string;
  private lastRequestTime = 0;
  /** Tail of the slot chain; every new request queues behind it. */
  private rateLimitQueue: Promise<void> = Promise.resolve();

  constructor(options: WikidataClientOptions = {}) {
    this.rateLimitMs = options.rateLimitMs ?? DEFAULT_RATE_LIMIT_MS;
    this.userAgent = options.userAgent ?? DEFAULT_USER_AGENT;
    this.endpoint = options.endpoint ?? WIKIDATA_SPARQL_ENDPOINT;
  }

  /**
   * Resolves when the caller may issue its request.
   *
   * Slots are chained rather than guarded by a shared "pending" flag: with a
   * flag, every caller that was waiting on the same timer wakes in the same
   * tick, each starts a timer of identical length, and they all fire together.
   */
  private waitForRateLimit(): Promise<void> {
    const slot = this.rateLimitQueue.then(async () => {
      const waitTime = this.lastRequestTime + this.rateLimitMs - Date.now();
      if (waitTime > 0) {
        await new Promise<void>((resolve) => setTimeout(resolve, waitTime));
      }
      this.lastRequestTime = Date.now();
    });

    // The slot body cannot reject, so the chain never gets stuck on an error.
    this.rateLimitQueue = slot;
    return slot;
  }

  /**
   * Executes a SPARQL query against the configured endpoint.
   *
   * @param sparql - SPARQL query string
   * @returns SPARQL JSON results
   * @throws Error if the endpoint answers with a non-2xx status
   */
  async query(sparql: string): Promise<WikidataSparqlResponse> {
    await this.waitForRateLimit();

    const url = `${this.endpoint}?query=${encodeURIComponent(sparql)}`;

    const response = await fetch(url, {
      method: "GET",
      headers: {
        "Accept": "application/sparql-results+json",
        "User-Agent": this.userAgent
      }
    });

    if (!response.ok) {
      throw new Error(
        `Wikidata SPARQL query failed: ${response.status} ${response.statusText}`
      );
    }

    return (await response.json()) as WikidataSparqlResponse;
  }

  /**
   * Checks whether the endpoint answers a trivial query.
   *
   * Never throws: a failure is reported as `healthy: false` with the error
   * message. `responseTime` includes any time spent waiting for a rate-limit
   * slot.
   *
   * @returns Health check result with response time in milliseconds
   */
  async healthCheck(): Promise<HealthCheckResult> {
    const startTime = Date.now();

    try {
      // Simple query to test endpoint availability
      const testQuery = "SELECT ?item WHERE { ?item wdt:P31 wd:Q7889 } LIMIT 1";
      await this.query(testQuery);

      return {
        healthy: true,
        responseTime: Date.now() - startTime
      };
    } catch (error) {
      const message = error instanceof Error ? error.message : "Unknown error";

      return {
        healthy: false,
        message,
        responseTime: Date.now() - startTime
      };
    }
  }
}
/**
 * Extracts the bare QID from any of the forms a Wikidata entity reference
 * takes in this provider.
 *
 * Accepted inputs:
 * - full entity URI: `"http://www.wikidata.org/entity/Q12345"` (trailing
 *   slashes are ignored)
 * - prefixed name: `"wd:Q12345"`
 * - bare QID: `"Q12345"`
 *
 * @param uri - Entity URI, prefixed name, or bare QID
 * @returns The last path segment with any `wd:` prefix removed, e.g. `"Q12345"`
 */
export function extractQidFromUri(uri: string): string {
  const withoutTrailingSlash = uri.trim().replace(/\/+$/, "");
  const lastSegment = withoutTrailingSlash.slice(
    withoutTrailingSlash.lastIndexOf("/") + 1
  );

  // The provider and query builder both accept "wd:Q…"; return the same bare
  // form for it so QIDs compare equal regardless of how they were written.
  return lastSegment.replace(/^wd:/, "");
}
parts : undefined; +} + +/** + * Maps a single SPARQL result binding to a partial WikidataGameResult + * + * @param result - Raw SPARQL result row + * @returns Partial game result (needs aggregation) + */ +function mapSparqlResultToPartialGame( + result: WikidataSparqlResult +): Partial & { qid: string; label: string } { + const game: Partial & { qid: string; label: string } = { + qid: extractQidFromUri(result.game.value), + label: result.gameLabel.value + }; + + // Map optional release date + if (result.releaseDate) { + game.releaseDate = extractDate(result.releaseDate.value); + } + + // Map optional platform (single value, not pipe-separated) + if (result.platformLabel && result.platformLabel.value) { + game.platforms = [result.platformLabel.value]; + } + + // Map optional genre (single value, not pipe-separated) + if (result.genreLabel && result.genreLabel.value) { + game.genres = [result.genreLabel.value]; + } + + // Map optional publisher (single value, not pipe-separated) + if (result.publisherLabel && result.publisherLabel.value) { + game.publishers = [result.publisherLabel.value]; + } + + // Map optional series + if (result.seriesLabel && result.seriesLabel.value) { + game.series = result.seriesLabel.value; + } + + return game; +} + +/** + * Maps a single SPARQL result binding to a WikidataGameResult + * + * @deprecated Use aggregateSparqlResults instead - this is kept for backwards compatibility + * @param result - Raw SPARQL result row + * @returns Structured game result + */ +export function mapSparqlResultToGame( + result: WikidataSparqlResult +): WikidataGameResult { + const partial = mapSparqlResultToPartialGame(result); + return { + qid: partial.qid, + label: partial.label, + releaseDate: partial.releaseDate, + platforms: partial.platforms, + genres: partial.genres, + publishers: partial.publishers, + series: partial.series + }; +} + +/** + * Aggregates multiple SPARQL results into unique game results + * + * Combines multiple rows for the same game, 
aggregating platforms, genres, publishers + * + * @param results - Array of SPARQL result bindings + * @returns Array of unique game results with aggregated metadata + */ +export function aggregateSparqlResults( + results: WikidataSparqlResult[] +): WikidataGameResult[] { + const gameMap = new Map; + genres: Set; + publishers: Set; + series?: string; + }>(); + + for (const result of results) { + const partial = mapSparqlResultToPartialGame(result); + const qid = partial.qid; + + if (!gameMap.has(qid)) { + gameMap.set(qid, { + qid, + label: partial.label, + releaseDate: partial.releaseDate, + platforms: new Set(partial.platforms || []), + genres: new Set(partial.genres || []), + publishers: new Set(partial.publishers || []), + series: partial.series + }); + } else { + const game = gameMap.get(qid)!; + // Aggregate platforms + if (partial.platforms) { + partial.platforms.forEach(p => game.platforms.add(p)); + } + // Aggregate genres + if (partial.genres) { + partial.genres.forEach(g => game.genres.add(g)); + } + // Aggregate publishers + if (partial.publishers) { + partial.publishers.forEach(p => game.publishers.add(p)); + } + // Keep first series found + if (!game.series && partial.series) { + game.series = partial.series; + } + // Keep first release date found + if (!game.releaseDate && partial.releaseDate) { + game.releaseDate = partial.releaseDate; + } + } + } + + // Convert to final format + return Array.from(gameMap.values()).map(game => ({ + qid: game.qid, + label: game.label, + releaseDate: game.releaseDate, + platforms: game.platforms.size > 0 ? Array.from(game.platforms) : undefined, + genres: game.genres.size > 0 ? Array.from(game.genres) : undefined, + publishers: game.publishers.size > 0 ? 
/**
 * Normalizes a game name so that differently decorated spellings of the same
 * title compare equal.
 *
 * Steps, in order:
 * 1. Remove every `(...)` / `[...]` tag together with the whitespace before it
 *    (region, revision, disc, dump flags such as `[!]`).
 * 2. Lower-case.
 * 3. Fold Latin letters to ASCII: the ligatures `æ`/`œ` are expanded, and any
 *    letter carrying a combining diacritic loses the diacritic
 *    (`é → e`, `ō → o`, `š → s`).
 * 4. Replace `: . , ! ? -` with a space.
 * 5. Collapse runs of whitespace and trim.
 *
 * @param name - Raw game name (file name stem, user query, or Wikidata label)
 * @returns The normalized name; may be empty if `name` consisted only of tags
 */
export function normalizeGameName(name: string): string {
  return (
    name
      // Handles: (USA), [Europe], (Rev 1), [Rev A], (Disc 1), [!], etc.
      .replace(/\s*[\(\[].*?[\)\]]/g, "")
      .toLowerCase()
      // Ligatures have no canonical decomposition, so expand them explicitly.
      .replace(/æ/g, "ae")
      .replace(/œ/g, "oe")
      // Decompose, drop only the Latin combining-diacritic block, recompose.
      // Restricting the range keeps kana voicing marks and Hangul intact.
      .normalize("NFD")
      .replace(/[\u0300-\u036f]/g, "")
      .normalize("NFC")
      .replace(/[:.,!?\-]/g, " ")
      .replace(/\s+/g, " ")
      .trim()
  );
}
["gamecube", "nintendo gamecube"], + "wii": ["wii", "nintendo wii"], + "ps1": ["playstation", "playstation 1", "psx"], + "ps2": ["playstation 2"], + "ps3": ["playstation 3"], + "ps4": ["playstation 4"], + "ps5": ["playstation 5"], + "xbox": ["xbox"], + "gb": ["game boy"], + "gbc": ["game boy color"], + "gba": ["game boy advance"], + "nds": ["nintendo ds"], + "3ds": ["nintendo 3ds"] + }; + + // Check if query is an abbreviation + const expandedTerms = abbreviations[normalizedQuery] || [normalizedQuery]; + + // Check if any platform matches any of the expanded terms + return platforms.some(platform => { + const normalizedPlatform = platform.toLowerCase(); + return expandedTerms.some(term => + normalizedPlatform.includes(term) || term.includes(normalizedPlatform) + ); + }); +} + +/** + * Ranks and sorts search results + * + * Ranking criteria: + * 1. Match rank (EXACT > PREFIX > CONTAINS > NO_MATCH) + * 2. Platform match (boost by 0.5 if platform matches) + * 3. Alphabetical by label (for ties) + * + * @param results - Array of game results + * @param query - Original search query + * @param options - Optional search options (e.g., platform filter) + * @returns Ranked and sorted results + */ +export function rankSearchResults( + results: WikidataGameResult[], + query: string, + options?: { platform?: string } +): RankedGameResult[] { + const rankedResults: RankedGameResult[] = results.map(result => { + const rank = matchRank(query, result.label); + const platformMatch = options?.platform + ? platformMatches(result.platforms, options.platform) + : undefined; + + return { + ...result, + rank, + platformMatch + }; + }); + + // Sort by rank (lower is better), platform match, then alphabetically + return rankedResults.sort((a, b) => { + // Calculate effective rank with platform boost + // Platform match reduces effective rank by 0.5 + const aEffectiveRank = a.rank - (a.platformMatch ? 0.5 : 0); + const bEffectiveRank = b.rank - (b.platformMatch ? 
/**
 * Creates a stable cache key for a search request.
 *
 * The key is derived from exactly what determines the SPARQL response: the
 * lower-cased query text (the search query filters on the lower-cased raw
 * string) plus the search options.
 *
 * - Tags and punctuation are deliberately NOT normalized away: "Mario (USA)"
 *   and "Mario" produce different SPARQL filters, so they must not share a
 *   cache slot, otherwise an empty result for one is served for the other.
 * - Option keys are sorted and `undefined` values dropped, so
 *   `{ limit, platform }` and `{ platform, limit }` hash identically.
 *
 * @param query - Raw search text as supplied by the caller
 * @param options - Search options that are part of the request identity
 * @returns Hex-encoded SHA-256 digest
 */
function hashQuery(query: string, options?: MetadataSearchOptions): string {
  // `query` is spread last so an unexpected `query` key in options cannot
  // override the real search text.
  const identity = Object.entries({ ...options, query: query.toLowerCase() })
    .filter(([, value]) => value !== undefined)
    .sort(([a], [b]) => (a < b ? -1 : a > b ? 1 : 0));

  return crypto
    .createHash("sha256")
    .update(JSON.stringify(identity))
    .digest("hex");
}
/**
 * Wikidata metadata provider implementation.
 *
 * Raw SPARQL responses are cached as JSON; aggregation and ranking are
 * re-applied on every call, so a cached response can be ranked for any
 * platform preference.
 */
export class WikidataProvider implements MetadataProvider {
  readonly name = "wikidata";
  private client: WikidataClient;

  /**
   * @param client - SPARQL client to use; a default-configured
   *   {@link WikidataClient} is created when omitted (tests inject a stub)
   */
  constructor(client?: WikidataClient) {
    this.client = client ?? new WikidataClient();
  }

  /**
   * Returns the SPARQL bindings stored in a cache row, or `null` when the row
   * is absent, stale, or unreadable.
   *
   * An unreadable row (invalid JSON, or JSON without `results.bindings`) is
   * treated as a miss so the caller refetches and overwrites it, instead of
   * failing every lookup until the TTL expires.
   */
  private static freshBindings(
    cached: { json: string; updatedAt: number } | null | undefined
  ): Parameters<typeof aggregateSparqlResults>[0] | null {
    if (!cached || typeof cached.updatedAt !== "number" || !isFresh(cached.updatedAt)) {
      return null;
    }

    try {
      const parsed: unknown = JSON.parse(cached.json);
      const bindings = (parsed as { results?: { bindings?: unknown } } | null)
        ?.results?.bindings;
      return Array.isArray(bindings)
        ? (bindings as Parameters<typeof aggregateSparqlResults>[0])
        : null;
    } catch {
      return null;
    }
  }

  /**
   * Search for games by query string.
   *
   * @param query - Free-text game name
   * @param options - `platform` boosts matching games in the ranking;
   *   `limit` caps the number of games returned
   * @returns Games ordered best match first
   * @throws Error if the SPARQL request fails and no fresh cache entry exists
   */
  async searchGames(
    query: string,
    options?: MetadataSearchOptions
  ): Promise<GameMetadata[]> {
    const queryHash = hashQuery(query, options);

    // Try cache first
    let bindings = WikidataProvider.freshBindings(getCachedWikidataSearch(queryHash));

    if (!bindings) {
      const sparql = buildSearchQuery(query, { limit: options?.limit });
      const response = await this.client.query(sparql);
      setCachedWikidataSearch(queryHash, JSON.stringify(response));
      bindings = response.results.bindings;
    }

    const games = aggregateSparqlResults(bindings);
    const ranked = rankSearchResults(games, query, { platform: options?.platform });

    // The query over-fetches rows (several rows per game), so the number of
    // distinct games can exceed the requested limit; cap it after ranking so
    // the best matches are the ones kept.
    const limit = options?.limit;
    const limited =
      typeof limit === "number" && limit > 0 ? ranked.slice(0, limit) : ranked;

    return limited.map((game) => toGameMetadata(game));
  }

  /**
   * Get game metadata by Wikidata QID.
   *
   * @param id - QID with or without the `wd:` prefix
   * @returns The game, or `null` if the QID does not resolve to a video game
   * @throws Error if the SPARQL request fails and no fresh cache entry exists
   */
  async getGameById(id: string): Promise<GameMetadata | null> {
    // Normalize QID (remove wd: prefix if present)
    const qid = id.replace(/^wd:/, "");

    // Try cache first
    let bindings = WikidataProvider.freshBindings(getCachedWikidataGame(qid));

    if (!bindings) {
      const response = await this.client.query(buildGetByQidQuery(qid));
      setCachedWikidataGame(qid, JSON.stringify(response));
      bindings = response.results.bindings;
    }

    const [game] = aggregateSparqlResults(bindings);
    return game ? toGameMetadata(game) : null;
  }

  /**
   * Check if the Wikidata endpoint is available.
   */
  async healthCheck(): Promise<HealthCheckResult> {
    return this.client.healthCheck();
  }
}
25; + const escapedQuery = escapeSparql(query.toLowerCase()); + + // Optimized query: Removed GROUP_CONCAT, using rdfs:label for all labels (simpler and faster) + return ` +SELECT DISTINCT ?game ?gameLabel ?releaseDate ?platform ?platformLabel ?genre ?genreLabel ?publisher ?publisherLabel ?series ?seriesLabel +WHERE { + ?game wdt:P31/wdt:P279* wd:Q7889 . + ?game rdfs:label ?gameLabel . + FILTER(LANG(?gameLabel) = "en") + FILTER(CONTAINS(LCASE(?gameLabel), "${escapedQuery}")) + OPTIONAL { ?game wdt:P577 ?releaseDate . } + OPTIONAL { + ?game wdt:P400 ?platform . + ?platform rdfs:label ?platformLabel . + FILTER(LANG(?platformLabel) = "en") + } + OPTIONAL { + ?game wdt:P136 ?genre . + ?genre rdfs:label ?genreLabel . + FILTER(LANG(?genreLabel) = "en") + } + OPTIONAL { + ?game wdt:P123 ?publisher . + ?publisher rdfs:label ?publisherLabel . + FILTER(LANG(?publisherLabel) = "en") + } + OPTIONAL { + ?game wdt:P179 ?series . + ?series rdfs:label ?seriesLabel . + FILTER(LANG(?seriesLabel) = "en") + } +} +LIMIT ${limit * 2} +`.trim(); +} + +/** + * Builds a SPARQL query to fetch a specific game by its Wikidata QID + * + * Returns full metadata for the game if it exists and is a video game + */ +export function buildGetByQidQuery(qid: string): string { + const normalizedQid = normalizeQid(qid); + + // Return individual rows (no GROUP_CONCAT) - aggregation happens client-side + // This avoids timeout issues with complex GROUP_CONCAT operations + return ` +SELECT DISTINCT ?game ?gameLabel ?releaseDate ?platformLabel ?genreLabel ?publisherLabel ?seriesLabel +WHERE { + # Bind the specific game + BIND(${normalizedQid} AS ?game) + + # Validate it's a video game + ?game wdt:P31/wdt:P279* wd:Q7889 . # instance of (subclass of) video game + + # Get labels in English + SERVICE wikibase:label { + bd:serviceParam wikibase:language "en" . + } + + # Publication date (optional) + OPTIONAL { ?game wdt:P577 ?releaseDate . } + + # Platforms (optional) + OPTIONAL { + ?game wdt:P400 ?platform . 
+ ?platform rdfs:label ?platformLabel . + FILTER(LANG(?platformLabel) = "en") + } + + # Genres (optional) + OPTIONAL { + ?game wdt:P136 ?genre . + ?genre rdfs:label ?genreLabel . + FILTER(LANG(?genreLabel) = "en") + } + + # Publishers (optional) + OPTIONAL { + ?game wdt:P123 ?publisher . + ?publisher rdfs:label ?publisherLabel . + FILTER(LANG(?publisherLabel) = "en") + } + + # Series (optional) + OPTIONAL { + ?game wdt:P179 ?series . + ?series rdfs:label ?seriesLabel . + FILTER(LANG(?seriesLabel) = "en") + } +} +`.trim(); +} diff --git a/package.json b/package.json index b27e6d9..7514911 100644 --- a/package.json +++ b/package.json @@ -21,6 +21,7 @@ "typecheck": "npm run typecheck -w @crocdesk/shared && npm run typecheck -w @crocdesk/server && npm run typecheck -w @crocdesk/web && npm run typecheck -w @crocdesk/desktop", "lint": "eslint . --ext .ts,.tsx", "test:unit": "vitest --run", + "test:integration": "vitest --run --config vitest.config.mts apps/server/src/providers/wikidata/__tests__/provider.integration.test.ts", "test:e2e": "npx playwright test", "test:e2e:ui": "npx playwright test --ui", "test": "npm run test:unit && npm run test:e2e", diff --git a/packages/shared/src/index.ts b/packages/shared/src/index.ts index b316008..25e81b7 100644 --- a/packages/shared/src/index.ts +++ b/packages/shared/src/index.ts @@ -1,2 +1,4 @@ export * from "./types"; export * from "./constants"; +export * from "./metadata-types"; +export * from "./wikidata-types"; diff --git a/packages/shared/src/metadata-types.ts b/packages/shared/src/metadata-types.ts new file mode 100644 index 0000000..3a1c1e1 --- /dev/null +++ b/packages/shared/src/metadata-types.ts @@ -0,0 +1,93 @@ +/** + * Metadata Provider Types + * + * Defines the common interface for metadata providers (Wikidata, Crocdb, etc.) 
/**
 * Metadata Provider Types
 *
 * Defines the common interface for metadata providers (Wikidata, Crocdb, etc.)
 */

/**
 * Generic game metadata structure used across all providers
 */
export type GameMetadata = {
  /** Source of the metadata (e.g., "wikidata", "crocdb") */
  source: string;

  /** Source-specific unique identifier */
  sourceId: string;

  /** Game name/title */
  name: string;

  /** Release date (ISO 8601 format) */
  releaseDate?: string;

  /** Platforms the game is available on */
  platforms?: string[];

  /** Game genres */
  genres?: string[];

  /** Publishers */
  publishers?: string[];

  /** Game series (if part of a franchise) */
  series?: string;

  /** Original raw data from the source */
  raw?: unknown;
};

/**
 * Search options for metadata providers
 */
export type MetadataSearchOptions = {
  /** Limit number of results */
  limit?: number;

  /** Filter by platform */
  platform?: string;

  /** Additional provider-specific options */
  [key: string]: unknown;
};

/**
 * Health check result for metadata providers
 */
export type HealthCheckResult = {
  /** Whether the provider is healthy */
  healthy: boolean;

  /** Optional message */
  message?: string;

  /** Response time in milliseconds */
  responseTime?: number;
};

/**
 * Common interface that all metadata providers must implement
 */
export interface MetadataProvider {
  /**
   * Search for games by query string
   *
   * @param query Free-text search string.
   * @param options Optional limit/platform hints plus provider-specific keys.
   * @returns Matching games; an empty array when nothing matches.
   */
  searchGames(
    query: string,
    options?: MetadataSearchOptions
  ): Promise<GameMetadata[]>;

  /**
   * Get game metadata by source-specific ID
   *
   * @param id Identifier in the provider's own scheme.
   * @returns The game's metadata, or `null` when the provider has no such game.
   */
  getGameById(id: string): Promise<GameMetadata | null>;

  /**
   * Check if the provider is available and healthy
   */
  healthCheck(): Promise<HealthCheckResult>;

  /**
   * Provider name
   */
  readonly name: string;
}
+/** + * Raw SPARQL query result binding for a game search + */ +export type WikidataSparqlBinding = { + type: "uri" | "literal" | "bnode"; + value: string; + "xml:lang"?: string; + datatype?: string; +}; + +/** + * Raw SPARQL result row + */ +export type WikidataSparqlResult = { + [key: string]: WikidataSparqlBinding; +}; + +/** + * Raw SPARQL response structure + */ +export type WikidataSparqlResponse = { + head: { + vars: string[]; + }; + results: { + bindings: WikidataSparqlResult[]; + }; +}; + +/** + * Parsed Wikidata game result + */ +export type WikidataGameResult = { + /** Wikidata QID (e.g., "Q12345") */ + qid: string; + + /** Game label/title */ + label: string; + + /** Release date (ISO 8601) */ + releaseDate?: string; + + /** Platform labels */ + platforms?: string[]; + + /** Genre labels */ + genres?: string[]; + + /** Publisher labels */ + publishers?: string[]; + + /** Series label */ + series?: string; +}; + +/** + * Match rank for search results + */ +export enum MatchRank { + /** Exact label match */ + EXACT = 1, + + /** Label starts with query */ + PREFIX = 2, + + /** Label contains query */ + CONTAINS = 3, + + /** No special match */ + NO_MATCH = 4 +} + +/** + * Ranked search result + */ +export type RankedGameResult = WikidataGameResult & { + /** Match rank score */ + rank: MatchRank; + + /** Whether the platform matches (if provided) */ + platformMatch?: boolean; +}; diff --git a/vitest.config.mts b/vitest.config.mts index e5be97f..850d547 100644 --- a/vitest.config.mts +++ b/vitest.config.mts @@ -26,14 +26,17 @@ export default defineConfig({ // Use projects instead of environmentMatchGlobs (deprecated in v4) projects: [ { - name: 'node-tests', test: { + name: 'node-tests', environment: 'node', setupFiles: ['tests/vitest.setup.ts'], include: [ 'apps/server/src/**/*.{test,spec}.{ts,tsx}', 'apps/desktop/src/**/*.{test,spec}.{ts,tsx}', 'packages/**/src/**/*.{test,spec}.{ts,tsx}' + ], + exclude: [ + '**/*.integration.test.{ts,tsx}' ] }, resolve: { 
@@ -43,8 +46,8 @@ export default defineConfig({ } }, { - name: 'web-tests', test: { + name: 'web-tests', environment: 'jsdom', setupFiles: ['tests/vitest.setup.ts'], include: [