Skip to content

Commit 5b9e26e

Browse files
committed
refactor: improve parse claim types
1 parent 746a6ba commit 5b9e26e

File tree

6 files changed

+250
-151
lines changed

6 files changed

+250
-151
lines changed

scripts/compare_datatypes.ts

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,10 @@
11
#!/usr/bin/env ts-node
22
import { kebabCase } from 'lodash-es'
33
import { red, green } from 'tiny-chalk'
4-
import { parsers } from '../src/helpers/parse_claim.js'
4+
import { DataTypes } from '../src/types/claim.js'
5+
import { isOfType } from '../src/utils/utils.js'
56
import { readJsonFile } from '../tests/lib/utils.js'
67

7-
const supportedTypes = Object.keys(parsers)
8-
98
const allDatatypes = readJsonFile('/tmp/all_wikidata_datatypes.json') as string[]
109
allDatatypes
1110
.map(typeUri => {
@@ -15,7 +14,7 @@ allDatatypes
1514
return kebabCase(typeName)
1615
})
1716
.forEach(type => {
18-
if (supportedTypes.includes(type)) {
17+
if (isOfType(DataTypes, type)) {
1918
console.log(green('ok'), type)
2019
} else {
2120
console.error(red('unsupported type'), type)

src/helpers/parse_claim.ts

Lines changed: 93 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -1,31 +1,43 @@
1-
import { convertTime } from './wikibase_time.js'
2-
import type { SimplifySnakOptions } from '../types/simplify_claims.js'
1+
import { convertTime, type TimeConverter, type TimeConverterFn } from './wikibase_time.js'
2+
import type { DataType } from '../types/claim.js'
3+
import type { SnakEntityValue, SnakGlobeCoordinateValue, SnakMonolingualTextValue, SnakQuantityValue, SnakStringValue, SnakTimeValue, SnakValue } from '../types/snakvalue.js'
34

4-
const simple = datavalue => datavalue.value
5+
const simple = <T>(datavalue: { readonly value: T }): T => datavalue.value
56

6-
const monolingualtext = (datavalue, options) => {
7+
const monolingualtext = (datavalue: SnakMonolingualTextValue, options: { readonly keepRichValues?: boolean } = {}) => {
78
return options.keepRichValues ? datavalue.value : datavalue.value.text
89
}
910

10-
const entity = (datavalue, options) => prefixedId(datavalue, options.entityPrefix)
11+
interface SimplifyEntitySnakOptions {
12+
readonly entityPrefix?: string
13+
}
14+
15+
const entity = (datavalue: SnakEntityValue, options: SimplifyEntitySnakOptions = {}) => prefixedId(datavalue, options.entityPrefix)
1116

1217
const entityLetter = {
1318
item: 'Q',
1419
lexeme: 'L',
1520
property: 'P',
1621
} as const
1722

18-
const prefixedId = (datavalue, prefix) => {
23+
const prefixedId = (datavalue: SnakEntityValue, prefix: string | undefined) => {
1924
const { value } = datavalue
20-
const id = value.id || entityLetter[value['entity-type']] + value['numeric-id']
25+
const id = 'id' in value ? value.id : (entityLetter[value['entity-type']] + value['numeric-id'])
2126
return typeof prefix === 'string' ? `${prefix}:${id}` : id
2227
}
2328

24-
const quantity = (datavalue, options) => {
29+
interface SimplifiedQuantity {
30+
amount: number
31+
unit: string
32+
upperBound?: number
33+
lowerBound?: number
34+
}
35+
36+
const quantity = (datavalue: SnakQuantityValue, options: { readonly keepRichValues?: boolean } = {}) => {
2537
const { value } = datavalue
2638
const amount = parseFloat(value.amount)
2739
if (options.keepRichValues) {
28-
const richValue: any = {
40+
const richValue: SimplifiedQuantity = {
2941
amount: parseFloat(value.amount),
3042
// ex: http://www.wikidata.org/entity/
3143
unit: value.unit.replace(/^https?:\/\/.*\/entity\//, ''),
@@ -38,59 +50,93 @@ const quantity = (datavalue, options) => {
3850
}
3951
}
4052

41-
const coordinate = (datavalue, options) => {
53+
const coordinate = (datavalue: SnakGlobeCoordinateValue, options: { readonly keepRichValues?: boolean } = {}) => {
4254
if (options.keepRichValues) {
4355
return datavalue.value
4456
} else {
4557
return [ datavalue.value.latitude, datavalue.value.longitude ]
4658
}
4759
}
4860

49-
const time = (datavalue, options: SimplifySnakOptions) => {
61+
interface SimplifyTimeSnakOptions {
62+
readonly keepRichValues?: boolean
63+
readonly timeConverter?: TimeConverterFn | TimeConverter
64+
}
65+
const time = (datavalue: SnakTimeValue, options: SimplifyTimeSnakOptions = {}) => {
5066
const timeValue = convertTime(options.timeConverter, datavalue.value)
5167
if (options.keepRichValues) {
52-
const { timezone, before, after, precision, calendarmodel } = datavalue.value
53-
return { time: timeValue, timezone, before, after, precision, calendarmodel }
68+
return { ...datavalue.value, time: timeValue }
5469
} else {
5570
return timeValue
5671
}
5772
}
5873

59-
export const parsers = {
60-
commonsMedia: simple,
61-
'external-id': simple,
62-
'geo-shape': simple,
63-
'globe-coordinate': coordinate,
64-
math: simple,
65-
monolingualtext,
66-
'musical-notation': simple,
67-
quantity,
68-
string: simple,
69-
'tabular-data': simple,
70-
time,
71-
url: simple,
72-
'wikibase-entityid': entity,
73-
'wikibase-form': entity,
74-
'wikibase-item': entity,
75-
'wikibase-lexeme': entity,
76-
'wikibase-property': entity,
77-
'wikibase-sense': entity,
78-
} as const
74+
type SimplifySnakOptions = SimplifyTimeSnakOptions & SimplifyEntitySnakOptions
7975

80-
export function parseClaim (datatype, datavalue, options, claimId) {
76+
export function parseClaim (
77+
datatype: DataType | undefined,
78+
datavalue: SnakValue,
79+
options: SimplifySnakOptions,
80+
claimId: string,
81+
) {
8182
// Known case of missing datatype: form.claims, sense.claims
82-
datatype = datatype || datavalue.type
83-
// Known case requiring this: legacy "muscial notation" datatype
84-
datatype = datatype.replace(' ', '-')
85-
86-
try {
87-
return parsers[datatype](datavalue, options)
88-
} catch (err: unknown) {
89-
if (err instanceof Error && err.message === 'parsers[datatype] is not a function') {
90-
err.message = `${datatype} claim parser isn't implemented
91-
Claim id: ${claimId}
92-
Please report to https://github.com/maxlath/wikibase-sdk/issues`
93-
}
94-
throw err
83+
// in that case, datavalue.type is used to pick the parser instead
84+
85+
// @ts-expect-error known case requiring this: legacy "musical notation" datatype
86+
datatype = datatype?.replace(' ', '-')
87+
88+
if (
89+
datatype === 'wikibase-form' ||
90+
datatype === 'wikibase-item' ||
91+
datatype === 'wikibase-lexeme' ||
92+
datatype === 'wikibase-property' ||
93+
datatype === 'wikibase-sense' ||
94+
datavalue.type === 'wikibase-entityid'
95+
) {
96+
return entity(datavalue as SnakEntityValue, options)
97+
}
98+
99+
if (datatype === 'globe-coordinate' || datavalue.type === 'globecoordinate') {
100+
return coordinate(datavalue as SnakGlobeCoordinateValue, options)
95101
}
102+
103+
if (datatype === 'monolingualtext' || datavalue.type === 'monolingualtext') {
104+
return monolingualtext(datavalue as SnakMonolingualTextValue, options)
105+
}
106+
107+
if (datatype === 'quantity' || datavalue.type === 'quantity') {
108+
return quantity(datavalue as SnakQuantityValue, options)
109+
}
110+
111+
if (datatype === 'time' || datavalue.type === 'time') {
112+
return time(datavalue as SnakTimeValue, options)
113+
}
114+
115+
if (
116+
datatype === 'commonsMedia' ||
117+
datatype === 'external-id' ||
118+
datatype === 'geo-shape' ||
119+
datatype === 'math' ||
120+
datatype === 'musical-notation' ||
121+
datatype === 'string' ||
122+
datatype === 'tabular-data' ||
123+
datatype === 'url' ||
124+
datavalue.type === 'string'
125+
) {
126+
// eslint-disable-next-line @typescript-eslint/no-unnecessary-type-assertion
127+
return simple(datavalue as SnakStringValue)
128+
}
129+
130+
unknownClaimType(datatype, datavalue, claimId)
131+
}
132+
133+
// Exhaustiveness check: if an argument isn't of type `never` here, some case above is not implemented and TypeScript refuses to compile
134+
function unknownClaimType (
135+
datatype: never,
136+
datavalue: { readonly type: never },
137+
claimId: string,
138+
): never {
139+
const minimal = String(datatype) || String(datavalue)
140+
const full = JSON.stringify({ datatype, datavalue })
141+
throw new Error(`${minimal} claim parser isn't implemented\nPlease report to https://github.com/maxlath/wikibase-sdk/issues\n\nClaim id: ${claimId}\n${full}`)
96142
}

src/helpers/simplify_claims.ts

Lines changed: 54 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,15 @@
11
import { uniq } from '../utils/utils.js'
22
import { parseClaim } from './parse_claim.js'
33
import { truthyPropertyClaims, nonDeprecatedPropertyClaims } from './rank.js'
4-
import type { Claim, Claims, PropertyClaims, PropertyQualifiers, Qualifier, Qualifiers, Reference } from '../types/claim.js'
4+
import type { Claim, Claims, DataType, PropertyClaims, PropertyQualifiers, QualifierSnak, Qualifiers, Reference, ReferenceSnak, SnakType } from '../types/claim.js'
5+
import type { PropertyId } from '../types/entity.js'
56
import type { SimplifiedClaim, SimplifiedClaims, SimplifiedPropertyClaims, SimplifySnakOptions, SimplifySnaksOptions } from '../types/simplify_claims.js'
7+
import type { SnakValue } from '../types/snakvalue.js'
68

7-
function simplifySnaks (snaks, options) {
9+
function simplifySnaks (snaks: Record<PropertyId, any[]>, options: SimplifySnaksOptions) {
810
const { propertyPrefix } = options
911
const simplifiedSnaks: any = {}
10-
for (let id in snaks) {
11-
const propertySnaks = snaks[id]
12+
for (let [ id, propertySnaks ] of Object.entries(snaks)) {
1213
if (propertyPrefix) {
1314
id = propertyPrefix + ':' + id
1415
}
@@ -17,7 +18,7 @@ function simplifySnaks (snaks, options) {
1718
return simplifiedSnaks
1819
}
1920

20-
function simplifyPropertySnaks (propertySnaks, options) {
21+
function simplifyPropertySnaks (propertySnaks: any[], options: SimplifySnaksOptions) {
2122
// Avoid throwing on empty inputs, to allow simplifying a claims array
2223
// without having to know if the entity has claims for this property
2324
// Ex: simplifyPropertyClaims(entity.claims.P124211616)
@@ -31,35 +32,40 @@ function simplifyPropertySnaks (propertySnaks, options) {
3132
propertySnaks = truthyPropertyClaims(propertySnaks)
3233
}
3334

34-
propertySnaks = propertySnaks
35+
const simplified = propertySnaks
3536
.map(claim => simplifyClaim(claim, options))
3637
// Filter-out novalue and somevalue claims,
3738
// unless a novalueValue or a somevalueValue is passed in options
3839
// Considers null as defined
3940
.filter(obj => obj !== undefined)
4041

4142
// Deduplicate values unless we return a rich value object
42-
if (propertySnaks[0] && typeof propertySnaks[0] !== 'object') {
43-
return uniq(propertySnaks)
43+
if (simplified[0] && typeof simplified[0] !== 'object') {
44+
return uniq(simplified)
4445
} else {
45-
return propertySnaks
46+
return simplified
4647
}
4748
}
4849

49-
// Expects a single snak object
50-
// Ex: entity.claims.P369[0]
51-
function simplifySnak (claim, options) {
50+
/**
51+
* Tries to replace a deep Wikidata claim object with a simple value
52+
* e.g. a string, an entity Qid or an epoch time number
53+
*
54+
* Expects a single snak object
55+
* Ex: entity.claims.P369[0]
56+
*/
57+
function simplifySnak (claim: Claim | QualifierSnak | ReferenceSnak, options: SimplifySnakOptions) {
5258
const { keepQualifiers, keepReferences, keepIds, keepHashes, keepTypes, keepSnaktypes, keepRanks } = parseKeepOptions(options)
5359

54-
// tries to replace wikidata deep claim object by a simple value
55-
// e.g. a string, an entity Qid or an epoch time number
56-
const { mainsnak, rank } = claim
57-
58-
let value, datatype, datavalue, snaktype, isQualifierSnak, isReferenceSnak
59-
if (mainsnak) {
60-
datatype = mainsnak.datatype
61-
datavalue = mainsnak.datavalue
62-
snaktype = mainsnak.snaktype
60+
let datatype: DataType | undefined
61+
let datavalue: SnakValue
62+
let snaktype: SnakType
63+
let isQualifierSnak: boolean
64+
let isReferenceSnak: boolean
65+
if ('mainsnak' in claim) {
66+
datatype = claim.mainsnak.datatype
67+
datavalue = claim.mainsnak.datavalue
68+
snaktype = claim.mainsnak.snaktype
6369
} else {
6470
// Qualifiers have no mainsnak, and define datatype, datavalue on claim
6571
datavalue = claim.datavalue
@@ -70,6 +76,7 @@ function simplifySnak (claim, options) {
7076
else isReferenceSnak = true
7177
}
7278

79+
let value: any
7380
if (datavalue) {
7481
value = parseClaim(datatype, datavalue, options, claim.id)
7582
} else {
@@ -84,7 +91,7 @@ function simplifySnak (claim, options) {
8491

8592
const valueObj: any = { value }
8693

87-
if (keepHashes) valueObj.hash = claim.hash
94+
if (keepHashes && 'hash' in claim) valueObj.hash = claim.hash
8895
if (keepTypes) valueObj.type = datatype
8996
if (keepSnaktypes) valueObj.snaktype = snaktype
9097

@@ -109,18 +116,16 @@ function simplifySnak (claim, options) {
109116

110117
if (keepSnaktypes) valueObj.snaktype = snaktype
111118

112-
if (keepRanks) valueObj.rank = rank
119+
if (keepRanks && 'rank' in claim) valueObj.rank = claim.rank
113120

114-
const subSnaksOptions = getSubSnakOptions(options)
115-
subSnaksOptions.keepHashes = keepHashes
121+
const subSnaksOptions = { ...options, areSubSnaks: true }
116122

117123
if (keepQualifiers) {
118-
valueObj.qualifiers = simplifyQualifiers(claim.qualifiers, subSnaksOptions)
124+
valueObj.qualifiers = 'qualifiers' in claim ? simplifyQualifiers(claim.qualifiers, subSnaksOptions) : {}
119125
}
120126

121127
if (keepReferences) {
122-
claim.references = claim.references || []
123-
valueObj.references = simplifyReferences(claim.references, subSnaksOptions)
128+
valueObj.references = 'references' in claim ? simplifyReferences(claim.references, subSnaksOptions) : []
124129
}
125130

126131
if (keepIds) valueObj.id = claim.id
@@ -139,38 +144,42 @@ export function simplifyClaim (claim: Claim, options: SimplifySnakOptions = {}):
139144
}
140145

141146
export function simplifyQualifiers (qualifiers: Qualifiers, options: SimplifySnaksOptions = {}) {
142-
return simplifySnaks(qualifiers, getSubSnakOptions(options))
147+
return simplifySnaks(qualifiers, { ...options, areSubSnaks: true })
143148
}
144149
export function simplifyPropertyQualifiers (propertyQualifiers: PropertyQualifiers, options: SimplifySnaksOptions = {}) {
145-
return simplifyPropertySnaks(propertyQualifiers, getSubSnakOptions(options))
150+
return simplifyPropertySnaks(propertyQualifiers, { ...options, areSubSnaks: true })
146151
}
147-
export function simplifyQualifier (qualifier: Qualifier, options: SimplifySnakOptions = {}) {
152+
export function simplifyQualifier (qualifier: QualifierSnak, options: SimplifySnakOptions = {}) {
148153
return simplifySnak(qualifier, options)
149154
}
150155

151-
export function simplifyReferences (references: Reference[], options) {
156+
export function simplifyReferences (references: Reference[], options: SimplifySnaksOptions) {
152157
return references.map(refRecord => simplifyReferenceRecord(refRecord, options))
153158
}
154-
export function simplifyReferenceRecord (refRecord, options) {
155-
const subSnaksOptions = getSubSnakOptions(options)
159+
export function simplifyReferenceRecord (refRecord: Reference, options: SimplifySnaksOptions) {
160+
const subSnaksOptions = { ...options, areSubSnaks: true }
156161
const snaks = simplifySnaks(refRecord.snaks, subSnaksOptions)
157162
if (subSnaksOptions.keepHashes) return { snaks, hash: refRecord.hash }
158163
else return snaks
159164
}
160165

161-
const getSubSnakOptions = (options: any = {}) => {
162-
if (options.areSubSnaks) return options
163-
// Using a new object so that the original options object isn't modified
164-
else return Object.assign({}, options, { areSubSnaks: true })
165-
}
166-
167-
const keepOptions = [ 'keepQualifiers', 'keepReferences', 'keepIds', 'keepHashes', 'keepTypes', 'keepSnaktypes', 'keepRanks', 'keepRichValues' ]
168-
169-
const parseKeepOptions = options => {
166+
const keepOptions = [
167+
'keepHashes',
168+
'keepIds',
169+
'keepQualifiers',
170+
'keepRanks',
171+
'keepReferences',
172+
'keepRichValues',
173+
'keepSnaktypes',
174+
'keepTypes',
175+
] as const
176+
type KeepOption = typeof keepOptions[number]
177+
178+
const parseKeepOptions = (options: SimplifySnakOptions): Record<KeepOption, boolean> => {
170179
if (options.keepAll) {
171180
keepOptions.forEach(optionName => {
172-
if (options[optionName] == null) options[optionName] = true
181+
options[optionName] = options[optionName] ?? true
173182
})
174183
}
175-
return options
184+
return options as Record<KeepOption, boolean>
176185
}

0 commit comments

Comments
 (0)