diff --git a/.gitignore b/.gitignore index 3c3629e..277442b 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1,2 @@ node_modules +**/*.csv diff --git a/article.schema.json b/article.schema.json index adc7191..dde53ea 100644 --- a/article.schema.json +++ b/article.schema.json @@ -1,6 +1,6 @@ { "$schema": "https://json-schema.org/draft/2020-12/schema", - "$id": "https://raw.githubusercontent.com/CondeNast/tny-archive-schemas/v1.3.2/article.schema.json", + "$id": "https://raw.githubusercontent.com/CondeNast/tny-archive-schemas/v1.3.3/article.schema.json", "title": "Article", "description": "A New Yorker article.", "type": "object", @@ -474,6 +474,21 @@ }, "anyEvent": { "allOf": [ + { + "if": { + "properties": { + "@type": { + "const": "BroadcastEvent" + } + }, + "required": [ + "@type" + ] + }, + "then": { + "$ref": "#/$defs/schema.org/BroadcastEvent" + } + }, { "if": { "properties": { @@ -583,6 +598,16 @@ "if": { "not": { "anyOf": [ + { + "properties": { + "@type": { + "const": "BroadcastEvent" + } + }, + "required": [ + "@type" + ] + }, { "properties": { "@type": { @@ -1374,6 +1399,21 @@ "$ref": "#/$defs/schema.org/Book" } }, + { + "if": { + "properties": { + "@type": { + "const": "BroadcastEvent" + } + }, + "required": [ + "@type" + ] + }, + "then": { + "$ref": "#/$defs/schema.org/BroadcastEvent" + } + }, { "if": { "properties": { @@ -1648,6 +1688,16 @@ "@type" ] }, + { + "properties": { + "@type": { + "const": "BroadcastEvent" + } + }, + "required": [ + "@type" + ] + }, { "properties": { "@type": { diff --git a/package-lock.json b/package-lock.json index 1df2b1c..42d2b21 100644 --- a/package-lock.json +++ b/package-lock.json @@ -7,7 +7,11 @@ "name": "tny-archive-schemas", "license": "ISC", "dependencies": { - "@cfworker/json-schema": "3.0.1" + "@cfworker/json-schema": "3.0.1", + "chalk": "^5.4.0", + "fast-xml-parser": "^4.5.1", + "papaparse": "^5.4.1", + "xsd-validator": "^1.1.1" }, "devDependencies": { "prettier": "3.4.2" @@ -23,6 +27,504 @@ "integrity": "sha512-fjgm7yhAaUVp7Lp4WGTiuBXzXgUoKWxVEFEte0YWdPIo6tWMxL3Jv7zrGSaxd/4fihLaRJEUsK1aMkGdAEcqUg==", "license": "MIT" }, + "node_modules/@mapbox/node-pre-gyp": { + "version": "1.0.11", + "resolved": "https://registry.npmjs.org/@mapbox/node-pre-gyp/-/node-pre-gyp-1.0.11.tgz", + "integrity": "sha512-Yhlar6v9WQgUp/He7BdgzOz8lqMQ8sU+jkCq7Wx8Myc5YFJLbEe7lgui/V7G1qB1DJykHSGwreceSaD60Y0PUQ==", + "license": "BSD-3-Clause", + "dependencies": { + "detect-libc": "^2.0.0", + "https-proxy-agent": "^5.0.0", + "make-dir": "^3.1.0", + "node-fetch": "^2.6.7", + "nopt": "^5.0.0", + "npmlog": "^5.0.1", + "rimraf": "^3.0.2", + "semver": "^7.3.5", + "tar": "^6.1.11" + }, + "bin": { + "node-pre-gyp": "bin/node-pre-gyp" + } + }, + "node_modules/abbrev": { + "version": "1.1.1", + "resolved": "https://registry.npmjs.org/abbrev/-/abbrev-1.1.1.tgz", + "integrity": "sha512-nne9/IiQ/hzIhY6pdDnbBtz7DjPTKrY00P/zvPSm5pOFkl6xuGrGnXn/VtTNNfNtAfZ9/1RtehkszU9qcTii0Q==", + "license": "ISC" + }, + "node_modules/agent-base": { + "version": "6.0.2", + "resolved": "https://registry.npmjs.org/agent-base/-/agent-base-6.0.2.tgz", + "integrity": "sha512-RZNwNclF7+MS/8bDg70amg32dyeZGZxiDuQmZxKLAlQjr3jGyLx+4Kkk58UO7D2QdgFIQCovuSuZESne6RG6XQ==", + "license": "MIT", + "dependencies": { + "debug": "4" + }, + "engines": { + "node": ">= 6.0.0" + } + }, + "node_modules/ansi-regex": { + "version": "5.0.1", + "resolved": "https://registry.npmjs.org/ansi-regex/-/ansi-regex-5.0.1.tgz", + "integrity": "sha512-quJQXlTSUGL2LH9SUXo8VwsY4soanhgo6LNSm84E1LBcE8s3O0wpdiRzyR9z/ZZJMlMWv37qOOb9pdJlMUEKFQ==", + "license": "MIT", + "engines": { + "node": ">=8" + } + }, + "node_modules/aproba": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/aproba/-/aproba-2.0.0.tgz", + "integrity": "sha512-lYe4Gx7QT+MKGbDsA+Z+he/Wtef0BiwDOlK/XkBrdfsh9J/jPPXbX0tE9x9cl27Tmu5gg3QUbUrQYa/y+KOHPQ==", + "license": "ISC" + }, + "node_modules/are-we-there-yet": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/are-we-there-yet/-/are-we-there-yet-2.0.0.tgz", + "integrity": "sha512-Ci/qENmwHnsYo9xKIcUJN5LeDKdJ6R1Z1j9V/J5wyq8nh/mYPEpIKJbBZXtZjG04HiK7zV/p6Vs9952MrMeUIw==", + "deprecated": "This package is no longer supported.", + "license": "ISC", + "dependencies": { + "delegates": "^1.0.0", + "readable-stream": "^3.6.0" + }, + "engines": { + "node": ">=10" + } + }, + "node_modules/balanced-match": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/balanced-match/-/balanced-match-1.0.2.tgz", + "integrity": "sha512-3oSeUO0TMV67hN1AmbXsK4yaqU7tjiHlbxRDZOpH0KW9+CeX4bRAaX0Anxt0tx2MrpRpWwQaPwIlISEJhYU5Pw==", + "license": "MIT" + }, + "node_modules/bindings": { + "version": "1.3.1", + "resolved": "https://registry.npmjs.org/bindings/-/bindings-1.3.1.tgz", + "integrity": "sha512-i47mqjF9UbjxJhxGf+pZ6kSxrnI3wBLlnGI2ArWJ4r0VrvDS7ZYXkprq/pLaBWYq4GM0r4zdHY+NNRqEMU7uew==", + "license": "MIT" + }, + "node_modules/brace-expansion": { + "version": "1.1.11", + "resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-1.1.11.tgz", + "integrity": "sha512-iCuPHDFgrHX7H2vEI/5xpz07zSHB00TpugqhmYtVmMO6518mCuRMoOYFldEBl0g187ufozdaHgWKcYFb61qGiA==", + "license": "MIT", + "dependencies": { + "balanced-match": "^1.0.0", + "concat-map": "0.0.1" + } + }, + "node_modules/chalk": { + "version": "5.4.0", + "resolved": "https://registry.npmjs.org/chalk/-/chalk-5.4.0.tgz", + "integrity": "sha512-ZkD35Mx92acjB2yNJgziGqT9oKHEOxjTBTDRpOsRWtdecL/0jM3z5kM/CTzHWvHIen1GvkM85p6TuFfDGfc8/Q==", + "license": "MIT", + "engines": { + "node": "^12.17.0 || ^14.13 || >=16.0.0" + }, + "funding": { + "url": "https://github.com/chalk/chalk?sponsor=1" + } + }, + "node_modules/chownr": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/chownr/-/chownr-2.0.0.tgz", + "integrity": "sha512-bIomtDF5KGpdogkLd9VspvFzk9KfpyyGlS8YFVZl7TGPBHL5snIOnxeshwVgPteQ9b4Eydl+pVbIyE1DcvCWgQ==", + "license": "ISC", + "engines": { + "node": ">=10" + } + }, + "node_modules/color-support": { + "version": "1.1.3", + "resolved": "https://registry.npmjs.org/color-support/-/color-support-1.1.3.tgz", + "integrity": "sha512-qiBjkpbMLO/HL68y+lh4q0/O1MZFj2RX6X/KmMa3+gJD3z+WwI1ZzDHysvqHGS3mP6mznPckpXmw1nI9cJjyRg==", + "license": "ISC", + "bin": { + "color-support": "bin.js" + } + }, + "node_modules/concat-map": { + "version": "0.0.1", + "resolved": "https://registry.npmjs.org/concat-map/-/concat-map-0.0.1.tgz", + "integrity": "sha512-/Srv4dswyQNBfohGpz9o6Yb3Gz3SrUDqBH5rTuhGR7ahtlbYKnVxw2bCFMRljaA7EXHaXZ8wsHdodFvbkhKmqg==", + "license": "MIT" + }, + "node_modules/console-control-strings": { + "version": "1.1.0", + "resolved": "https://registry.npmjs.org/console-control-strings/-/console-control-strings-1.1.0.tgz", + "integrity": "sha512-ty/fTekppD2fIwRvnZAVdeOiGd1c7YXEixbgJTNzqcxJWKQnjJ/V1bNEEE6hygpM3WjwHFUVK6HTjWSzV4a8sQ==", + "license": "ISC" + }, + "node_modules/debug": { + "version": "4.4.0", + "resolved": "https://registry.npmjs.org/debug/-/debug-4.4.0.tgz", + "integrity": "sha512-6WTZ/IxCY/T6BALoZHaE4ctp9xm+Z5kY/pzYaCHRFeyVhojxlrm+46y68HA6hr0TcwEssoxNiDEUJQjfPZ/RYA==", + "license": "MIT", + "dependencies": { + "ms": "^2.1.3" + }, + "engines": { + "node": ">=6.0" + }, + "peerDependenciesMeta": { + "supports-color": { + "optional": true + } + } + }, + "node_modules/delegates": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/delegates/-/delegates-1.0.0.tgz", + "integrity": "sha512-bd2L678uiWATM6m5Z1VzNCErI3jiGzt6HGY8OVICs40JQq/HALfbyNJmp0UDakEY4pMMaN0Ly5om/B1VI/+xfQ==", + "license": "MIT" + }, + "node_modules/detect-libc": { + "version": "2.0.3", + "resolved": "https://registry.npmjs.org/detect-libc/-/detect-libc-2.0.3.tgz", + "integrity": "sha512-bwy0MGW55bG41VqxxypOsdSdGqLwXPI/focwgTYCFMbdUiBAxLg9CFzG08sz2aqzknwiX7Hkl0bQENjg8iLByw==", + "license": "Apache-2.0", + "engines": { + "node": ">=8" + } + }, + "node_modules/emoji-regex": { + "version": "8.0.0", + "resolved": "https://registry.npmjs.org/emoji-regex/-/emoji-regex-8.0.0.tgz", + "integrity": "sha512-MSjYzcWNOA0ewAHpz0MxpYFvwg6yjy1NG3xteoqz644VCo/RPgnr1/GGt+ic3iJTzQ8Eu3TdM14SawnVUmGE6A==", + "license": "MIT" + }, + "node_modules/fast-xml-parser": { + "version": "4.5.1", + "resolved": "https://registry.npmjs.org/fast-xml-parser/-/fast-xml-parser-4.5.1.tgz", + "integrity": "sha512-y655CeyUQ+jj7KBbYMc4FG01V8ZQqjN+gDYGJ50RtfsUB8iG9AmwmwoAgeKLJdmueKKMrH1RJ7yXHTSoczdv5w==", + "funding": [ + { + "type": "github", + "url": "https://github.com/sponsors/NaturalIntelligence" + }, + { + "type": "paypal", + "url": "https://paypal.me/naturalintelligence" + } + ], + "license": "MIT", + "dependencies": { + "strnum": "^1.0.5" + }, + "bin": { + "fxparser": "src/cli/cli.js" + } + }, + "node_modules/fs-minipass": { + "version": "2.1.0", + "resolved": "https://registry.npmjs.org/fs-minipass/-/fs-minipass-2.1.0.tgz", + "integrity": "sha512-V/JgOLFCS+R6Vcq0slCuaeWEdNC3ouDlJMNIsacH2VtALiu9mV4LPrHc5cDl8k5aw6J8jwgWWpiTo5RYhmIzvg==", + "license": "ISC", + "dependencies": { + "minipass": "^3.0.0" + }, + "engines": { + "node": ">= 8" + } + }, + "node_modules/fs-minipass/node_modules/minipass": { + "version": "3.3.6", + "resolved": "https://registry.npmjs.org/minipass/-/minipass-3.3.6.tgz", + "integrity": "sha512-DxiNidxSEK+tHG6zOIklvNOwm3hvCrbUrdtzY74U6HKTJxvIDfOUL5W5P2Ghd3DTkhhKPYGqeNUIh5qcM4YBfw==", + "license": "ISC", + "dependencies": { + "yallist": "^4.0.0" + }, + "engines": { + "node": ">=8" + } + }, + "node_modules/fs.realpath": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/fs.realpath/-/fs.realpath-1.0.0.tgz", + "integrity": "sha512-OO0pH2lK6a0hZnAdau5ItzHPI6pUlvI7jMVnxUQRtw4owF2wk8lOSabtGDCTP4Ggrg2MbGnWO9X8K1t4+fGMDw==", + "license": "ISC" + }, + "node_modules/gauge": { + "version": "3.0.2", + "resolved": "https://registry.npmjs.org/gauge/-/gauge-3.0.2.tgz", + "integrity": "sha512-+5J6MS/5XksCuXq++uFRsnUd7Ovu1XenbeuIuNRJxYWjgQbPuFhT14lAvsWfqfAmnwluf1OwMjz39HjfLPci0Q==", + "deprecated": "This package is no longer supported.", + "license": "ISC", + "dependencies": { + "aproba": "^1.0.3 || ^2.0.0", + "color-support": "^1.1.2", + "console-control-strings": "^1.0.0", + "has-unicode": "^2.0.1", + "object-assign": "^4.1.1", + "signal-exit": "^3.0.0", + "string-width": "^4.2.3", + "strip-ansi": "^6.0.1", + "wide-align": "^1.1.2" + }, + "engines": { + "node": ">=10" + } + }, + "node_modules/glob": { + "version": "7.2.3", + "resolved": "https://registry.npmjs.org/glob/-/glob-7.2.3.tgz", + "integrity": "sha512-nFR0zLpU2YCaRxwoCJvL6UvCH2JFyFVIvwTLsIf21AuHlMskA1hhTdk+LlYJtOlYt9v6dvszD2BGRqBL+iQK9Q==", + "deprecated": "Glob versions prior to v9 are no longer supported", + "license": "ISC", + "dependencies": { + "fs.realpath": "^1.0.0", + "inflight": "^1.0.4", + "inherits": "2", + "minimatch": "^3.1.1", + "once": "^1.3.0", + "path-is-absolute": "^1.0.0" + }, + "engines": { + "node": "*" + }, + "funding": { + "url": "https://github.com/sponsors/isaacs" + } + }, + "node_modules/has-unicode": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/has-unicode/-/has-unicode-2.0.1.tgz", + "integrity": "sha512-8Rf9Y83NBReMnx0gFzA8JImQACstCYWUplepDa9xprwwtmgEZUF0h/i5xSA625zB/I37EtrswSST6OXxwaaIJQ==", + "license": "ISC" + }, + "node_modules/https-proxy-agent": { + "version": "5.0.1", + "resolved": "https://registry.npmjs.org/https-proxy-agent/-/https-proxy-agent-5.0.1.tgz", + "integrity": "sha512-dFcAjpTQFgoLMzC2VwU+C/CbS7uRL0lWmxDITmqm7C+7F0Odmj6s9l6alZc6AELXhrnggM2CeWSXHGOdX2YtwA==", + "license": "MIT", + "dependencies": { + "agent-base": "6", + "debug": "4" + }, + "engines": { + "node": ">= 6" + } + }, + "node_modules/inflight": { + "version": "1.0.6", + "resolved": "https://registry.npmjs.org/inflight/-/inflight-1.0.6.tgz", + "integrity": "sha512-k92I/b08q4wvFscXCLvqfsHCrjrF7yiXsQuIVvVE7N82W3+aqpzuUdBbfhWcy/FZR3/4IgflMgKLOsvPDrGCJA==", + "deprecated": "This module is not supported, and leaks memory. Do not use it. Check out lru-cache if you want a good and tested way to coalesce async requests by a key value, which is much more comprehensive and powerful.", + "license": "ISC", + "dependencies": { + "once": "^1.3.0", + "wrappy": "1" + } + }, + "node_modules/inherits": { + "version": "2.0.4", + "resolved": "https://registry.npmjs.org/inherits/-/inherits-2.0.4.tgz", + "integrity": "sha512-k/vGaX4/Yla3WzyMCvTQOXYeIHvqOKtnqBduzTHpzpQZzAskKMhZ2K+EnBiSM9zGSoIFeMpXKxa4dYeZIQqewQ==", + "license": "ISC" + }, + "node_modules/is-fullwidth-code-point": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/is-fullwidth-code-point/-/is-fullwidth-code-point-3.0.0.tgz", + "integrity": "sha512-zymm5+u+sCsSWyD9qNaejV3DFvhCKclKdizYaJUuHA83RLjb7nSuGnddCHGv0hk+KY7BMAlsWeK4Ueg6EV6XQg==", + "license": "MIT", + "engines": { + "node": ">=8" + } + }, + "node_modules/libxmljs": { + "version": "1.0.11", + "resolved": "https://registry.npmjs.org/libxmljs/-/libxmljs-1.0.11.tgz", + "integrity": "sha512-ChqXkhZuvhbjariwPakKs/h+dF5Pe7j+QJ/PmTidzx7mDiFa5chhy7806PQiO+VnBJZ5mVLVq4Dk+W7fZP6luw==", + "hasInstallScript": true, + "license": "MIT", + "dependencies": { + "@mapbox/node-pre-gyp": "^1.0.9", + "bindings": "~1.3.0", + "nan": "^2.17.0" + }, + "engines": { + "node": ">=0.8.0" + } + }, + "node_modules/make-dir": { + "version": "3.1.0", + "resolved": "https://registry.npmjs.org/make-dir/-/make-dir-3.1.0.tgz", + "integrity": "sha512-g3FeP20LNwhALb/6Cz6Dd4F2ngze0jz7tbzrD2wAV+o9FeNHe4rL+yK2md0J/fiSf1sa1ADhXqi5+oVwOM/eGw==", + "license": "MIT", + "dependencies": { + "semver": "^6.0.0" + }, + "engines": { + "node": ">=8" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, + "node_modules/make-dir/node_modules/semver": { + "version": "6.3.1", + "resolved": "https://registry.npmjs.org/semver/-/semver-6.3.1.tgz", + "integrity": "sha512-BR7VvDCVHO+q2xBEWskxS6DJE1qRnb7DxzUrogb71CWoSficBxYsiAGd+Kl0mmq/MprG9yArRkyrQxTO6XjMzA==", + "license": "ISC", + "bin": { + "semver": "bin/semver.js" + } + }, + "node_modules/minimatch": { + "version": "3.1.2", + "resolved": "https://registry.npmjs.org/minimatch/-/minimatch-3.1.2.tgz", + "integrity": "sha512-J7p63hRiAjw1NDEww1W7i37+ByIrOWO5XQQAzZ3VOcL0PNybwpfmV/N05zFAzwQ9USyEcX6t3UO+K5aqBQOIHw==", + "license": "ISC", + "dependencies": { + "brace-expansion": "^1.1.7" + }, + "engines": { + "node": "*" + } + }, + "node_modules/minipass": { + "version": "5.0.0", + "resolved": "https://registry.npmjs.org/minipass/-/minipass-5.0.0.tgz", + "integrity": "sha512-3FnjYuehv9k6ovOEbyOswadCDPX1piCfhV8ncmYtHOjuPwylVWsghTLo7rabjC3Rx5xD4HDx8Wm1xnMF7S5qFQ==", + "license": "ISC", + "engines": { + "node": ">=8" + } + }, + "node_modules/minizlib": { + "version": "2.1.2", + "resolved": "https://registry.npmjs.org/minizlib/-/minizlib-2.1.2.tgz", + "integrity": "sha512-bAxsR8BVfj60DWXHE3u30oHzfl4G7khkSuPW+qvpd7jFRHm7dLxOjUk1EHACJ/hxLY8phGJ0YhYHZo7jil7Qdg==", + "license": "MIT", + "dependencies": { + "minipass": "^3.0.0", + "yallist": "^4.0.0" + }, + "engines": { + "node": ">= 8" + } + }, + "node_modules/minizlib/node_modules/minipass": { + "version": "3.3.6", + "resolved": "https://registry.npmjs.org/minipass/-/minipass-3.3.6.tgz", + "integrity": "sha512-DxiNidxSEK+tHG6zOIklvNOwm3hvCrbUrdtzY74U6HKTJxvIDfOUL5W5P2Ghd3DTkhhKPYGqeNUIh5qcM4YBfw==", + "license": "ISC", + "dependencies": { + "yallist": "^4.0.0" + }, + "engines": { + "node": ">=8" + } + }, + "node_modules/mkdirp": { + "version": "1.0.4", + "resolved": "https://registry.npmjs.org/mkdirp/-/mkdirp-1.0.4.tgz", + "integrity": "sha512-vVqVZQyf3WLx2Shd0qJ9xuvqgAyKPLAiqITEtqW0oIUjzo3PePDd6fW9iFz30ef7Ysp/oiWqbhszeGWW2T6Gzw==", + "license": "MIT", + "bin": { + "mkdirp": "bin/cmd.js" + }, + "engines": { + "node": ">=10" + } + }, + "node_modules/ms": { + "version": "2.1.3", + "resolved": "https://registry.npmjs.org/ms/-/ms-2.1.3.tgz", + "integrity": "sha512-6FlzubTLZG3J2a/NVCAleEhjzq5oxgHyaCU9yYXvcLsvoVaHJq/s5xXI6/XXP6tz7R9xAOtHnSO/tXtF3WRTlA==", + "license": "MIT" + }, + "node_modules/nan": { + "version": "2.22.0", + "resolved": "https://registry.npmjs.org/nan/-/nan-2.22.0.tgz", + "integrity": "sha512-nbajikzWTMwsW+eSsNm3QwlOs7het9gGJU5dDZzRTQGk03vyBOauxgI4VakDzE0PtsGTmXPsXTbbjVhRwR5mpw==", + "license": "MIT" + }, + "node_modules/node-fetch": { + "version": "2.7.0", + "resolved": "https://registry.npmjs.org/node-fetch/-/node-fetch-2.7.0.tgz", + "integrity": "sha512-c4FRfUm/dbcWZ7U+1Wq0AwCyFL+3nt2bEw05wfxSz+DWpWsitgmSgYmy2dQdWyKC1694ELPqMs/YzUSNozLt8A==", + "license": "MIT", + "dependencies": { + "whatwg-url": "^5.0.0" + }, + "engines": { + "node": "4.x || >=6.0.0" + }, + "peerDependencies": { + "encoding": "^0.1.0" + }, + "peerDependenciesMeta": { + "encoding": { + "optional": true + } + } + }, + "node_modules/nopt": { + "version": "5.0.0", + "resolved": "https://registry.npmjs.org/nopt/-/nopt-5.0.0.tgz", + "integrity": "sha512-Tbj67rffqceeLpcRXrT7vKAN8CwfPeIBgM7E6iBkmKLV7bEMwpGgYLGv0jACUsECaa/vuxP0IjEont6umdMgtQ==", + "license": "ISC", + "dependencies": { + "abbrev": "1" + }, + "bin": { + "nopt": "bin/nopt.js" + }, + "engines": { + "node": ">=6" + } + }, + "node_modules/npmlog": { + "version": "5.0.1", + "resolved": "https://registry.npmjs.org/npmlog/-/npmlog-5.0.1.tgz", + "integrity": "sha512-AqZtDUWOMKs1G/8lwylVjrdYgqA4d9nu8hc+0gzRxlDb1I10+FHBGMXs6aiQHFdCUUlqH99MUMuLfzWDNDtfxw==", + "deprecated": "This package is no longer supported.", + "license": "ISC", + "dependencies": { + "are-we-there-yet": "^2.0.0", + "console-control-strings": "^1.1.0", + "gauge": "^3.0.0", + "set-blocking": "^2.0.0" + } + }, + "node_modules/object-assign": { + "version": "4.1.1", + "resolved": "https://registry.npmjs.org/object-assign/-/object-assign-4.1.1.tgz", + "integrity": "sha512-rJgTQnkUnH1sFw8yT6VSU3zD3sWmu6sZhIseY8VX+GRu3P6F7Fu+JNDoXfklElbLJSnc3FUQHVe4cU5hj+BcUg==", + "license": "MIT", + "engines": { + "node": ">=0.10.0" + } + }, + "node_modules/once": { + "version": "1.4.0", + "resolved": "https://registry.npmjs.org/once/-/once-1.4.0.tgz", + "integrity": "sha512-lNaJgI+2Q5URQBkccEKHTQOPaXdUxnZZElQTZY0MFUAuaEqe1E+Nyvgdz/aIyNi6Z9MzO5dv1H8n58/GELp3+w==", + "license": "ISC", + "dependencies": { + "wrappy": "1" + } + }, + "node_modules/papaparse": { + "version": "5.4.1", + "resolved": "https://registry.npmjs.org/papaparse/-/papaparse-5.4.1.tgz", + "integrity": "sha512-HipMsgJkZu8br23pW15uvo6sib6wne/4woLZPlFf3rpDyMe9ywEXUsuD7+6K9PRkJlVT51j/sCOYDKGGS3ZJrw==", + "license": "MIT" + }, + "node_modules/path-is-absolute": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/path-is-absolute/-/path-is-absolute-1.0.1.tgz", + "integrity": "sha512-AVbw3UJ2e9bq64vSaS9Am0fje1Pa8pbGqTTsmXfaIiMpnr5DlDhfJOuLj9Sf95ZPVDAUerDfEk88MPmPe7UCQg==", + "license": "MIT", + "engines": { + "node": ">=0.10.0" + } + }, "node_modules/prettier": { "version": "3.4.2", "resolved": "https://registry.npmjs.org/prettier/-/prettier-3.4.2.tgz", @@ -38,6 +540,199 @@ "funding": { "url": "https://github.com/prettier/prettier?sponsor=1" } + }, + "node_modules/readable-stream": { + "version": "3.6.2", + "resolved": "https://registry.npmjs.org/readable-stream/-/readable-stream-3.6.2.tgz", + "integrity": "sha512-9u/sniCrY3D5WdsERHzHE4G2YCXqoG5FTHUiCC4SIbr6XcLZBY05ya9EKjYek9O5xOAwjGq+1JdGBAS7Q9ScoA==", + "license": "MIT", + "dependencies": { + "inherits": "^2.0.3", + "string_decoder": "^1.1.1", + "util-deprecate": "^1.0.1" + }, + "engines": { + "node": ">= 6" + } + }, + "node_modules/rimraf": { + "version": "3.0.2", + "resolved": "https://registry.npmjs.org/rimraf/-/rimraf-3.0.2.tgz", + "integrity": "sha512-JZkJMZkAGFFPP2YqXZXPbMlMBgsxzE8ILs4lMIX/2o0L9UBw9O/Y3o6wFw/i9YLapcUJWwqbi3kdxIPdC62TIA==", + "deprecated": "Rimraf versions prior to v4 are no longer supported", + "license": "ISC", + "dependencies": { + "glob": "^7.1.3" + }, + "bin": { + "rimraf": "bin.js" + }, + "funding": { + "url": "https://github.com/sponsors/isaacs" + } + }, + "node_modules/safe-buffer": { + "version": "5.2.1", + "resolved": "https://registry.npmjs.org/safe-buffer/-/safe-buffer-5.2.1.tgz", + "integrity": "sha512-rp3So07KcdmmKbGvgaNxQSJr7bGVSVk5S9Eq1F+ppbRo70+YeaDxkw5Dd8NPN+GD6bjnYm2VuPuCXmpuYvmCXQ==", + "funding": [ + { + "type": "github", + "url": "https://github.com/sponsors/feross" + }, + { + "type": "patreon", + "url": "https://www.patreon.com/feross" + }, + { + "type": "consulting", + "url": "https://feross.org/support" + } + ], + "license": "MIT" + }, + "node_modules/semver": { + "version": "7.6.3", + "resolved": "https://registry.npmjs.org/semver/-/semver-7.6.3.tgz", + "integrity": "sha512-oVekP1cKtI+CTDvHWYFUcMtsK/00wmAEfyqKfNdARm8u1wNVhSgaX7A8d4UuIlUI5e84iEwOhs7ZPYRmzU9U6A==", + "license": "ISC", + "bin": { + "semver": "bin/semver.js" + }, + "engines": { + "node": ">=10" + } + }, + "node_modules/set-blocking": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/set-blocking/-/set-blocking-2.0.0.tgz", + "integrity": "sha512-KiKBS8AnWGEyLzofFfmvKwpdPzqiy16LvQfK3yv/fVH7Bj13/wl3JSR1J+rfgRE9q7xUJK4qvgS8raSOeLUehw==", + "license": "ISC" + }, + "node_modules/signal-exit": { + "version": "3.0.7", + "resolved": "https://registry.npmjs.org/signal-exit/-/signal-exit-3.0.7.tgz", + "integrity": "sha512-wnD2ZE+l+SPC/uoS0vXeE9L1+0wuaMqKlfz9AMUo38JsyLSBWSFcHR1Rri62LZc12vLr1gb3jl7iwQhgwpAbGQ==", + "license": "ISC" + }, + "node_modules/string_decoder": { + "version": "1.3.0", + "resolved": "https://registry.npmjs.org/string_decoder/-/string_decoder-1.3.0.tgz", + "integrity": "sha512-hkRX8U1WjJFd8LsDJ2yQ/wWWxaopEsABU1XfkM8A+j0+85JAGppt16cr1Whg6KIbb4okU6Mql6BOj+uup/wKeA==", + "license": "MIT", + "dependencies": { + "safe-buffer": "~5.2.0" + } + }, + "node_modules/string-width": { + "version": "4.2.3", + "resolved": "https://registry.npmjs.org/string-width/-/string-width-4.2.3.tgz", + "integrity": "sha512-wKyQRQpjJ0sIp62ErSZdGsjMJWsap5oRNihHhu6G7JVO/9jIB6UyevL+tXuOqrng8j/cxKTWyWUwvSTriiZz/g==", + "license": "MIT", + "dependencies": { + "emoji-regex": "^8.0.0", + "is-fullwidth-code-point": "^3.0.0", + "strip-ansi": "^6.0.1" + }, + "engines": { + "node": ">=8" + } + }, + "node_modules/strip-ansi": { + "version": "6.0.1", + "resolved": "https://registry.npmjs.org/strip-ansi/-/strip-ansi-6.0.1.tgz", + "integrity": "sha512-Y38VPSHcqkFrCpFnQ9vuSXmquuv5oXOKpGeT6aGrr3o3Gc9AlVa6JBfUSOCnbxGGZF+/0ooI7KrPuUSztUdU5A==", + "license": "MIT", + "dependencies": { + "ansi-regex": "^5.0.1" + }, + "engines": { + "node": ">=8" + } + }, + "node_modules/strnum": { + "version": "1.0.5", + "resolved": "https://registry.npmjs.org/strnum/-/strnum-1.0.5.tgz", + "integrity": "sha512-J8bbNyKKXl5qYcR36TIO8W3mVGVHrmmxsd5PAItGkmyzwJvybiw2IVq5nqd0i4LSNSkB/sx9VHllbfFdr9k1JA==", + "license": "MIT" + }, + "node_modules/tar": { + "version": "6.2.1", + "resolved": "https://registry.npmjs.org/tar/-/tar-6.2.1.tgz", + "integrity": "sha512-DZ4yORTwrbTj/7MZYq2w+/ZFdI6OZ/f9SFHR+71gIVUZhOQPHzVCLpvRnPgyaMpfWxxk/4ONva3GQSyNIKRv6A==", + "license": "ISC", + "dependencies": { + "chownr": "^2.0.0", + "fs-minipass": "^2.0.0", + "minipass": "^5.0.0", + "minizlib": "^2.1.1", + "mkdirp": "^1.0.3", + "yallist": "^4.0.0" + }, + "engines": { + "node": ">=10" + } + }, + "node_modules/tr46": { + "version": "0.0.3", + "resolved": "https://registry.npmjs.org/tr46/-/tr46-0.0.3.tgz", + "integrity": "sha512-N3WMsuqV66lT30CrXNbEjx4GEwlow3v6rr4mCcv6prnfwhS01rkgyFdjPNBYd9br7LpXV1+Emh01fHnq2Gdgrw==", + "license": "MIT" + }, + "node_modules/util-deprecate": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/util-deprecate/-/util-deprecate-1.0.2.tgz", + "integrity": "sha512-EPD5q1uXyFxJpCrLnCc1nHnq3gOa6DZBocAIiI2TaSCA7VCJ1UJDMagCzIkXNsUYfD1daK//LTEQ8xiIbrHtcw==", + "license": "MIT" + }, + "node_modules/webidl-conversions": { + "version": "3.0.1", + "resolved": "https://registry.npmjs.org/webidl-conversions/-/webidl-conversions-3.0.1.tgz", + "integrity": "sha512-2JAn3z8AR6rjK8Sm8orRC0h/bcl/DqL7tRPdGZ4I1CjdF+EaMLmYxBHyXuKL849eucPFhvBoxMsflfOb8kxaeQ==", + "license": "BSD-2-Clause" + }, + "node_modules/whatwg-url": { + "version": "5.0.0", + "resolved": "https://registry.npmjs.org/whatwg-url/-/whatwg-url-5.0.0.tgz", + "integrity": "sha512-saE57nupxk6v3HY35+jzBwYa0rKSy0XR8JSxZPwgLr7ys0IBzhGviA1/TUGJLmSVqs8pb9AnvICXEuOHLprYTw==", + "license": "MIT", + "dependencies": { + "tr46": "~0.0.3", + "webidl-conversions": "^3.0.0" + } + }, + "node_modules/wide-align": { + "version": "1.1.5", + "resolved": "https://registry.npmjs.org/wide-align/-/wide-align-1.1.5.tgz", + "integrity": "sha512-eDMORYaPNZ4sQIuuYPDHdQvf4gyCF9rEEV/yPxGfwPkRodwEgiMUUXTx/dex+Me0wxx53S+NgUHaP7y3MGlDmg==", + "license": "ISC", + "dependencies": { + "string-width": "^1.0.2 || 2 || 3 || 4" + } + }, + "node_modules/wrappy": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/wrappy/-/wrappy-1.0.2.tgz", + "integrity": "sha512-l4Sp/DRseor9wL6EvV2+TuQn63dMkPjZ/sp9XkghTEbV9KlPS1xUsZ3u7/IQO4wxtcFB4bgpQPRcR3QCvezPcQ==", + "license": "ISC" + }, + "node_modules/xsd-validator": { + "version": "1.1.1", + "resolved": "https://registry.npmjs.org/xsd-validator/-/xsd-validator-1.1.1.tgz", + "integrity": "sha512-gHpPtBJ7B7mQOwP+ZT91kt2qLdK/Ek3jGZNl3Rz5pu5z4j/Nn6OWfEI17W26VoqDEqaptuU3WvjwLbP9TLCFKg==", + "license": "ISC", + "dependencies": { + "libxmljs": "^1.0.9" + }, + "engines": { + "node": ">=14.0.0" + } + }, + "node_modules/yallist": { + "version": "4.0.0", + "resolved": "https://registry.npmjs.org/yallist/-/yallist-4.0.0.tgz", + "integrity": "sha512-3wdGidZyq5PB084XLES5TpOSRA3wjXAlIWMhum2kRcv/41Sn2emQ0dycQW4uZXLejwKvg6EsvbdlVL+FYEct7A==", + "license": "ISC" } } } diff --git a/package.json b/package.json index 06a8f6c..14ddd7f 100644 --- a/package.json +++ b/package.json @@ -7,8 +7,17 @@ "scripts": { "validate": "node validate.js" }, + "imports": { + "#article.raw.schema.xsd": "./article.raw.schema.xsd", + "#article.schema.json": "./article.schema.json", + "#reports/*": "./reports/*" + }, "dependencies": { - "@cfworker/json-schema": "3.0.1" + "@cfworker/json-schema": "3.0.1", + "chalk": "^5.4.0", + "fast-xml-parser": "^4.5.1", + "papaparse": "^5.4.1", + "xsd-validator": "^1.1.1" }, "devDependencies": { "prettier": "3.4.2" diff --git a/reports/.gitkeep b/reports/.gitkeep new file mode 100644 index 0000000..e69de29 diff --git a/validate.js b/validate.js index d11949a..03290dd 100644 --- a/validate.js +++ b/validate.js @@ -1,18 +1,30 @@ import { exec } from 'node:child_process'; import { Validator } from '@cfworker/json-schema'; +import chalk from 'chalk'; import fsp from 'node:fs/promises'; -import path from 'path'; -import url from 'url'; - -const __filename = url.fileURLToPath(import.meta.url); -const __dirname = path.dirname(__filename); +import papaparse from 'papaparse'; +import path from 'node:path'; +import url from 'node:url'; +import xsdValidator from 'xsd-validator'; +const validateSchema = xsdValidator.default; +import { XMLValidator } from 'fast-xml-parser'; const Glyph = { CROSS_MARK: '\u{274C}', + OK_SIGN: '\u{1F197}', WARNING_SIGN: '\u{26A0}\u{FE0F} ', WHITE_HEAVY_CHECK_MARK: '\u{2705}', }; +const Status = { + VALID: 'valid', + INVALID: 'invalid', + MALFORMED: 'malformed', +}; + +let jsonValidator; +let rawSchema; + /** * Get the working directory, based on where the script is being called (as * opposed to where it is located). @@ -31,28 +43,197 @@ function pwd() { }); } -(async function main() { - const input = process.argv?.[2]; +/** + * Recurse down an object’s hierarchy and build up an array of all values and + * array entries that look like HTML fragments. + * + * @param {any} arg + * @returns {[string]} - the HTML fragments + */ +export function findHtmlFragments(arg) { + if (typeof arg === 'string' && /<.+?>/.test(arg)) { + // Base case: only return an array if this string looks like an HTML fragment. + return [arg]; + } else if (typeof arg === 'object') { + // If arg is an array, run findHtmlFragments on all of its members; if + // it’s an object, run findHtmlFragments on all of its values. + const arr = Array.isArray(arg) ? arg : Object.values(arg); - if (!input) { - console.error( - 'One positional argument is required (a path to a directory or file).' + let results = []; + for (const entry of arr) { + const result = findHtmlFragments(entry); + if (Array.isArray(result) && result.length > 0) { + results = results.concat(result); + } + } + + return results; + } +} + +async function writeDataToReportCsv(filepath, data) { + if (Array.isArray(data) && data.length === 0) { + return; + } + + // create directory (and any sub directories) if it doesn't exist + const { dir } = path.parse(filepath); + await fsp.mkdir(dir, { recursive: true }); + + // write out the data + const unparsed = papaparse.unparse(data, { newline: '\n', header: true }); + await fsp.writeFile(filepath, unparsed); +} + +async function validateJson(json) { + // Load the JSON schema and create the validator. + if (!jsonValidator) { + const schemaPath = url.fileURLToPath( + import.meta.resolve('#article.schema.json'), ); - process.exit(1); + const schemaStr = await fsp.readFile(schemaPath); + const schema = JSON.parse(schemaStr); + jsonValidator = new Validator(schema); } - // Build an absolute path based on the input. - let inputPath = input; - if (!path.isAbsolute(input)) { - const workingDir = await pwd(); - inputPath = path.resolve(workingDir, input); - console.log(inputPath); + const { valid, errors } = jsonValidator.validate(json); + if (valid) { + return { + status: Status.VALID, + }; + } else { + return { + status: Status.INVALID, + errors: errors.map(({ error }) => error), + }; + } +} + +async function validateRaw(raw) { + if (!rawSchema) { + const schemaPath = url.fileURLToPath( + import.meta.resolve('#article.raw.schema.xsd'), + ); + rawSchema = await fsp.readFile(schemaPath); + } + + try { + const result = validateSchema(raw, rawSchema); + if (result === true) { + return { + status: Status.VALID, + }; + } else { + return { + status: Status.INVALID, + errors: result.map((err) => err.message.trim()), + }; + } + } catch (err) { + return { + status: Status.MALFORMED, + errors: [err.message.trim()], + }; + } +} + +function validateHtmlFragment(htmlFragment) { + const xml = `
${htmlFragment}
`; + const result = XMLValidator.validate(xml); + if (result === true) { + return { + status: Status.VALID, + }; + } else { + return { + status: Status.INVALID, + content: htmlFragment, + errors: result.err.msg, + }; + } +} + +// NOTE: jsonld is the only value inside the schema that can have HTML fragments +function validateHtmlFragments(jsonld) { + const htmlFragments = findHtmlFragments(jsonld); + + let errors = []; + let anyIsInvalid = false; + + for (const htmlFragment of htmlFragments) { + const validationResult = validateHtmlFragment(htmlFragment); + if (validationResult.status === Status.INVALID) { + anyIsInvalid = anyIsInvalid || true; + } + errors = errors.concat(validationResult.errors || []); + } + + return { + status: anyIsInvalid ? Status.INVALID : Status.VALID, + errors, + }; +} +async function generateFileReport(jsonFilePath) { + const jsonString = await fsp.readFile(jsonFilePath); + const { base: filename } = path.parse(jsonFilePath); + const report = { + path: jsonFilePath, + filename, + overall_status: '', + json_status: '', + raw_status: '', + html_fragments_status: '', + json_errors: [], + raw_errors: [], + html_fragments_errors: [], + }; + + let json; + try { + json = JSON.parse(jsonString); + } catch (err) { + report.overall_status = Status.MALFORMED; + report.json_status = Status.MALFORMED; + report.json_errors = err; + return report; + } + + const jsonValidationResult = await validateJson(json); + report.json_status = jsonValidationResult.status; + report.json_errors = jsonValidationResult.errors || []; + + const rawValidationResult = await validateRaw(json.raw); + report.raw_status = rawValidationResult.status; + report.raw_errors = rawValidationResult.errors || []; + + const htmlFragmentsValidationResult = validateHtmlFragments(json.jsonld); + report.html_fragments_status = htmlFragmentsValidationResult.status; + report.html_fragments_errors = htmlFragmentsValidationResult.errors || []; + + // The overall status is only valid if everything was valid. + if ( + report.json_status === Status.VALID && + report.raw_status === Status.VALID && + report.html_fragments_status === Status.VALID + ) { + report.overall_status = Status.VALID; + } else { + report.overall_status = Status.INVALID; } + return report; +} + +/** + * Find all of the JSON files at a given `inputPath`. + * If the input is a directory, include all of the JSON files in that directory. + * If the input is a file, include just the file. + * + * @param {string} inputPath + * @returns + */ +async function getJsonFilePathsFromInputPath(inputPath) { + let jsonFilePaths = []; - // Build the paths of files to validate. - // If the input is a directory, include all of the JSON files in that directory. - // If the input is a file, include just the file. - let filePaths = []; const inputStats = await fsp.stat(inputPath); if (inputStats.isDirectory()) { try { @@ -60,58 +241,71 @@ function pwd() { withFileTypes: true, recursive: true, }); - filePaths = entries + jsonFilePaths = entries .filter((entry) => entry.isFile() && entry.name.endsWith('json')) .map((entry) => path.resolve(entry.parentPath, entry.name)); } catch (err) { console.error(err.message); } } else if (inputStats.isFile() && inputPath.endsWith('.json')) { - filePaths.push(inputPath); + jsonFilePaths.push(inputPath); } - // There’s nothing to validate, so bail out early. - if (filePaths.length === 0) { - console.error('Could not find any JSON files in input.'); + return jsonFilePaths; +} + +(async function main() { + const input = process.argv?.[2]; + + if (!input) { + console.error( + 'One positional argument is required (a path to a directory or file).', + ); process.exit(1); } - // Load the JSON schema and create the validator. - const schemaPath = path.resolve(__dirname, './article.schema.json'); - const schemaStr = await fsp.readFile(schemaPath); - const schema = await JSON.parse(schemaStr); - const validator = new Validator(schema); + // Build an absolute path based on the input. + let inputPath = input; + if (!path.isAbsolute(input)) { + const workingDir = await pwd(); + inputPath = path.resolve(workingDir, input); + console.log(inputPath); + } - try { - // Validate every JSON file. - for (const filePath of filePaths) { - const dataStr = await fsp.readFile(filePath); - - let data; - let shortPath = filePath.replace(inputPath + '/', ''); - try { - data = await JSON.parse(dataStr); - } catch { - console.log(`${Glyph.WARNING_SIGN} ${shortPath}`); - continue; - } + const jsonFilePaths = await getJsonFilePathsFromInputPath(inputPath); - const { valid } = validator.validate(data); + // There’s nothing to validate, so bail out early. + if (jsonFilePaths.length === 0) { + console.error('Could not find any JSON files in input.'); + process.exit(1); + } - // TODO: use article.raw.schema.xsd to validate the `raw` property. + const rows = []; - if (valid) { - console.log(`${Glyph.WHITE_HEAVY_CHECK_MARK} ${shortPath}`); - } else { - console.log(`${Glyph.CROSS_MARK} ${shortPath}`); - } + for (const jsonFilePath of jsonFilePaths) { + const report = await generateFileReport(jsonFilePath); + const shortPath = jsonFilePath.replace(inputPath + '/', ''); + + if (report.overall_status === Status.VALID) { + console.log(Glyph.WHITE_HEAVY_CHECK_MARK + ' ' + chalk.green(shortPath)); + } else if (report.overall_status === Status.INVALID) { + console.log(Glyph.CROSS_MARK + ' ' + chalk.red(shortPath)); + } else if (report.overall_status === Status.MALFORMED) { + console.log(Glyph.WARNING_SIGN + ' ' + chalk.yellow(shortPath)); } - } catch (err) { - console.error(err); + + rows.push(report); } - console.log('\nKey:'); - console.log(`${Glyph.WHITE_HEAVY_CHECK_MARK} = valid, per schema`); - console.log(`${Glyph.CROSS_MARK} = invalid, per schema`); - console.log(`${Glyph.WARNING_SIGN} = malformed JSON`); + // Write the report + const timestamp = new Date().toISOString().replace(/(:|\.)/g, ''); + const reportPath = url.fileURLToPath( + import.meta.resolve(`#reports/${timestamp}.csv`), + ); + writeDataToReportCsv(reportPath, rows); + + console.log('\nKey'); + console.log(`${Glyph.WHITE_HEAVY_CHECK_MARK} = valid`); + console.log(`${Glyph.CROSS_MARK} = invalid`); + console.log(`${Glyph.WARNING_SIGN} = malformed`); })();