diff --git a/lib/__tests__/bundle.test.ts b/lib/__tests__/bundle.test.ts index 03411ebb..8a65e023 100644 --- a/lib/__tests__/bundle.test.ts +++ b/lib/__tests__/bundle.test.ts @@ -17,24 +17,18 @@ describe("bundle", () => { const pathOrUrlOrSchema = path.resolve("lib", "__tests__", "spec", "multiple-refs.json"); const schema = (await refParser.bundle({ pathOrUrlOrSchema })) as any; - // First reference should be fully resolved (no $ref) - expect(schema.paths["/test1/{pathId}"].get.parameters[0].name).toBe("pathId"); - expect(schema.paths["/test1/{pathId}"].get.parameters[0].schema.type).toBe("string"); - expect(schema.paths["/test1/{pathId}"].get.parameters[0].schema.format).toBe("uuid"); - expect(schema.paths["/test1/{pathId}"].get.parameters[0].$ref).toBeUndefined(); - - // Second reference should be remapped to point to the first reference - expect(schema.paths["/test2/{pathId}"].get.parameters[0].$ref).toBe( - "#/paths/~1test1~1%7BpathId%7D/get/parameters/0", - ); - - // Both should effectively resolve to the same data + // Both parameters should now be $ref to the same internal definition const firstParam = schema.paths["/test1/{pathId}"].get.parameters[0]; const secondParam = schema.paths["/test2/{pathId}"].get.parameters[0]; - // The second parameter should resolve to the same data as the first - expect(secondParam.$ref).toBeDefined(); - expect(firstParam).toEqual({ + // Both $refs should point at the definition hoisted into #/components/parameters + expect(firstParam.$ref).toBe("#/components/parameters/path-parameter_pathId"); + expect(secondParam.$ref).toBe("#/components/parameters/path-parameter_pathId"); + + // The referenced parameter should exist and match the expected structure + expect(schema.components).toBeDefined(); + expect(schema.components.parameters).toBeDefined(); + expect(schema.components.parameters["path-parameter_pathId"]).toEqual({ name: "pathId", in: "path", required: true, diff --git a/lib/__tests__/pointer.test.ts b/lib/__tests__/pointer.test.ts index
e8d8a718..bc876110 100644 --- a/lib/__tests__/pointer.test.ts +++ b/lib/__tests__/pointer.test.ts @@ -16,11 +16,11 @@ - // The POST endpoint should have its schema inlined (copied) instead of a $ref + // The POST endpoint's schema should now be a $ref to the GET endpoint's inline schema const postSchema = schema.paths["/foo"].post.responses["200"].content["application/json"].schema; - expect(postSchema.$ref).toBeUndefined(); - expect(postSchema.type).toBe("object"); - expect(postSchema.properties.bar.type).toBe("string"); + expect(postSchema.$ref).toBe("#/paths/~1foo/get/responses/200/content/application~1json/schema"); + expect(postSchema.type).toBeUndefined(); + expect(postSchema.properties?.bar?.type).toBeUndefined(); - // Both schemas should be identical objects + // The $ref object and the inline schema are distinct objects that resolve to the same data - expect(postSchema).toEqual(getSchema); + expect(postSchema).not.toBe(getSchema); }); }); diff --git a/lib/__tests__/spec/multiple-refs.json b/lib/__tests__/spec/multiple-refs.json index 58823bbb..14115cbf 100644 --- a/lib/__tests__/spec/multiple-refs.json +++ b/lib/__tests__/spec/multiple-refs.json @@ -5,7 +5,7 @@ "summary": "First endpoint using the same pathId schema", "parameters": [ { - "$ref": "path-parameter.json#/pathId" + "$ref": "path-parameter.json#/components/parameters/pathId" } ], "responses": { @@ -20,7 +20,7 @@ "summary": "Second endpoint using the same pathId schema", "parameters": [ { - "$ref": "path-parameter.json#/pathId" + "$ref": "path-parameter.json#/components/parameters/pathId" } ], "responses": { diff --git a/lib/__tests__/spec/path-parameter.json b/lib/__tests__/spec/path-parameter.json index
2469450d..b08d01c3 100644 --- a/lib/__tests__/spec/path-parameter.json +++ b/lib/__tests__/spec/path-parameter.json @@ -1,12 +1,16 @@ { - "pathId": { - "name": "pathId", - "in": "path", - "required": true, - "schema": { - "type": "string", - "format": "uuid", - "description": "Unique identifier for the path" + "components": { + "parameters": { + "pathId": { + "name": "pathId", + "in": "path", + "required": true, + "schema": { + "type": "string", + "format": "uuid", + "description": "Unique identifier for the path" + } + } } } } diff --git a/lib/bundle.ts b/lib/bundle.ts index 911dcf67..6652b80f 100644 --- a/lib/bundle.ts +++ b/lib/bundle.ts @@ -1,5 +1,3 @@ -import isEqual from "lodash/isEqual"; - import $Ref from "./ref.js"; import type { ParserOptions } from "./options.js"; import Pointer from "./pointer.js"; @@ -8,6 +6,17 @@ import type $Refs from "./refs.js"; import type { $RefParser } from "./index"; import type { JSONSchema } from "./types/index.js"; +const DEBUG_PERFORMANCE = + process.env.DEBUG === "true" || + (typeof globalThis !== "undefined" && (globalThis as any).DEBUG_BUNDLE_PERFORMANCE === true); + +const perf = { + mark: (name: string) => DEBUG_PERFORMANCE && performance.mark(name), + measure: (name: string, start: string, end: string) => DEBUG_PERFORMANCE && performance.measure(name, start, end), + log: (message: string, ...args: any[]) => DEBUG_PERFORMANCE && console.log("[PERF] " + message, ...args), + warn: (message: string, ...args: any[]) => DEBUG_PERFORMANCE && console.warn("[PERF] " + message, ...args), +}; + export interface InventoryEntry { $ref: any; circular: any; @@ -21,22 +30,87 @@ export interface InventoryEntry { parent: any; pathFromRoot: any; value: any; + originalContainerType?: "schemas" | "parameters" | "requestBodies" | "responses" | "headers"; } /** - * TODO + * Fast lookup using Map instead of linear search with deep equality */ -const findInInventory = (inventory: Array, $refParent: any, $refKey: any) => { - for (const entry 
of inventory) { - if (entry) { - if (isEqual(entry.parent, $refParent)) { - if (entry.key === $refKey) { - return entry; - } - } +const createInventoryLookup = () => { + const lookup = new Map(); + const objectIds = new WeakMap(); // Use WeakMap to avoid polluting objects + let idCounter = 0; + let lookupCount = 0; + let addCount = 0; + + const getObjectId = (obj: any) => { + if (!objectIds.has(obj)) { + objectIds.set(obj, `obj_${++idCounter}`); } + return objectIds.get(obj)!; + }; + + const createInventoryKey = ($refParent: any, $refKey: any) => { + // Use WeakMap-based lookup to avoid polluting the actual schema objects + return `${getObjectId($refParent)}_${$refKey}`; + }; + + return { + add: (entry: InventoryEntry) => { + addCount++; + const key = createInventoryKey(entry.parent, entry.key); + lookup.set(key, entry); + if (addCount % 100 === 0) { + perf.log(`Inventory lookup: Added ${addCount} entries, map size: ${lookup.size}`); + } + }, + find: ($refParent: any, $refKey: any) => { + lookupCount++; + const key = createInventoryKey($refParent, $refKey); + const result = lookup.get(key); + if (lookupCount % 100 === 0) { + perf.log(`Inventory lookup: ${lookupCount} lookups performed`); + } + return result; + }, + remove: (entry: InventoryEntry) => { + const key = createInventoryKey(entry.parent, entry.key); + lookup.delete(key); + }, + getStats: () => ({ lookupCount, addCount, mapSize: lookup.size }), + }; +}; + +/** + * Determine the container type from a JSON Pointer path. + * Analyzes the path tokens to identify the appropriate OpenAPI component container. 
+ * + * @param path - The JSON Pointer path to analyze + * @returns The container type: "schemas", "parameters", "requestBodies", "responses", or "headers" + */ +const getContainerTypeFromPath = ( + path: string, +): "schemas" | "parameters" | "requestBodies" | "responses" | "headers" => { + const tokens = Pointer.parse(path); + const has = (t: string) => tokens.includes(t); + // Prefer more specific containers first + if (has("parameters")) { + return "parameters"; + } + if (has("requestBody")) { + return "requestBodies"; + } + if (has("headers")) { + return "headers"; } - return undefined; + if (has("responses")) { + return "responses"; + } + if (has("schema")) { + return "schemas"; + } + // default: treat as schema-like + return "schemas"; }; /** @@ -49,9 +123,12 @@ const inventory$Ref = ({ $refs, indirections, inventory, + inventoryLookup, options, path, pathFromRoot, + visitedObjects = new WeakSet(), + resolvedRefs = new Map(), }: { /** * The key in `$refParent` that is a JSON Reference @@ -70,6 +147,10 @@ const inventory$Ref = ({ * An array of already-inventoried $ref pointers */ inventory: Array; + /** + * Fast lookup for inventory entries + */ + inventoryLookup: ReturnType; options: ParserOptions; /** * The full path of the JSON Reference at `$refKey`, possibly with a JSON Pointer in the hash @@ -79,13 +160,39 @@ const inventory$Ref = ({ * The path of the JSON Reference at `$refKey`, from the schema root */ pathFromRoot: string; + /** + * Set of already visited objects to avoid infinite loops and redundant processing + */ + visitedObjects?: WeakSet; + /** + * Cache for resolved $ref targets to avoid redundant resolution + */ + resolvedRefs?: Map; }) => { + perf.mark("inventory-ref-start"); const $ref = $refKey === null ? 
$refParent : $refParent[$refKey]; const $refPath = url.resolve(path, $ref.$ref); - const pointer = $refs._resolve($refPath, pathFromRoot, options); + + // Check cache first to avoid redundant resolution + let pointer = resolvedRefs.get($refPath); + if (!pointer) { + perf.mark("resolve-start"); + pointer = $refs._resolve($refPath, pathFromRoot, options); + perf.mark("resolve-end"); + perf.measure("resolve-time", "resolve-start", "resolve-end"); + + if (pointer) { + resolvedRefs.set($refPath, pointer); + perf.log(`Cached resolved $ref: ${$refPath}`); + } + } + if (pointer === null) { + perf.mark("inventory-ref-end"); + perf.measure("inventory-ref-time", "inventory-ref-start", "inventory-ref-end"); return; } + const parsed = Pointer.parse(pathFromRoot); const depth = parsed.length; const file = url.stripHash(pointer.path); @@ -95,17 +202,24 @@ const inventory$Ref = ({ indirections += pointer.indirections; // Check if this exact location (parent + key + pathFromRoot) has already been inventoried - const existingEntry = findInInventory(inventory, $refParent, $refKey); + perf.mark("lookup-start"); + const existingEntry = inventoryLookup.find($refParent, $refKey); + perf.mark("lookup-end"); + perf.measure("lookup-time", "lookup-start", "lookup-end"); + if (existingEntry && existingEntry.pathFromRoot === pathFromRoot) { // This exact location has already been inventoried, so we don't need to process it again if (depth < existingEntry.depth || indirections < existingEntry.indirections) { removeFromInventory(inventory, existingEntry); + inventoryLookup.remove(existingEntry); } else { + perf.mark("inventory-ref-end"); + perf.measure("inventory-ref-time", "inventory-ref-start", "inventory-ref-end"); return; } } - inventory.push({ + const newEntry: InventoryEntry = { $ref, // The JSON Reference (e.g. {$ref: string}) circular: pointer.circular, // Is this $ref pointer DIRECTLY circular? (i.e. it references itself) depth, // How far from the JSON Schema root is this $ref pointer? 
@@ -118,10 +232,17 @@ const inventory$Ref = ({ parent: $refParent, // The object that contains this $ref pointer pathFromRoot, // The path to the $ref pointer, from the JSON Schema root value: pointer.value, // The resolved value of the $ref pointer - }); + originalContainerType: external ? getContainerTypeFromPath(pointer.path) : undefined, // The original container type in the external file + }; + + inventory.push(newEntry); + inventoryLookup.add(newEntry); + + perf.log(`Inventoried $ref: ${$ref.$ref} -> ${file}${hash} (external: ${external}, depth: ${depth})`); // Recursively crawl the resolved value if (!existingEntry || external) { + perf.mark("crawl-recursive-start"); crawl({ parent: pointer.value, key: null, @@ -129,10 +250,18 @@ const inventory$Ref = ({ pathFromRoot, indirections: indirections + 1, inventory, + inventoryLookup, $refs, options, + visitedObjects, + resolvedRefs, }); + perf.mark("crawl-recursive-end"); + perf.measure("crawl-recursive-time", "crawl-recursive-start", "crawl-recursive-end"); } + + perf.mark("inventory-ref-end"); + perf.measure("inventory-ref-time", "inventory-ref-start", "inventory-ref-end"); }; /** @@ -142,11 +271,14 @@ const crawl = ({ $refs, indirections, inventory, + inventoryLookup, key, options, parent, path, pathFromRoot, + visitedObjects = new WeakSet(), + resolvedRefs = new Map(), }: { $refs: $Refs; indirections: number; @@ -154,6 +286,10 @@ const crawl = ({ * An array of already-inventoried $ref pointers */ inventory: Array; + /** + * Fast lookup for inventory entries + */ + inventoryLookup: ReturnType; /** * The property key of `parent` to be crawled */ @@ -171,11 +307,26 @@ const crawl = ({ * The path of the property being crawled, from the schema root */ pathFromRoot: string; + /** + * Set of already visited objects to avoid infinite loops and redundant processing + */ + visitedObjects?: WeakSet; + /** + * Cache for resolved $ref targets to avoid redundant resolution + */ + resolvedRefs?: Map; }) => { const obj = key 
=== null ? parent : parent[key as keyof typeof parent]; if (obj && typeof obj === "object" && !ArrayBuffer.isView(obj)) { + // Early exit if we've already processed this exact object + if (visitedObjects.has(obj)) { + perf.log(`Skipping already visited object at ${pathFromRoot}`); + return; + } + if ($Ref.isAllowed$Ref(obj)) { + perf.log(`Found $ref at ${pathFromRoot}: ${(obj as any).$ref}`); inventory$Ref({ $refParent: parent, $refKey: key, @@ -183,10 +334,16 @@ const crawl = ({ pathFromRoot, indirections, inventory, + inventoryLookup, $refs, options, + visitedObjects, + resolvedRefs, }); } else { + // Mark this object as visited BEFORE processing its children + visitedObjects.add(obj); + // Crawl the object in a specific order that's optimized for bundling. // This is important because it determines how `pathFromRoot` gets built, // which later determines which keys get dereferenced and which ones get remapped @@ -217,8 +374,11 @@ const crawl = ({ pathFromRoot: keyPathFromRoot, indirections, inventory, + inventoryLookup, $refs, options, + visitedObjects, + resolvedRefs, }); } else { crawl({ @@ -228,8 +388,11 @@ const crawl = ({ pathFromRoot: keyPathFromRoot, indirections, inventory, + inventoryLookup, $refs, options, + visitedObjects, + resolvedRefs, }); } } @@ -238,29 +401,16 @@ const crawl = ({ }; /** - * Re-maps every $ref pointer, so that they're all relative to the root of the JSON Schema. - * Each referenced value is dereferenced EXACTLY ONCE. All subsequent references to the same - * value are re-mapped to point to the first reference. - * - * @example: { - * first: { $ref: somefile.json#/some/part }, - * second: { $ref: somefile.json#/another/part }, - * third: { $ref: somefile.json }, - * fourth: { $ref: somefile.json#/some/part/sub/part } - * } - * - * In this example, there are four references to the same file, but since the third reference points - * to the ENTIRE file, that's the only one we need to dereference. 
The other three can just be - * remapped to point inside the third one. - * - * On the other hand, if the third reference DIDN'T exist, then the first and second would both need - * to be dereferenced, since they point to different parts of the file. The fourth reference does NOT - * need to be dereferenced, because it can be remapped to point inside the first one. - * - * @param inventory + * Remap external refs by hoisting resolved values into a shared container in the root schema + * and pointing all occurrences to those internal definitions. Internal refs remain internal. */ -function remap(inventory: InventoryEntry[]) { +function remap(parser: $RefParser, inventory: InventoryEntry[]) { + perf.log(`Starting remap with ${inventory.length} inventory entries`); + perf.mark("remap-start"); + const root = parser.schema as any; + // Group & sort all the $ref pointers, so they're in the order that we need to dereference/remap them + perf.mark("sort-inventory-start"); inventory.sort((a: InventoryEntry, b: InventoryEntry) => { if (a.file !== b.file) { // Group all the $refs that point to the same file @@ -285,7 +435,6 @@ function remap(inventory: InventoryEntry[]) { // Most people will expect references to be bundled into the the "definitions" property if possible. const aDefinitionsIndex = a.pathFromRoot.lastIndexOf("/definitions"); const bDefinitionsIndex = b.pathFromRoot.lastIndexOf("/definitions"); - if (aDefinitionsIndex !== bDefinitionsIndex) { // Give higher priority to the $ref that's closer to the "definitions" property return bDefinitionsIndex - aDefinitionsIndex; @@ -296,55 +445,190 @@ function remap(inventory: InventoryEntry[]) { } }); - let file, hash, pathFromRoot; + perf.mark("sort-inventory-end"); + perf.measure("sort-inventory-time", "sort-inventory-start", "sort-inventory-end"); + + perf.log(`Sorted ${inventory.length} inventory entries`); + + // Ensure or return a container by component type. 
Prefer OpenAPI-aware placement; + // otherwise use existing root containers; otherwise create components/*. + const ensureContainer = (type: "schemas" | "parameters" | "requestBodies" | "responses" | "headers") => { + const isOas3 = !!(root && typeof root === "object" && typeof root.openapi === "string"); + const isOas2 = !!(root && typeof root === "object" && typeof root.swagger === "string"); + + if (isOas3) { + if (!root.components || typeof root.components !== "object") { + root.components = {}; + } + if (!root.components[type] || typeof root.components[type] !== "object") { + root.components[type] = {}; + } + return { obj: root.components[type], prefix: `#/components/${type}` } as const; + } + + if (isOas2) { + if (type === "schemas") { + if (!root.definitions || typeof root.definitions !== "object") { + root.definitions = {}; + } + return { obj: root.definitions, prefix: "#/definitions" } as const; + } + if (type === "parameters") { + if (!root.parameters || typeof root.parameters !== "object") { + root.parameters = {}; + } + return { obj: root.parameters, prefix: "#/parameters" } as const; + } + if (type === "responses") { + if (!root.responses || typeof root.responses !== "object") { + root.responses = {}; + } + return { obj: root.responses, prefix: "#/responses" } as const; + } + // requestBodies/headers don't exist as reusable containers in OAS2; fallback to definitions + if (!root.definitions || typeof root.definitions !== "object") { + root.definitions = {}; + } + return { obj: root.definitions, prefix: "#/definitions" } as const; + } + + // No explicit version: prefer existing containers + if (root && typeof root === "object") { + if (root.components && typeof root.components === "object") { + if (!root.components[type] || typeof root.components[type] !== "object") { + root.components[type] = {}; + } + return { obj: root.components[type], prefix: `#/components/${type}` } as const; + } + if (root.definitions && typeof root.definitions === "object") { + 
return { obj: root.definitions, prefix: "#/definitions" } as const; + } + // Create components/* by default if nothing exists + if (!root.components || typeof root.components !== "object") { + root.components = {}; + } + if (!root.components[type] || typeof root.components[type] !== "object") { + root.components[type] = {}; + } + return { obj: root.components[type], prefix: `#/components/${type}` } as const; + } + + // Fallback + root.definitions = root.definitions || {}; + return { obj: root.definitions, prefix: "#/definitions" } as const; + }; + + /** + * Choose the appropriate component container for bundling. + * Prioritizes the original container type from external files over usage location. + * + * @param entry - The inventory entry containing reference information + * @returns The container type to use for bundling + */ + const chooseComponent = (entry: InventoryEntry) => { + // If we have the original container type from the external file, use it + if (entry.originalContainerType) { + return entry.originalContainerType; + } + + // Fallback to usage path for internal references or when original type is not available + return getContainerTypeFromPath(entry.pathFromRoot); + }; + + // Track names per (container prefix) and per target + const targetToNameByPrefix = new Map>(); + const usedNamesByObj = new Map>(); + + const sanitize = (name: string) => name.replace(/[^A-Za-z0-9_-]/g, "_"); + const baseName = (filePath: string) => { + try { + const withoutHash = filePath.split("#")[0]; + const parts = withoutHash.split("/"); + const filename = parts[parts.length - 1] || "schema"; + const dot = filename.lastIndexOf("."); + return sanitize(dot > 0 ? 
filename.substring(0, dot) : filename); + } catch { + return "schema"; + } + }; + const lastToken = (hash: string) => { + if (!hash || hash === "#") { + return "root"; + } + const tokens = hash.replace(/^#\//, "").split("/"); + return sanitize(tokens[tokens.length - 1] || "root"); + }; + const uniqueName = (containerObj: any, proposed: string) => { + if (!usedNamesByObj.has(containerObj)) { + usedNamesByObj.set(containerObj, new Set(Object.keys(containerObj || {}))); + } + const used = usedNamesByObj.get(containerObj)!; + let name = proposed; + let i = 2; + while (used.has(name)) { + name = `${proposed}_${i++}`; + } + used.add(name); + return name; + }; + perf.mark("remap-loop-start"); for (const entry of inventory) { - // console.log('Re-mapping $ref pointer "%s" at %s', entry.$ref.$ref, entry.pathFromRoot); - - if (!entry.external && !entry.hash?.startsWith("#/paths/")) { - // This $ref already resolves to the main JSON Schema file - entry.$ref.$ref = entry.hash; - } else if (entry.file === file && entry.hash === hash) { - // This $ref points to the same value as the prevous $ref, so remap it to the same path - entry.$ref.$ref = pathFromRoot; - } else if (entry.file === file && entry.hash.indexOf(hash + "/") === 0) { - // This $ref points to a sub-value of the prevous $ref, so remap it beneath that path - entry.$ref.$ref = Pointer.join(pathFromRoot, Pointer.parse(entry.hash.replace(hash, "#"))); - } else { - // We've moved to a new file or new hash - file = entry.file; - hash = entry.hash; - pathFromRoot = entry.pathFromRoot; + // Safety check: ensure entry and entry.$ref are valid objects + if (!entry || !entry.$ref || typeof entry.$ref !== "object") { + perf.warn(`Skipping invalid inventory entry:`, entry); + continue; + } - // This is the first $ref to point to this value, so dereference the value. 
- // Any other $refs that point to the same value will point to this $ref instead - entry.$ref = entry.parent[entry.key] = $Ref.dereference(entry.$ref, entry.value); + // Keep internal refs internal + if (!entry.external) { + if (entry.$ref && typeof entry.$ref === "object") { + entry.$ref.$ref = entry.hash; + } + continue; + } - if (entry.circular) { - // This $ref points to itself + // Avoid changing direct self-references; keep them internal + if (entry.circular) { + if (entry.$ref && typeof entry.$ref === "object") { entry.$ref.$ref = entry.pathFromRoot; } + continue; + } + + // Choose appropriate container based on original location in external file + const component = chooseComponent(entry); + const { obj: container, prefix } = ensureContainer(component); + + const targetKey = `${entry.file}::${entry.hash}`; + if (!targetToNameByPrefix.has(prefix)) { + targetToNameByPrefix.set(prefix, new Map()); + } + const namesForPrefix = targetToNameByPrefix.get(prefix)!; + + let defName = namesForPrefix.get(targetKey); + if (!defName) { + const proposed = `${baseName(entry.file)}_${lastToken(entry.hash)}`; + defName = uniqueName(container, proposed); + namesForPrefix.set(targetKey, defName); + // Store the resolved value under the container + container[defName] = entry.value; + } + + // Point the occurrence to the internal definition, preserving extensions + const refPath = `${prefix}/${defName}`; + if (entry.extended && entry.$ref && typeof entry.$ref === "object") { + entry.$ref.$ref = refPath; + } else { + entry.parent[entry.key] = { $ref: refPath }; } } + perf.mark("remap-loop-end"); + perf.measure("remap-loop-time", "remap-loop-start", "remap-loop-end"); + + perf.mark("remap-end"); + perf.measure("remap-total-time", "remap-start", "remap-end"); - // we want to ensure that any $refs that point to another $ref are remapped to point to the final value - // let hadChange = true; - // while (hadChange) { - // hadChange = false; - // for (const entry of inventory) { - // 
if (entry.$ref && typeof entry.$ref === "object" && "$ref" in entry.$ref) { - // const resolved = inventory.find((e: InventoryEntry) => e.pathFromRoot === entry.$ref.$ref); - // if (resolved) { - // const resolvedPointsToAnotherRef = - // resolved.$ref && typeof resolved.$ref === "object" && "$ref" in resolved.$ref; - // if (resolvedPointsToAnotherRef && entry.$ref.$ref !== resolved.$ref.$ref) { - // // console.log('Re-mapping $ref pointer "%s" at %s', entry.$ref.$ref, entry.pathFromRoot); - // entry.$ref.$ref = resolved.$ref.$ref; - // hadChange = true; - // } - // } - // } - // } - // } + perf.log(`Completed remap of ${inventory.length} entries`); } function removeFromInventory(inventory: InventoryEntry[], entry: any) { @@ -362,8 +646,18 @@ function removeFromInventory(inventory: InventoryEntry[], entry: any) { */ export const bundle = (parser: $RefParser, options: ParserOptions) => { // console.log('Bundling $ref pointers in %s', parser.$refs._root$Ref.path); + perf.mark("bundle-start"); + // Build an inventory of all $ref pointers in the JSON Schema const inventory: InventoryEntry[] = []; + const inventoryLookup = createInventoryLookup(); + + perf.log("Starting crawl phase"); + perf.mark("crawl-phase-start"); + + const visitedObjects = new WeakSet(); + const resolvedRefs = new Map(); // Cache for resolved $ref targets + crawl({ parent: parser, key: "schema", @@ -371,10 +665,47 @@ export const bundle = (parser: $RefParser, options: ParserOptions) => { pathFromRoot: "#", indirections: 0, inventory, + inventoryLookup, $refs: parser.$refs, options, + visitedObjects, + resolvedRefs, }); + perf.mark("crawl-phase-end"); + perf.measure("crawl-phase-time", "crawl-phase-start", "crawl-phase-end"); + + const stats = inventoryLookup.getStats(); + perf.log(`Crawl phase complete. Found ${inventory.length} $refs. 
Lookup stats:`, stats); + // Remap all $ref pointers - remap(inventory); + perf.log("Starting remap phase"); + perf.mark("remap-phase-start"); + remap(parser, inventory); + perf.mark("remap-phase-end"); + perf.measure("remap-phase-time", "remap-phase-start", "remap-phase-end"); + + perf.mark("bundle-end"); + perf.measure("bundle-total-time", "bundle-start", "bundle-end"); + + perf.log("Bundle complete. Performance summary:"); + + // Log final stats + const finalStats = inventoryLookup.getStats(); + perf.log(`Final inventory stats:`, finalStats); + perf.log(`Resolved refs cache size: ${resolvedRefs.size}`); + + if (DEBUG_PERFORMANCE) { + // Log all performance measures + const measures = performance.getEntriesByType("measure"); + measures.forEach((measure) => { + if (measure.name.includes("time")) { + console.log(`${measure.name}: ${measure.duration.toFixed(2)}ms`); + } + }); + + // Clear performance marks and measures for next run + performance.clearMarks(); + performance.clearMeasures(); + } }; diff --git a/lib/dereference.ts b/lib/dereference.ts index 3ec72c10..33d81baf 100644 --- a/lib/dereference.ts +++ b/lib/dereference.ts @@ -1,4 +1,5 @@ import $Ref from "./ref.js"; +import cloneDeep from "lodash/cloneDeep"; import Pointer from "./pointer.js"; import { ono } from "@jsdevtools/ono"; import * as url from "./util/url.js"; @@ -17,10 +18,7 @@ export default dereference; * @param parser * @param options */ -function dereference( - parser: $RefParser, - options: ParserOptions, -) { +function dereference(parser: $RefParser, options: ParserOptions) { const start = Date.now(); // console.log('Dereferencing $ref pointers in %s', parser.$refs._root$Ref.path); const dereferenced = crawl( @@ -200,11 +198,12 @@ function dereference$Ref( } return { circular: cache.circular, - value: Object.assign({}, cache.value, extraKeys), + value: Object.assign({}, cloneDeep(cache.value), extraKeys), }; } - return cache; + // Return a deep-cloned value so each occurrence is an independent 
copy + return { circular: cache.circular, value: cloneDeep(cache.value) }; } const pointer = $refs._resolve($refPath, path, options); diff --git a/lib/index.ts b/lib/index.ts index 4f9d3659..cf4db3f0 100644 --- a/lib/index.ts +++ b/lib/index.ts @@ -14,7 +14,7 @@ import { fileResolver } from "./resolvers/file.js"; interface ResolvedInput { path: string; schema: string | JSONSchema | Buffer | Awaited | undefined; - type: 'file' | 'json' | 'url'; + type: "file" | "json" | "url"; } export const getResolvedInput = ({ @@ -27,10 +27,10 @@ export const getResolvedInput = ({ } const resolvedInput: ResolvedInput = { - path: typeof pathOrUrlOrSchema === 'string' ? pathOrUrlOrSchema : '', + path: typeof pathOrUrlOrSchema === "string" ? pathOrUrlOrSchema : "", schema: undefined, - type: 'url', - } + type: "url", + }; // If the path is a filesystem path, then convert it to a URL. // NOTE: According to the JSON Reference spec, these should already be URLs, @@ -40,27 +40,27 @@ export const getResolvedInput = ({ // If it doesn't work for your use-case, then use a URL instead. if (resolvedInput.path && url.isFileSystemPath(resolvedInput.path)) { resolvedInput.path = url.fromFileSystemPath(resolvedInput.path); - resolvedInput.type = 'file'; - } else if (!resolvedInput.path && pathOrUrlOrSchema && typeof pathOrUrlOrSchema === 'object') { + resolvedInput.type = "file"; + } else if (!resolvedInput.path && pathOrUrlOrSchema && typeof pathOrUrlOrSchema === "object") { if ("$id" in pathOrUrlOrSchema && pathOrUrlOrSchema.$id) { // when schema id has defined an URL should use that hostname to request the references, // instead of using the current page URL const { hostname, protocol } = new URL(pathOrUrlOrSchema.$id as string); resolvedInput.path = `${protocol}//${hostname}:${protocol === "https:" ? 
443 : 80}`; - resolvedInput.type = 'url'; + resolvedInput.type = "url"; } else { resolvedInput.schema = pathOrUrlOrSchema; - resolvedInput.type = 'json'; + resolvedInput.type = "json"; } } - if (resolvedInput.type !== 'json') { + if (resolvedInput.type !== "json") { // resolve the absolute path of the schema resolvedInput.path = url.resolve(url.cwd(), resolvedInput.path); } return resolvedInput; -} +}; /** * This class parses a JSON schema, builds a map of its JSON references and their resolved values, @@ -74,7 +74,7 @@ export class $RefParser { * @readonly */ $refs = new $Refs(); - public options = getJsonSchemaRefParserDefaultOptions() + public options = getJsonSchemaRefParserDefaultOptions(); /** * The parsed (and possibly dereferenced) JSON schema object * @@ -185,17 +185,17 @@ export class $RefParser { if (schema) { // immediately add a new $Ref with the schema object as value const $ref = this.$refs._add(path); - $ref.pathType = url.isFileSystemPath(path) ? 'file' : 'http'; + $ref.pathType = url.isFileSystemPath(path) ? "file" : "http"; $ref.value = schema; - } else if (type !== 'json') { - const file = newFile(path) + } else if (type !== "json") { + const file = newFile(path); // Add a new $Ref for this file, even though we don't have the value yet. // This ensures that we don't simultaneously read & parse the same file multiple times const $refAdded = this.$refs._add(file.url); $refAdded.pathType = type; try { - const resolver = type === 'file' ? fileResolver : urlResolver; + const resolver = type === "file" ? 
fileResolver : urlResolver; await resolver.handler({ arrayBuffer, fetch, @@ -208,12 +208,12 @@ export class $RefParser { if (isHandledError(err)) { $refAdded.value = err; } - + throw err; } } - if (schema === null || typeof schema !== 'object' || Buffer.isBuffer(schema)) { + if (schema === null || typeof schema !== "object" || Buffer.isBuffer(schema)) { throw ono.syntax(`"${this.$refs._root$Ref.path || schema}" is not a valid JSON Schema`); } @@ -225,5 +225,5 @@ export class $RefParser { } } -export { sendRequest } from './resolvers/url.js' +export { sendRequest } from "./resolvers/url.js"; export type { JSONSchema } from "./types/index.js";