diff --git a/packages/interop/package.json b/packages/interop/package.json
index 4121f0b9..dbc153b0 100644
--- a/packages/interop/package.json
+++ b/packages/interop/package.json
@@ -73,12 +73,14 @@
     "@helia/unixfs": "^4.0.3",
     "@ipld/car": "^5.3.3",
     "@ipld/dag-cbor": "^9.2.2",
+    "@ipld/dag-pb": "^4.1.3",
     "@libp2p/crypto": "^5.0.7",
     "@libp2p/interface": "^2.2.1",
     "@libp2p/kad-dht": "^14.1.3",
     "@libp2p/keychain": "^5.0.10",
     "@libp2p/peer-id": "^5.0.8",
     "@libp2p/websockets": "^9.0.13",
+    "@multiformats/multiaddr": "^12.4.0",
     "@multiformats/sha3": "^3.0.2",
     "aegir": "^45.1.1",
     "helia": "^5.3.0",
diff --git a/packages/interop/src/unixfs-files.spec.ts b/packages/interop/src/unixfs-files.spec.ts
index cb1dc736..68f052e7 100644
--- a/packages/interop/src/unixfs-files.spec.ts
+++ b/packages/interop/src/unixfs-files.spec.ts
@@ -1,15 +1,20 @@
 /* eslint-env mocha */
 
 import { unixfs } from '@helia/unixfs'
+import * as dagPb from '@ipld/dag-pb'
+import { multiaddr } from '@multiformats/multiaddr'
 import { expect } from 'aegir/chai'
 import { fixedSize } from 'ipfs-unixfs-importer/chunker'
 import { balanced } from 'ipfs-unixfs-importer/layout'
+import drain from 'it-drain'
+import last from 'it-last'
 import { CID } from 'multiformats/cid'
+import { fromString as uint8ArrayFromString } from 'uint8arrays/from-string'
 import { createHeliaNode } from './fixtures/create-helia.js'
 import { createKuboNode } from './fixtures/create-kubo.js'
 import type { AddOptions, UnixFS } from '@helia/unixfs'
 import type { HeliaLibp2p } from 'helia'
-import type { ByteStream } from 'ipfs-unixfs-importer'
+import type { ByteStream, ImportCandidateStream } from 'ipfs-unixfs-importer'
 import type { KuboNode } from 'ipfsd-ctl'
 import type { AddOptions as KuboAddOptions } from 'kubo-rpc-client'
 
@@ -24,12 +29,32 @@ describe('@helia/unixfs - files', () => {
     return cid
   }
 
+  async function importDirectoryToHelia (data: ImportCandidateStream, opts?: Partial<AddOptions>): Promise<CID> {
+    const result = await last(unixFs.addAll(data, opts))
+
+    if (result == null) {
+      throw new Error('Nothing imported')
+    }
+
+    return CID.parse(result.cid.toString())
+  }
+
   async function importToKubo (data: ByteStream, opts?: KuboAddOptions): Promise<CID> {
     const result = await kubo.api.add(data, opts)
 
     return CID.parse(result.cid.toString())
   }
 
+  async function importDirectoryToKubo (data: ImportCandidateStream, opts?: KuboAddOptions): Promise<CID> {
+    const result = await last(kubo.api.addAll(data, opts))
+
+    if (result == null) {
+      throw new Error('Nothing imported')
+    }
+
+    return CID.parse(result.cid.toString())
+  }
+
   async function expectSameCid (data: () => ByteStream, heliaOpts: Partial<AddOptions> = {}, kuboOpts: KuboAddOptions = {}): Promise<void> {
     const heliaCid = await importToHelia(data(), {
       // these are the default kubo options
@@ -85,4 +110,103 @@
 
     await expectSameCid(candidate)
   })
+
+  it('should return the same directory stats', async () => {
+    const candidates = [{
+      path: '/foo1.txt',
+      content: uint8ArrayFromString('Hello World!')
+    }, {
+      path: '/foo2.txt',
+      content: uint8ArrayFromString('Hello World!')
+    }]
+
+    const heliaCid = await importDirectoryToHelia(candidates, {
+      wrapWithDirectory: true
+    })
+    const kuboCid = await importDirectoryToKubo(candidates, {
+      cidVersion: 1,
+      chunker: `size-${1024 * 1024}`,
+      rawLeaves: true,
+      wrapWithDirectory: true
+    })
+
+    expect(heliaCid.toString()).to.equal(kuboCid.toString())
+
+    const heliaStat = await unixFs.stat(heliaCid, {
+      extended: true
+    })
+    const kuboStat = await kubo.api.files.stat(`/ipfs/${kuboCid}`, {
+      withLocal: true
+    })
+
+    expect(heliaStat.dagSize.toString()).to.equal(kuboStat.cumulativeSize.toString())
+    expect(heliaStat.dagSize.toString()).to.equal(kuboStat.sizeLocal?.toString())
+
+    // +1 because kubo doesn't count the root directory block
+    expect(heliaStat.blocks.toString()).to.equal((kuboStat.blocks + 1).toString())
+  })
+
+  it('fetches missing blocks during stat', async () => {
+    const chunkSize = 1024 * 1024
+    const size = chunkSize * 10
+
+    const candidate = (): ByteStream => (async function * () {
+      for (let i = 0; i < size; i += chunkSize) {
+        yield new Uint8Array(new Array(chunkSize).fill(0).map((val, index) => {
+          return Math.floor(Math.random() * 256)
+        }))
+      }
+    }())
+
+    const largeFileCid = await importToKubo(candidate())
+    const info = await kubo.info()
+
+    await helia.libp2p.dial(info.multiaddrs.map(ma => multiaddr(ma)))
+
+    // pull all blocks from kubo
+    await drain(unixFs.cat(largeFileCid))
+
+    // check the root block
+    const block = await helia.blockstore.get(largeFileCid)
+    const node = dagPb.decode(block)
+
+    expect(node.Links).to.have.lengthOf(40)
+
+    const stats = await unixFs.stat(largeFileCid, {
+      extended: true
+    })
+
+    expect(stats.unixfs?.fileSize()).to.equal(10485760n)
+    expect(stats.blocks).to.equal(41n)
+    expect(stats.dagSize).to.equal(10488250n)
+    expect(stats.localSize).to.equal(10485760n)
+
+    // remove one of the blocks so we now have an incomplete DAG
+    await helia.blockstore.delete(node.Links[0].Hash)
+
+    // block count and local file/dag sizes should be smaller
+    const updatedStats = await unixFs.stat(largeFileCid, {
+      extended: true,
+      offline: true
+    })
+
+    expect(updatedStats.unixfs?.fileSize()).to.equal(10485760n)
+    expect(updatedStats.blocks).to.equal(40n)
+    expect(updatedStats.dagSize).to.equal(10226092n)
+    expect(updatedStats.localSize).to.equal(10223616n)
+
+    await new Promise<void>((resolve) => {
+      setTimeout(() => {
+        resolve()
+      }, 1_000)
+    })
+
+    // block count and local file/dag sizes should be smaller
+    const finalStats = await unixFs.stat(largeFileCid, {
+      extended: true
+    })
+
+    // should have fetched missing block from Kubo
+    expect(finalStats).to.deep.equal(stats, 'did not fetch missing block')
+  })
 })
diff --git a/packages/mfs/src/index.ts b/packages/mfs/src/index.ts
index 1c50f2df..6cdc945b 100644
--- a/packages/mfs/src/index.ts
+++ b/packages/mfs/src/index.ts
@@ -34,7 +34,7 @@ import { Key } from 'interface-datastore'
 import { UnixFS as IPFSUnixFS, type Mtime } from 'ipfs-unixfs'
 import { CID } from 'multiformats/cid'
 import { basename } from './utils/basename.js'
-import type { AddOptions, CatOptions, ChmodOptions, CpOptions, LsOptions, MkdirOptions as UnixFsMkdirOptions, RmOptions as UnixFsRmOptions, StatOptions, TouchOptions, UnixFS, UnixFSStats } from '@helia/unixfs'
+import type { AddOptions, CatOptions, ChmodOptions, CpOptions, LsOptions, MkdirOptions as UnixFsMkdirOptions, RmOptions as UnixFsRmOptions, StatOptions, TouchOptions, UnixFS, FileStats, DirectoryStats, RawStats, ExtendedStatOptions, ExtendedFileStats, ExtendedDirectoryStats, ExtendedRawStats } from '@helia/unixfs'
 import type { AbortOptions } from '@libp2p/interface'
 import type { Blockstore } from 'interface-blockstore'
 import type { Datastore } from 'interface-datastore'
@@ -213,7 +213,8 @@
    * console.info(stats)
    * ```
    */
-  stat(path: string, options?: Partial<StatOptions>): Promise<UnixFSStats>
+  stat(path: string, options?: StatOptions): Promise<FileStats | DirectoryStats | RawStats>
+  stat(path: string, options?: ExtendedStatOptions): Promise<ExtendedFileStats | ExtendedDirectoryStats | ExtendedRawStats>
 
   /**
   * Update the mtime of a UnixFS DAG in your MFS.
@@ -438,7 +439,9 @@ class DefaultMFS implements MFS {
     this.root = await this.#persistPath(trail, options)
   }
 
-  async stat (path: string, options?: Partial<StatOptions>): Promise<UnixFSStats> {
+  async stat (path: string, options?: StatOptions): Promise<FileStats | DirectoryStats | RawStats>
+  async stat (path: string, options?: ExtendedStatOptions): Promise<ExtendedFileStats | ExtendedDirectoryStats | ExtendedRawStats>
+  async stat (path: string, options?: StatOptions | ExtendedStatOptions): Promise<FileStats | DirectoryStats | RawStats | ExtendedFileStats | ExtendedDirectoryStats | ExtendedRawStats> {
     const root = await this.#getRootCID()
 
     const trail = await this.#walkPath(root, path, {
@@ -453,9 +456,7 @@
       throw new DoesNotExistError()
     }
 
-    return this.unixfs.stat(finalEntry.cid, {
-      ...options
-    })
+    return this.unixfs.stat(finalEntry.cid, options)
   }
 
   async touch (path: string, options?: Partial<TouchOptions>): Promise<void> {
diff --git a/packages/mfs/test/stat.spec.ts b/packages/mfs/test/stat.spec.ts
index ed7f6eef..1aa18c3b 100644
--- a/packages/mfs/test/stat.spec.ts
+++ b/packages/mfs/test/stat.spec.ts
@@ -23,52 +23,72 @@
   })
 
   it('stats an empty directory', async () => {
-    await expect(fs.stat('/')).to.eventually.include({
-      fileSize: 0n,
-      dagSize: 2n,
-      blocks: 1,
-      type: 'directory'
+    const stat = await fs.stat('/')
+    expect(stat.type).to.equal('directory')
+
+    const extendedStats = await fs.stat('/', {
+      extended: true
     })
+
+    expect(extendedStats.type).to.equal('directory')
+    expect(extendedStats.blocks).to.equal(1n)
+    expect(extendedStats.dagSize).to.equal(4n)
+    expect(extendedStats.localSize).to.equal(0n)
+    expect(extendedStats.unixfs?.type).to.equal('directory')
   })
 
   it('computes how much of the DAG is local', async () => {
     const filePath = '/foo.txt'
     await fs.writeBytes(largeFile, filePath)
 
-    const stats = await fs.stat(filePath)
+    const stats = await fs.stat(filePath, {
+      extended: true
+    })
     const block = await blockstore.get(stats.cid)
     const node = dagPb.decode(block)
 
     expect(node.Links).to.have.lengthOf(5)
 
-    expect(stats).to.include({
-      fileSize: 5242880n,
-      blocks: 6,
-      localDagSize: 5243139n
-    })
+    expect(stats.unixfs?.fileSize()).to.equal(5242880n)
+    expect(stats.blocks).to.equal(6n)
+    expect(stats.dagSize).to.equal(5243139n)
+    expect(stats.localSize).to.equal(5242880n)
 
     // remove one of the blocks so we now have an incomplete DAG
     await blockstore.delete(node.Links[0].Hash)
 
     // block count and local file/dag sizes should be smaller
-    await expect(fs.stat(filePath)).to.eventually.include({
-      fileSize: 5242880n,
-      blocks: 5,
-      localFileSize: 4194304n,
-      localDagSize: 4194563n
+    const updatedStats = await fs.stat(filePath, {
+      extended: true,
+      offline: true
     })
+
+    expect(updatedStats.unixfs?.fileSize()).to.equal(5242880n)
+    expect(updatedStats.blocks).to.equal(5n)
+    expect(updatedStats.dagSize).to.equal(4194563n)
+    expect(updatedStats.localSize).to.equal(4194304n)
   })
 
   it('stats a raw node', async () => {
     const filePath = '/foo.txt'
    await fs.writeBytes(smallFile, filePath)
 
-    await expect(fs.stat(filePath)).to.eventually.include({
-      fileSize: BigInt(smallFile.length),
-      dagSize: 13n,
-      blocks: 1,
-      type: 'raw'
+    const stat = await fs.stat(filePath)
+    expect(stat.type).to.equal('raw')
+    expect(stat.size).to.equal(13n)
+
+    const extendedStat = await fs.stat(filePath, {
+      extended: true
+    })
+
+    expect(extendedStat).to.deep.equal({
+      ...stat,
+      blocks: 1n,
+      dagSize: BigInt(smallFile.byteLength),
+      localSize: BigInt(smallFile.byteLength),
+      uniqueBlocks: 1n,
+      deduplicatedDagSize: BigInt(smallFile.byteLength)
    })
   })
 
@@ -79,24 +99,38 @@
       rawLeaves: false
     })
 
-    await expect(fs.stat(filePath)).to.eventually.include({
-      fileSize: BigInt(smallFile.length),
-      dagSize: 19n,
-      blocks: 1,
-      type: 'file'
+    const stat = await fs.stat(filePath)
+    expect(stat.type).to.equal('file')
+    expect(stat.unixfs?.fileSize()).to.equal(13n)
+
+    const extendedStat = await fs.stat(filePath, {
+      extended: true
     })
+
+    expect(extendedStat.blocks).to.equal(1n)
+    expect(extendedStat.dagSize).to.equal(21n)
+    expect(extendedStat.localSize).to.equal(13n)
+    expect(extendedStat.type).to.equal('file')
+    expect(extendedStat.unixfs?.fileSize()).to.equal(13n)
   })
 
   it('stats a large file', async () => {
     const filePath = '/foo.txt'
     await fs.writeBytes(largeFile, filePath)
 
-    await expect(fs.stat(filePath)).to.eventually.include({
-      fileSize: BigInt(largeFile.length),
-      dagSize: 5242907n,
-      blocks: 6,
-      type: 'file'
+    const stat = await fs.stat(filePath)
+    expect(stat.type).to.equal('file')
+    expect(stat.unixfs?.fileSize()).to.equal(BigInt(largeFile.length))
+
+    const extendedStat = await fs.stat(filePath, {
+      extended: true
     })
+
+    expect(extendedStat.blocks).to.equal(6n)
+    expect(extendedStat.dagSize).to.equal(5243139n)
+    expect(extendedStat.localSize).to.equal(BigInt(largeFile.length))
+    expect(extendedStat.type).to.equal('file')
+    expect(extendedStat.unixfs?.fileSize()).to.equal(BigInt(largeFile.length))
   })
 
   it('should stat file with mode', async () => {
@@ -106,9 +140,8 @@
       mode
     })
 
-    await expect(fs.stat(filePath)).to.eventually.include({
-      mode
-    })
+    const stat = await fs.stat(filePath)
+    expect(stat.unixfs?.mode).to.equal(mode)
   })
 
   it('should stat file with mtime', async function () {
@@ -121,17 +154,22 @@
       mtime
     })
 
-    await expect(fs.stat(filePath)).to.eventually.deep.include({
-      mtime
-    })
+    const stat = await fs.stat(filePath)
+    expect(stat.unixfs?.mtime).to.deep.equal(mtime)
   })
 
   it('should stat a directory', async function () {
-    await expect(fs.stat('/')).to.eventually.include({
-      type: 'directory',
-      blocks: 1,
-      fileSize: 0n
+    const stat = await fs.stat('/')
+    expect(stat.type).to.equal('directory')
+
+    const extendedStat = await fs.stat('/', {
+      extended: true
     })
+
+    expect(extendedStat.blocks).to.equal(1n)
+    expect(extendedStat.dagSize).to.equal(4n)
+    expect(extendedStat.localSize).to.equal(0n)
+    expect(extendedStat.type).to.equal('directory')
   })
 
   it('should stat dir with mode', async function () {
@@ -141,9 +179,8 @@
       mode
     })
 
-    await expect(fs.stat(path)).to.eventually.include({
-      mode
-    })
+    const stat = await fs.stat(path)
+    expect(stat.unixfs?.mode).to.equal(mode)
   })
 
   it('should stat dir with mtime', async function () {
@@ -157,9 +194,8 @@
       mtime
     })
 
-    await expect(fs.stat(path)).to.eventually.deep.include({
-      mtime
-    })
+    const stat = await fs.stat(path)
+    expect(stat.unixfs?.mtime).to.deep.equal(mtime)
   })
 
   it('stats a sharded directory', async function () {
@@ -176,14 +212,18 @@
     })
 
     const stat = await fs.stat(shardedDirPath)
-    expect(stat).to.have.property('type', 'directory')
-    expect(stat).to.have.nested.property('unixfs.type', 'hamt-sharded-directory')
-    expect(stat).to.include({
-      mode: 0o755
-    })
-    expect(stat).to.deep.include({
-      mtime
+    expect(stat.type).to.equal('directory')
+    expect(stat.unixfs?.type).to.equal('hamt-sharded-directory')
+
+    const extendedStat = await fs.stat(shardedDirPath, {
+      extended: true
     })
+
+    expect(extendedStat.blocks).to.equal(1243n)
+    expect(extendedStat.dagSize).to.equal(79157n)
+    expect(extendedStat.localSize).to.equal(5005n)
+    expect(extendedStat.type).to.equal('directory')
+    expect(extendedStat.unixfs?.type).to.equal('hamt-sharded-directory')
   })
 
   it('stats a file inside a sharded directory', async () => {
@@ -198,7 +238,6 @@
     })
 
     const stats = await fs.stat(filePath)
-    expect(stats.type).to.equal('file')
-    expect(stats.fileSize).to.equal(4n)
+    expect(stats.unixfs?.fileSize()).to.equal(4n)
   })
 })
diff --git a/packages/unixfs/package.json b/packages/unixfs/package.json
index d3e143e9..9f9646a6 100644
--- a/packages/unixfs/package.json
+++ b/packages/unixfs/package.json
@@ -78,6 +78,7 @@
     "@ipld/dag-pb": "^4.1.3",
     "@libp2p/interface": "^2.2.1",
     "@libp2p/logger": "^5.1.4",
+    "@libp2p/utils": "^6.6.0",
     "@multiformats/murmur3": "^2.1.8",
     "hamt-sharding": "^3.0.6",
     "interface-blockstore": "^5.3.1",
diff --git a/packages/unixfs/src/commands/stat.ts b/packages/unixfs/src/commands/stat.ts
index 659ae925..0b9ae4cc 100644
--- a/packages/unixfs/src/commands/stat.ts
+++ b/packages/unixfs/src/commands/stat.ts
@@ -1,17 +1,21 @@
 import * as dagPb from '@ipld/dag-pb'
 import { logger } from '@libp2p/logger'
+import { ScalableCuckooFilter } from '@libp2p/utils/filters'
 import { UnixFS } from 'ipfs-unixfs'
-import { exporter } from 'ipfs-unixfs-exporter'
+import { exporter, type RawNode, type UnixFSDirectory, type UnixFSFile } from 'ipfs-unixfs-exporter'
 import mergeOpts from 'merge-options'
 import * as raw from 'multiformats/codecs/raw'
 import { InvalidPBNodeError, NotUnixFSError, UnknownError } from '../errors.js'
 import { resolve } from './utils/resolve.js'
-import type { StatOptions, UnixFSStats } from '../index.js'
+import type { ExtendedStatOptions, ExtendedDirectoryStats, ExtendedFileStats, StatOptions, DirectoryStats, FileStats, RawStats, ExtendedRawStats } from '../index.js'
 import type { GetStore, HasStore } from '../unixfs.js'
-import type { AbortOptions } from '@libp2p/interface'
-import type { Mtime } from 'ipfs-unixfs'
+import type { Filter } from '@libp2p/utils/filters'
 import type { CID } from 'multiformats/cid'
 
+// https://github.com/ipfs/specs/blob/main/UNIXFS.md#metadata
+const DEFAULT_DIR_MODE = 0o755
+const DEFAULT_FILE_MODE = 0o644
+
 const mergeOptions = mergeOpts.bind({ ignoreUndefined: true })
 const log = logger('helia:unixfs:stat')
 
@@ -19,7 +23,9 @@ const defaultOptions: StatOptions = {
 
 }
 
-export async function stat (cid: CID, blockstore: GetStore & HasStore, options: Partial<StatOptions> = {}): Promise<UnixFSStats> {
+export async function stat (cid: CID, blockstore: GetStore & HasStore, options?: StatOptions): Promise<FileStats | DirectoryStats | RawStats>
+export async function stat (cid: CID, blockstore: GetStore & HasStore, options?: ExtendedStatOptions): Promise<ExtendedFileStats | ExtendedDirectoryStats | ExtendedRawStats>
+export async function stat (cid: CID, blockstore: GetStore & HasStore, options: Partial<ExtendedStatOptions> = {}): Promise<FileStats | DirectoryStats | RawStats | ExtendedFileStats | ExtendedDirectoryStats | ExtendedRawStats> {
   const opts: StatOptions = mergeOptions(defaultOptions, options)
   const resolved = await resolve(cid, options.path, blockstore, opts)
 
@@ -27,116 +33,186 @@ export async function stat (cid: CID, blockstore: GetStore & HasStore, options:
   const result = await exporter(resolved.cid, blockstore, opts)
 
-  if (result.type !== 'file' && result.type !== 'directory' && result.type !== 'raw') {
-    throw new NotUnixFSError()
+  if (result.type === 'raw') {
+    if (options.extended === true) {
+      return createExtendedRawStats(result)
+    }
+
+    return createRawStats(result)
+  } else if (result.type === 'file' || result.type === 'directory') {
+    if (options.extended === true) {
+      return createExtendedStats(result, blockstore, options.filter ?? new ScalableCuckooFilter({ filterSize: 1024 }), options)
+    }
+
+    return createStats(result)
   }
 
-  let fileSize: bigint = 0n
-  let dagSize: bigint = 0n
-  let localFileSize: bigint = 0n
-  let localDagSize: bigint = 0n
-  let blocks: number = 0
-  let mode: number | undefined
-  let mtime: Mtime | undefined
-  const type = result.type
-  let unixfs: UnixFS | undefined
+  throw new NotUnixFSError()
+}
 
-  if (result.type === 'raw') {
-    fileSize = BigInt(result.node.byteLength)
-    dagSize = BigInt(result.node.byteLength)
-    localFileSize = BigInt(result.node.byteLength)
-    localDagSize = BigInt(result.node.byteLength)
-    blocks = 1
+function createStats (entry: UnixFSFile | UnixFSDirectory): FileStats | DirectoryStats {
+  return {
+    type: entry.type,
+    cid: entry.cid,
+    unixfs: entry.unixfs,
+    mode: entry.unixfs.mode ?? (entry.unixfs.isDirectory() ? DEFAULT_DIR_MODE : DEFAULT_FILE_MODE),
+    mtime: entry.unixfs.mtime,
+    size: entry.unixfs.fileSize()
   }
+}
 
-  if (result.type === 'directory') {
-    fileSize = 0n
-    dagSize = BigInt(result.unixfs.marshal().byteLength)
-    localFileSize = 0n
-    localDagSize = dagSize
-    blocks = 1
-    mode = result.unixfs.mode
-    mtime = result.unixfs.mtime
-    unixfs = result.unixfs
+async function createExtendedStats (entry: UnixFSFile | UnixFSDirectory, blockstore: GetStore & HasStore, filter: Filter, options: StatOptions): Promise<ExtendedFileStats | ExtendedDirectoryStats> {
+  const stats = await inspectDag(entry.cid, blockstore, false, filter, options)
+
+  return {
+    type: entry.type,
+    cid: entry.cid,
+    unixfs: entry.unixfs,
+    size: entry.unixfs.isDirectory() ? stats.dirSize : entry.unixfs.fileSize(),
+    mode: entry.unixfs.mode ?? (entry.unixfs.isDirectory() ? DEFAULT_DIR_MODE : DEFAULT_FILE_MODE),
+    mtime: entry.unixfs.mtime,
+    localSize: stats.localSize,
+    dagSize: stats.dagSize,
+    deduplicatedDagSize: stats.deduplicatedDagSize,
+    blocks: stats.blocks,
+    uniqueBlocks: stats.uniqueBlocks
   }
+}
 
-  if (result.type === 'file') {
-    const results = await inspectDag(resolved.cid, blockstore, opts)
-
-    fileSize = result.unixfs.fileSize()
-    dagSize = BigInt((result.node.Data?.byteLength ?? 0) + result.node.Links.reduce((acc, curr) => acc + (curr.Tsize ?? 0), 0))
-    localFileSize = BigInt(results.localFileSize)
-    localDagSize = BigInt(results.localDagSize)
-    blocks = results.blocks
-    mode = result.unixfs.mode
-    mtime = result.unixfs.mtime
-    unixfs = result.unixfs
+function createRawStats (entry: RawNode): RawStats {
+  return {
+    type: entry.type,
+    cid: entry.cid,
+    unixfs: undefined,
+    mode: DEFAULT_FILE_MODE,
+    mtime: undefined,
+    size: BigInt(entry.node.byteLength)
   }
+}
 
+function createExtendedRawStats (entry: RawNode): ExtendedRawStats {
   return {
-    cid: resolved.cid,
-    mode,
-    mtime,
-    fileSize,
-    dagSize,
-    localFileSize,
-    localDagSize,
-    blocks,
-    type,
-    unixfs
+    type: entry.type,
+    cid: entry.cid,
+    unixfs: undefined,
+    mode: DEFAULT_FILE_MODE,
+    mtime: undefined,
+    size: BigInt(entry.node.byteLength),
+    localSize: BigInt(entry.node.byteLength),
+    dagSize: BigInt(entry.node.byteLength),
+    deduplicatedDagSize: BigInt(entry.node.byteLength),
+    blocks: 1n,
+    uniqueBlocks: 1n
   }
 }
 
 interface InspectDagResults {
-  localFileSize: number
-  localDagSize: number
-  blocks: number
+  dirSize: bigint
+  localSize: bigint
+  dagSize: bigint
+  deduplicatedDagSize: bigint
+  blocks: bigint
+  uniqueBlocks: bigint
 }
 
-async function inspectDag (cid: CID, blockstore: GetStore & HasStore, options: AbortOptions): Promise<InspectDagResults> {
-  const results = {
-    localFileSize: 0,
-    localDagSize: 0,
-    blocks: 0
+async function inspectDag (cid: CID, blockstore: GetStore & HasStore, isFile: boolean, filter: Filter, options: StatOptions): Promise<InspectDagResults> {
+  const results: InspectDagResults = {
+    dirSize: 0n,
+    localSize: 0n,
+    dagSize: 0n,
+    deduplicatedDagSize: 0n,
+    blocks: 0n,
+    uniqueBlocks: 0n
   }
 
-  if (await blockstore.has(cid, options)) {
+  try {
+    const alreadyTraversed = filter.has(cid.bytes)
+    filter.add(cid.bytes)
+
+    const block = await blockstore.get(cid, options)
     results.blocks++
-    results.localDagSize += block.byteLength
+    results.dagSize += BigInt(block.byteLength)
+
+    if (!alreadyTraversed) {
+      results.uniqueBlocks++
+      results.deduplicatedDagSize += BigInt(block.byteLength)
+    }
 
     if (cid.code === raw.code) {
-      results.localFileSize += block.byteLength
+      results.localSize += BigInt(block.byteLength)
+
+      if (isFile) {
+        results.dirSize += BigInt(block.byteLength)
+      }
     } else if (cid.code === dagPb.code) {
       const pbNode = dagPb.decode(block)
+      let unixfs: UnixFS | undefined
+
+      if (pbNode.Data != null) {
+        unixfs = UnixFS.unmarshal(pbNode.Data)
+      }
 
       if (pbNode.Links.length > 0) {
         // intermediate node
         for (const link of pbNode.Links) {
-          const linkResult = await inspectDag(link.Hash, blockstore, options)
+          const linkResult = await inspectDag(link.Hash, blockstore, linkIsFile(link, unixfs), filter, options)
 
-          results.localFileSize += linkResult.localFileSize
-          results.localDagSize += linkResult.localDagSize
+          results.localSize += linkResult.localSize
+          results.dagSize += linkResult.dagSize
+          results.deduplicatedDagSize += linkResult.deduplicatedDagSize
           results.blocks += linkResult.blocks
+          results.uniqueBlocks += linkResult.uniqueBlocks
+          results.dirSize += linkResult.dirSize
+        }
+
+        // multi-block file node
+        if (isFile && unixfs != null) {
+          results.dirSize += unixfs.fileSize()
         }
       } else {
-        // leaf node
-        if (pbNode.Data == null) {
+        if (unixfs == null) {
           throw new InvalidPBNodeError(`PBNode ${cid.toString()} had no data`)
         }
 
-        const unixfs = UnixFS.unmarshal(pbNode.Data)
-
-        if (unixfs.data == null) {
-          throw new InvalidPBNodeError(`UnixFS node ${cid.toString()} had no data`)
+        // multi-block file leaf node
+        if (unixfs.data != null) {
+          results.localSize += BigInt(unixfs.data.byteLength ?? 0)
         }
 
-        results.localFileSize += unixfs.data.byteLength ?? 0
+        // single-block file node
+        if (isFile) {
+          results.dirSize += unixfs.fileSize()
+        }
       }
     } else {
       throw new UnknownError(`${cid.toString()} was neither DAG_PB nor RAW`)
     }
+  } catch (err: any) {
+    if (err.name !== 'NotFoundError' || options.offline !== true) {
+      throw err
+    }
   }
 
   return results
 }
+
+function linkIsFile (link: dagPb.PBLink, parent?: UnixFS): boolean {
+  if (parent == null) {
+    return false
+  }
+
+  const name = link.Name
+
+  if (name == null) {
+    return false
+  }
+
+  if (parent.type === 'directory') {
+    return true
+  } else if (parent.type === 'hamt-sharded-directory' && name.length > 2) {
+    return true
+  }
+
+  return false
+}
diff --git a/packages/unixfs/src/index.ts b/packages/unixfs/src/index.ts
index 945efed2..4512a4cf 100644
--- a/packages/unixfs/src/index.ts
+++ b/packages/unixfs/src/index.ts
@@ -49,6 +49,7 @@
 import { UnixFS as UnixFSClass } from './unixfs.js'
 import type { GetBlockProgressEvents, PutBlockProgressEvents } from '@helia/interface/blocks'
 import type { AbortOptions } from '@libp2p/interface'
+import type { Filter } from '@libp2p/utils/filters'
 import type { Blockstore } from 'interface-blockstore'
 import type { Mtime, UnixFS as IPFSUnixFS } from 'ipfs-unixfs'
 import type { ExporterProgressEvents, UnixFSEntry } from 'ipfs-unixfs-exporter'
@@ -248,15 +249,32 @@ export interface StatOptions extends AbortOptions, ProgressOptions<GetBlockProgressEvents> {
   /**
    * If true, do not perform any network operations and throw if blocks are
-   * missing from the local store. (default: false)
+   * missing from the local store.
+   *
+   * @default false
    */
   offline?: boolean
 }
 
+export interface ExtendedStatOptions extends StatOptions {
+  /**
+   * If true, traverse the whole DAG to return additional stats. If all data is
+   * not in the local blockstore, this may involve fetching them from the
+   * network.
+   */
+  extended: true
+
+  /**
+   * By default CIDs are deduplicated using a `ScalableCuckooFilter` - if you
+   * wish to use a different filter, pass it here.
+   */
+  filter?: Filter
+}
+
 /**
  * Statistics relating to a UnixFS DAG
  */
-export interface UnixFSStats {
+export interface Stats {
   /**
    * The file or directory CID
    */
@@ -265,7 +283,7 @@
   /**
    * The file or directory mode
    */
-  mode?: number
+  mode: number
 
   /**
    * The file or directory mtime
@@ -273,41 +291,112 @@
   mtime?: Mtime
 
   /**
-   * The size of the file in bytes
+   * The type of UnixFS node - 'file', 'directory' or 'raw'
    */
-  fileSize: bigint
+  type: 'file' | 'directory' | 'raw'
 
   /**
-   * The size of the DAG that holds the file in bytes
+   * UnixFS metadata about this file or directory
    */
-  dagSize: bigint
+  unixfs?: IPFSUnixFS
+
+  /**
+   * The size in bytes of the file as reported by the UnixFS metadata stored in
+   * the root DAG node, or if the CID resolves to a raw node, the size of the
+   * block that holds it.
+   *
+   * For directories this will return `0` as no size information is available in
+   * the root block - instead please stat with the `extended` option to traverse
+   * the DAG and calculate the size.
+   */
+  size: bigint
+}
+
+export interface FileStats extends Stats {
+  type: 'file'
+  unixfs: IPFSUnixFS
+}
+
+export interface DirectoryStats extends Stats {
+  type: 'directory'
+  unixfs: IPFSUnixFS
+}
+
+export interface RawStats extends Stats {
+  type: 'raw'
+  unixfs: undefined
+}
+
+/**
+ * More detailed statistics relating to a UnixFS DAG. These can involve
+ * traversing the DAG behind the CID so may require network operations and/or
+ * more disk activity.
+ */
+export interface ExtendedStats extends Stats {
   /**
-   * How much of the file is in the local block store
+   * How many blocks make up the DAG.
+   *
+   * nb. this will only be accurate if either all blocks are present in the
+   * local blockstore or the `offline` option was not `true`
    */
-  localFileSize: bigint
+  blocks: bigint
 
   /**
-   * How much of the DAG that holds the file is in the local blockstore
+   * How many unique blocks make up the DAG - this count does not include any
+   * blocks that appear in the DAG more than once.
+   *
+   * nb. this will only be accurate if either all blocks are present in the
+   * local blockstore or the `offline` option was not `true`
    */
-  localDagSize: bigint
+  uniqueBlocks: bigint
 
   /**
-   * How many blocks make up the DAG - nb. this will only be accurate
-   * if all blocks are present in the local blockstore
+   * The size of the DAG that holds the file or directory in bytes - this is
+   * the sum of all block sizes so includes any protobuf overhead, etc.
+   *
+   * Duplicate blocks are included in this measurement.
+   *
+   * nb. this will only be accurate if either all blocks are present in the
+   * local blockstore or the `offline` option was not `true`
    */
-  blocks: number
+  dagSize: bigint
 
   /**
-   * The type of file
+   * Similar to `dagSize` except duplicate blocks are not included in the
+   * reported amount.
+   *
+   * nb. this will only be accurate if either all blocks are present in the
+   * local blockstore or the `offline` option was not `true`
    */
-  type: 'file' | 'directory' | 'raw'
+  deduplicatedDagSize: bigint
 
   /**
-   * UnixFS metadata about this file or directory. Will not be present
-   * if the node is a `raw` type.
+   * How much of the file or directory is in the local block store. If this is a
+   * directory it will include the `localSize` of all child files and
+   * directories.
+   *
+   * It does not include protobuf overhead, for that see `dagSize`.
+   *
+   * nb. if the `offline` option is `true`, and not all blocks for the
+   * file/directory are in the blockstore, this number may be smaller than
+   * `size`.
    */
-  unixfs?: IPFSUnixFS
+  localSize: bigint
+}
+
+export interface ExtendedFileStats extends ExtendedStats {
+  type: 'file'
+  unixfs: IPFSUnixFS
+}
+
+export interface ExtendedDirectoryStats extends ExtendedStats {
+  type: 'directory'
+  unixfs: IPFSUnixFS
+}
+
+export interface ExtendedRawStats extends ExtendedStats {
+  type: 'raw'
+  unixfs: undefined
 }
 
 /**
@@ -571,7 +660,8 @@
    * console.info(stats)
    * ```
    */
-  stat(cid: CID, options?: Partial<StatOptions>): Promise<UnixFSStats>
+  stat(cid: CID, options?: StatOptions): Promise<FileStats | DirectoryStats | RawStats>
+  stat(cid: CID, options?: ExtendedStatOptions): Promise<ExtendedFileStats | ExtendedDirectoryStats | ExtendedRawStats>
 
   /**
    * Update the mtime of a UnixFS DAG
diff --git a/packages/unixfs/src/unixfs.ts b/packages/unixfs/src/unixfs.ts
index cf06eb1a..1ba534c5 100644
--- a/packages/unixfs/src/unixfs.ts
+++ b/packages/unixfs/src/unixfs.ts
@@ -7,7 +7,7 @@ import { mkdir } from './commands/mkdir.js'
 import { rm } from './commands/rm.js'
 import { stat } from './commands/stat.js'
 import { touch } from './commands/touch.js'
-import type { AddOptions, CatOptions, ChmodOptions, CpOptions, FileCandidate, LsOptions, MkdirOptions, RmOptions, StatOptions, TouchOptions, UnixFSComponents, UnixFS as UnixFSInterface, UnixFSStats } from './index.js'
+import type { AddOptions, CatOptions, ChmodOptions, CpOptions, ExtendedStatOptions, ExtendedDirectoryStats, ExtendedFileStats, FileCandidate, LsOptions, MkdirOptions, RmOptions, StatOptions, TouchOptions, UnixFSComponents, DirectoryStats, FileStats, UnixFS as UnixFSInterface, RawStats, ExtendedRawStats } from './index.js'
 import type { Blockstore } from 'interface-blockstore'
 import type { UnixFSEntry } from 'ipfs-unixfs-exporter'
 import type { ByteStream, DirectoryCandidate, ImportCandidateStream, ImportResult } from 'ipfs-unixfs-importer'
@@ -68,7 +68,9 @@ export class UnixFS implements UnixFSInterface {
     return rm(cid, path, this.components.blockstore, options)
   }
 
-  async stat (cid: CID, options: Partial<StatOptions> = {}): Promise<UnixFSStats> {
+  async stat (cid: CID, options?: StatOptions): Promise<FileStats | DirectoryStats | RawStats>
+  async stat (cid: CID, options?: ExtendedStatOptions): Promise<ExtendedFileStats | ExtendedDirectoryStats | ExtendedRawStats>
+  async stat (cid: CID, options: Partial<ExtendedStatOptions> = {}): Promise<FileStats | DirectoryStats | RawStats | ExtendedFileStats | ExtendedDirectoryStats | ExtendedRawStats> {
     return stat(cid, this.components.blockstore, options)
   }
 
diff --git a/packages/unixfs/test/stat.spec.ts b/packages/unixfs/test/stat.spec.ts
index 745b6699..69dc510e 100644
--- a/packages/unixfs/test/stat.spec.ts
+++ b/packages/unixfs/test/stat.spec.ts
@@ -3,6 +3,7 @@
 import * as dagPb from '@ipld/dag-pb'
 import { expect } from 'aegir/chai'
 import { MemoryBlockstore } from 'blockstore-core'
+import { fromString as uint8ArrayFromString } from 'uint8arrays/from-string'
 import { unixfs, type UnixFS } from '../src/index.js'
 import { createShardedDirectory } from './fixtures/create-sharded-directory.js'
 import { largeFile, smallFile } from './fixtures/files.js'
@@ -25,10 +26,10 @@
   })
 
   it('stats an empty directory', async () => {
-    await expect(fs.stat(emptyDirCid)).to.eventually.include({
-      fileSize: 0n,
-      dagSize: 2n,
-      blocks: 1,
+    await expect(fs.stat(emptyDirCid, { extended: true })).to.eventually.include({
+      size: 0n,
+      dagSize: 4n,
+      blocks: 1n,
       type: 'directory'
     })
   })
 
@@ -40,32 +41,48 @@
     expect(node.Links).to.have.lengthOf(5)
 
-    await expect(fs.stat(largeFileCid)).to.eventually.include({
-      fileSize: 5242880n,
-      blocks: 6,
-      localDagSize: 5243139n
+    const stats = await fs.stat(largeFileCid, {
+      extended: true
     })
+
+    expect(stats.unixfs?.fileSize()).to.equal(5242880n)
+    expect(stats.blocks).to.equal(6n)
+    expect(stats.dagSize).to.equal(5243139n)
+    expect(stats.localSize).to.equal(5242880n)
 
     // remove one of the blocks so we now have an incomplete DAG
     await blockstore.delete(node.Links[0].Hash)
 
     // block count and local file/dag sizes should be smaller
-    await expect(fs.stat(largeFileCid)).to.eventually.include({
-      fileSize: 5242880n,
-      blocks: 5,
-      localFileSize: 4194304n,
-      localDagSize: 4194563n
+    const updatedStats = await fs.stat(largeFileCid, {
+      extended: true,
+      offline: true
     })
+
+    expect(updatedStats.unixfs?.fileSize()).to.equal(5242880n)
+    expect(updatedStats.blocks).to.equal(5n)
+    expect(updatedStats.dagSize).to.equal(4194563n)
+    expect(updatedStats.localSize).to.equal(4194304n)
   })
 
   it('stats a raw node', async () => {
     const fileCid = await fs.addBytes(smallFile)
 
-    await expect(fs.stat(fileCid)).to.eventually.include({
-      fileSize: BigInt(smallFile.length),
-      dagSize: 13n,
-      blocks: 1,
-      type: 'raw'
+    const stat = await fs.stat(fileCid)
+    expect(stat.type).to.equal('raw')
+    expect(stat.size).to.equal(13n)
+
+    const extendedStat = await fs.stat(fileCid, {
+      extended: true
+    })
+
+    expect(extendedStat).to.deep.equal({
+      ...stat,
+      blocks: 1n,
+      dagSize: BigInt(smallFile.byteLength),
+      localSize: BigInt(smallFile.byteLength),
+      uniqueBlocks: 1n,
+      deduplicatedDagSize: BigInt(smallFile.byteLength)
     })
   })
 
@@ -75,23 +92,41 @@
       rawLeaves: false
     })
 
-    await expect(fs.stat(fileCid)).to.eventually.include({
-      fileSize: BigInt(smallFile.length),
-      dagSize: 19n,
-      blocks: 1,
-      type: 'file'
+    const stat = await fs.stat(fileCid)
+    expect(stat.type).to.equal('file')
+    expect(stat.unixfs?.fileSize()).to.equal(13n)
+    expect(stat.size).to.equal(13n)
+
+    const extendedStat = await fs.stat(fileCid, {
+      extended: true
     })
+
+    expect(extendedStat.blocks).to.equal(1n)
+    expect(extendedStat.dagSize).to.equal(21n)
+    expect(extendedStat.localSize).to.equal(13n)
+    expect(extendedStat.type).to.equal('file')
+    expect(extendedStat.unixfs?.fileSize()).to.equal(13n)
+    expect(extendedStat.size).to.equal(13n)
   })
 
   it('stats a large file', async () => {
-    const cid = await fs.addBytes(largeFile)
+    const fileCid = await fs.addBytes(largeFile)
+
+    const stat = await fs.stat(fileCid)
+    expect(stat.type).to.equal('file')
+    expect(stat.unixfs?.fileSize()).to.equal(BigInt(largeFile.length))
+    expect(stat.size).to.equal(BigInt(largeFile.length))
 
-    await expect(fs.stat(cid)).to.eventually.include({
-      fileSize: BigInt(largeFile.length),
-      dagSize: 5242907n,
-      blocks: 6,
-      type: 'file'
+    const extendedStat = await fs.stat(fileCid, {
+      extended: true
     })
+
+    expect(extendedStat.blocks).to.equal(6n)
+    expect(extendedStat.dagSize).to.equal(5243139n)
+    expect(extendedStat.localSize).to.equal(BigInt(largeFile.length))
+    expect(extendedStat.type).to.equal('file')
+    expect(extendedStat.unixfs?.fileSize()).to.equal(BigInt(largeFile.length))
+    expect(extendedStat.size).to.equal(BigInt(largeFile.length))
   })
 
   it('should stat file with mode', async () => {
@@ -128,11 +163,18 @@
   })
 
   it('should stat a directory', async function () {
-    await expect(fs.stat(emptyDirCid)).to.eventually.include({
-      type: 'directory',
-      blocks: 1,
-      fileSize: 0n
+    const stat = await fs.stat(emptyDirCid)
+    expect(stat.type).to.equal('directory')
+
+    const extendedStat = await fs.stat(emptyDirCid, {
+      extended: true
     })
+
+    expect(extendedStat.blocks).to.equal(1n)
+    expect(extendedStat.dagSize).to.equal(4n)
+    expect(extendedStat.localSize).to.equal(0n)
+    expect(extendedStat.type).to.equal('directory')
+    expect(extendedStat.size).to.equal(0n)
   })
 
   it('should stat dir with mode', async function () {
@@ -142,11 +184,8 @@
       mode
     })
 
-    await expect(fs.stat(dirCid, {
-      path
-    })).to.eventually.include({
-      mode
-    })
+    const stat = await fs.stat(dirCid, { path })
+    expect(stat.unixfs?.mode).to.equal(mode)
   })
 
   it('should stat dir with mtime', async function () {
@@ -160,11 +199,11 @@
       mtime
     })
 
-    await expect(fs.stat(dirCid, {
+    const stat = await fs.stat(dirCid, {
       path
-    })).to.eventually.deep.include({
-      mtime
     })
+
+    expect(stat.unixfs?.mtime).to.deep.equal(mtime)
   })
 
   it('stats a sharded directory', async function () {
@@ -178,14 +217,18 @@
     })
 
     const stat = await fs.stat(updatedShardCid)
-    expect(stat).to.have.property('type', 'directory')
-    expect(stat).to.have.nested.property('unixfs.type', 'hamt-sharded-directory')
-    expect(stat).to.include({
-      mode: 0o755
-    })
-    expect(stat).to.deep.include({
-      mtime
+    expect(stat.type).to.equal('directory')
+    expect(stat.unixfs?.type).to.equal('hamt-sharded-directory')
+
+    const extendedStat = await fs.stat(updatedShardCid, {
+      extended: true
     })
+
+    expect(extendedStat.blocks).to.equal(1243n)
+    expect(extendedStat.dagSize).to.equal(79157n)
+    expect(extendedStat.localSize).to.equal(5005n)
+    expect(extendedStat.type).to.equal('directory')
+    expect(extendedStat.unixfs?.type).to.equal('hamt-sharded-directory')
   })
 
   it('stats a file inside a sharded directory', async () => {
@@ -201,7 +244,7 @@
     })
 
     expect(stats.type).to.equal('file')
-    expect(stats.fileSize).to.equal(4n)
+    expect(stats.unixfs?.fileSize()).to.equal(4n)
   })
 
   it('refuses to stat missing blocks', async () => {
@@ -215,4 +258,101 @@
     })).to.eventually.be.rejected
       .with.property('name', 'NotFoundError')
   })
+
+  it('stats a directory with content', async () => {
+    const emptyDirCid = await fs.addDirectory()
+    const fileCid = await fs.addBytes(uint8ArrayFromString('Hello World!'))
+    const updateDirCid = await fs.cp(fileCid, emptyDirCid, 'foo1.txt')
+    const finalDirCid = await fs.cp(fileCid, updateDirCid, 'foo2.txt')
+
+    const stats = await fs.stat(finalDirCid)
+    expect(stats.type).to.equal('directory')
+    expect(stats.size).to.equal(0n)
+
+    const extendedStat = await fs.stat(finalDirCid, {
+      extended: true
+    })
+
+    expect(extendedStat.blocks).to.equal(3n)
+    expect(extendedStat.dagSize).to.equal(132n)
+    expect(extendedStat.localSize).to.equal(24n)
+    expect(extendedStat.type).to.equal('directory')
+    expect(extendedStat.size).to.equal(24n)
+  })
+
+  it('stats a directory with content and missing blocks', async () => {
+    const emptyDirCid = await fs.addDirectory()
+    const fileCid = await fs.addBytes(uint8ArrayFromString('Hello World!'))
+    const fileCid2 = await fs.addBytes(uint8ArrayFromString('Hello Universe!'))
+    const updateDirCid = await fs.cp(fileCid, emptyDirCid, 'foo1.txt')
+    const finalDirCid = await fs.cp(fileCid2, updateDirCid, 'foo2.txt')
+    const block = await blockstore.get(finalDirCid)
+    const node = dagPb.decode(block)
+
+    const extendedStat = await fs.stat(finalDirCid, {
+      extended: true
+    })
+
+    expect(extendedStat.blocks).to.equal(3n)
+    expect(extendedStat.dagSize).to.equal(135n)
+    expect(extendedStat.localSize).to.equal(27n)
+    expect(extendedStat.type).to.equal('directory')
+    expect(extendedStat.size).to.equal(27n)
+
+    expect(node.Links).to.have.lengthOf(2)
+
+    // remove one of the blocks so we now have an incomplete DAG
+    await blockstore.delete(node.Links[0].Hash)
+
+    const extendedStatMissingBlocks = await fs.stat(finalDirCid, {
+      extended: true,
+      offline: true
+    })
+
+    expect(extendedStatMissingBlocks.blocks).to.equal(2n)
+    expect(extendedStatMissingBlocks.dagSize).to.equal(123n)
+    expect(extendedStatMissingBlocks.localSize).to.equal(15n)
+    expect(extendedStatMissingBlocks.type).to.equal('directory')
+    expect(extendedStatMissingBlocks.size).to.equal(15n)
+  })
+
+  it('stats a directory with content and missing blocks of files', async () => {
+    const emptyDirCid = await fs.addDirectory()
+    const fileCid = await fs.addBytes(uint8ArrayFromString('Hello World!'))
+    const fileCid2 = await fs.addBytes(largeFile)
+    const updateDirCid = await fs.cp(fileCid, emptyDirCid, 'foo1.txt')
+    const finalDirCid = await fs.cp(fileCid2, updateDirCid, 'foo2.txt')
+    const block = await blockstore.get(finalDirCid)
+    const node = dagPb.decode(block)
+
+    const extendedStat = await fs.stat(finalDirCid, {
+      extended: true
+    })
+
+    expect(extendedStat.type).to.equal('directory')
+    expect(extendedStat.blocks).to.equal(8n)
+    expect(extendedStat.dagSize).to.equal(5243262n)
+    expect(extendedStat.size).to.equal(5242892n)
+    expect(extendedStat.localSize).to.equal(5242892n)
+
+    expect(node.Links).to.have.lengthOf(2)
+
+    const largeFileBlock = await blockstore.get(fileCid2)
+    const largeFileNode = dagPb.decode(largeFileBlock)
+
+    // remove one of the blocks of the multi-block file so we now have an
+    // incomplete DAG
+    await blockstore.delete(largeFileNode.Links[0].Hash)
+
+    const extendedStatMissingBlocks = await fs.stat(finalDirCid, {
+      extended: true,
+      offline: true
+    })
+
+    expect(extendedStatMissingBlocks.type).to.equal('directory')
+    expect(extendedStatMissingBlocks.blocks).to.equal(extendedStat.blocks - 1n)
+    expect(extendedStatMissingBlocks.dagSize).to.equal(4194686n)
+    expect(extendedStatMissingBlocks.size).to.equal(extendedStat.size, 'did not calculate size from available UnixFS metadata')
+    expect(extendedStatMissingBlocks.localSize).to.equal(4194316n)
+  })
 })
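For reviewers, a minimal sketch of how the two `stat` signatures added above might be called from application code. It assumes a Helia node created with `createHelia()` and a file imported via `addBytes`; the variable names and the logged fields are illustrative only and not part of this changeset:

```ts
import { unixfs } from '@helia/unixfs'
import { createHelia } from 'helia'

const helia = await createHelia()
const fs = unixfs(helia)

// import some bytes so we have a CID to stat (illustrative setup only)
const cid = await fs.addBytes(new TextEncoder().encode('Hello World!'))

// default stat - only reads the root block
const stats = await fs.stat(cid)
console.info(stats.type, stats.size)

// extended stat - walks the DAG, so may fetch blocks from the network
// unless `offline: true` is also passed
const extended = await fs.stat(cid, { extended: true })
console.info(extended.blocks, extended.dagSize, extended.localSize, extended.deduplicatedDagSize)
```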