Skip to content

Commit

Permalink
Merge pull request #628 from marp-team/parallel-batch-conversion
Browse files Browse the repository at this point in the history
Introduce parallelism for batch conversion: `--parallel` / `-P`
  • Loading branch information
yhatt authored Jan 15, 2025
2 parents 19f8166 + 62d4d4d commit a553d84
Show file tree
Hide file tree
Showing 13 changed files with 313 additions and 119 deletions.
12 changes: 9 additions & 3 deletions .circleci/config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -84,10 +84,14 @@ commands:
command: npm run lint:css

test:
parameters:
runInBand:
type: boolean
default: false
steps:
- run:
name: Jest
command: npm run test:coverage -- --ci -i --reporters=default --reporters=jest-junit
command: npm run test:coverage -- --ci --reporters=default --reporters=jest-junit <<# parameters.runInBand >>-i<</ parameters.runInBand >>
environment:
JEST_JUNIT_OUTPUT_DIR: tmp/test-results
MARP_TEST_CI: 1
Expand Down Expand Up @@ -129,7 +133,8 @@ jobs:
- prepare:
browser: true
- lint
- test
- test:
runInBand: true

test-node20:
executor:
Expand All @@ -140,7 +145,8 @@ jobs:
- prepare:
browser: true
- lint
- test
- test:
runInBand: true

test-node22:
executor:
Expand Down
7 changes: 6 additions & 1 deletion .github/workflows/test-win.yml
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,10 @@ jobs:
# - name: Output concurrency group
# run: echo "${{ github.workflow }}-${{ (github.ref_name == 'main' && github.run_id) || format('{0}-{1}', github.actor, github.head_ref || github.ref_name) }}"

- name: Get number of CPU cores
uses: SimenB/github-actions-cpu-cores@v2
id: cpu-cores

- uses: actions/checkout@v4

- name: Use Node.js ${{ matrix.node-version }}
Expand All @@ -43,7 +47,8 @@ jobs:
env:
MARP_TEST_CI: 1
run: >-
npm run test:coverage -- --ci -i --reporters=default --reporters=jest-junit --forceExit ||
npm run test:coverage -- --ci --max-workers ${{ steps.cpu-cores.outputs.count }} --reporters=default --reporters=jest-junit ||
npm run test:coverage -- --ci -i --reporters=default --reporters=jest-junit --forceExit --no-cache ||
npm run test:coverage -- --ci -i --reporters=default --reporters=jest-junit --forceExit --no-cache ||
npm run test:coverage -- --ci -i --reporters=default --reporters=jest-junit --forceExit --no-cache
Expand Down
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,10 @@

## [Unreleased]

### Added

- Introduce parallelism for batch conversion: `--parallel` / `-P` ([#509](https://github.com/marp-team/marp-cli/issues/509), [#628](https://github.com/marp-team/marp-cli/pull/628))

### Fixed

- Make the preview option stable against occasional invalid URL errors ([#627](https://github.com/marp-team/marp-cli/pull/627))
Expand Down
4 changes: 4 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -256,6 +256,10 @@ marp --pdf --allow-local-files slide-deck.md

## Conversion modes

### Parallelism

When converting multiple files, Marp CLI will process them in parallel with 5 concurrency by default. You can set the number of concurrency by `--parallel` (`-P`) option, or disable parallelism by `--no-parallel`.

### Watch mode (`--watch` / `-w`) <a name="watch-mode"></a>

Marp CLI will observe a change of Markdown and using theme CSS when passed with `--watch` (`-w`) option. The conversion will be triggered whenever the content of file is updated.
Expand Down
59 changes: 36 additions & 23 deletions src/browser/browser.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ import { EventEmitter } from 'node:events'
import fs from 'node:fs'
import os from 'node:os'
import path from 'node:path'
import { isPromise } from 'node:util/types'
import { nanoid } from 'nanoid'
import type {
Browser as PuppeteerBrowser,
Expand All @@ -11,6 +12,7 @@ import type {
} from 'puppeteer-core'
import type TypedEventEmitter from 'typed-emitter'
import { debugBrowser } from '../utils/debug'
import { createMemoizedPromiseContext } from '../utils/memoized-promise'
import {
getWindowsEnv,
isWSL,
Expand Down Expand Up @@ -46,11 +48,11 @@ export abstract class Browser

path: string
protocolTimeout: number
puppeteer: PuppeteerBrowser | undefined
timeout: number
#dataDirName: string

private _puppeteerDataDir?: string
private _puppeteerDataDir = createMemoizedPromiseContext<string>()
private _puppeteer = createMemoizedPromiseContext<PuppeteerBrowser>()

constructor(opts: BrowserOptions) {
super()
Expand All @@ -70,49 +72,53 @@ export abstract class Browser
return (this.constructor as typeof Browser).protocol
}

async launch(opts: LaunchOptions = {}): Promise<PuppeteerBrowser> {
if (!this.puppeteer) {
async launch(opts: LaunchOptions = {}) {
return this._puppeteer.init(async () => {
debugBrowser('Launching browser via Puppeteer...')

const puppeteer = await this.launchPuppeteer(opts)

puppeteer.once('disconnected', () => {
this.emit('disconnect', puppeteer)
this.puppeteer = undefined
this._puppeteer.value = undefined

debugBrowser('Browser disconnected (Cleaned up puppeteer instance)')
})

this.puppeteer = puppeteer
this.emit('launch', puppeteer)

return puppeteer
}
return this.puppeteer
})
}

async withPage<T>(fn: (page: Page) => T) {
const debugPageId = nanoid(8)
const puppeteer = await this.launch()
const page = await puppeteer.newPage()

debugBrowser('Created a new page [%s]', debugPageId)

page.setDefaultTimeout(this.timeout)
page.setDefaultNavigationTimeout(this.timeout)

try {
return await fn(page)
} finally {
await page.close()
debugBrowser('Page closed [%s]', debugPageId)
}
}

async close() {
if (this.puppeteer) {
const { puppeteer } = this
const pptr = await this._puppeteer.value

if (puppeteer.connected) {
await puppeteer.close()
this.emit('close', puppeteer)
if (pptr) {
if (pptr.connected) {
await pptr.close()
this.emit('close', pptr)
}

this.puppeteer = undefined
this._puppeteer.value = undefined
}
}

Expand All @@ -123,7 +129,15 @@ export abstract class Browser
async browserInWSLHost(): Promise<boolean> {
return (
!!(await isWSL()) &&
wslHostMatcher.test(this.puppeteer?.process()?.spawnfile ?? this.path)
wslHostMatcher.test(
// This function may be called while launching Puppeteer. If the browser
// value awaited, Marp CLI will bring deadlock. So we should check the
// value is already resolved (non-Promise truthy value) or not (Promise
// or undefined).
(this._puppeteer.value && !isPromise(this._puppeteer.value)
? this._puppeteer.value.process()?.spawnfile
: null) ?? this.path
)
)
}

Expand Down Expand Up @@ -160,10 +174,10 @@ export abstract class Browser

/** @internal */
protected async puppeteerDataDir() {
if (this._puppeteerDataDir === undefined) {
return this._puppeteerDataDir.init(async () => {
let needToTranslateWindowsPathToWSL = false

this._puppeteerDataDir = await (async () => {
const dir = await (async () => {
// In WSL environment, Marp CLI may use Chrome on Windows. If Chrome has
// located in host OS (Windows), we have to specify Windows path.
if (await this.browserInWSLHost()) {
Expand All @@ -176,16 +190,15 @@ export abstract class Browser
return path.resolve(os.tmpdir(), this.#dataDirName)
})()

debugBrowser(`Chrome data directory: %s`, this._puppeteerDataDir)

// Ensure the data directory is created
const mkdirPath = needToTranslateWindowsPathToWSL
? await translateWindowsPathToWSL(this._puppeteerDataDir)
: this._puppeteerDataDir
? await translateWindowsPathToWSL(dir)
: dir

await fs.promises.mkdir(mkdirPath, { recursive: true })
debugBrowser(`Created data directory: %s`, mkdirPath)
}
return this._puppeteerDataDir

return dir
})
}
}
44 changes: 23 additions & 21 deletions src/browser/manager.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import { error } from '../error'
import { debugBrowser } from '../utils/debug'
import { createMemoizedPromiseContext } from '../utils/memoized-promise'
import type { Browser, BrowserProtocol } from './browser'
import { ChromeCdpBrowser } from './browsers/chrome-cdp'
import { defaultFinders, findBrowser } from './finder'
Expand All @@ -23,12 +24,12 @@ export class BrowserManager implements AsyncDisposable {
// Finder
private _finders: readonly FinderName[] = defaultFinders
private _finderPreferredPath?: string
private _finderResult?: BrowserFinderResult
private _finderResult = createMemoizedPromiseContext<BrowserFinderResult>()

// Browser
private _conversionBrowser?: Browser
private _conversionBrowser = createMemoizedPromiseContext<Browser>()
private _preferredProtocol: BrowserProtocol = 'webDriverBiDi'
private _previewBrowser?: ChromeCdpBrowser
private _previewBrowser = createMemoizedPromiseContext<ChromeCdpBrowser>()
private _timeout?: number

constructor(config: BrowserManagerConfig = {}) {
Expand All @@ -42,11 +43,11 @@ export class BrowserManager implements AsyncDisposable {
configure(config: BrowserManagerConfig) {
if (config.finders) {
this._finders = ([] as FinderName[]).concat(config.finders)
this._finderResult = undefined // Reset finder result cache
this._finderResult.value = undefined // Reset finder result cache
}
if (config.path !== undefined) {
this._finderPreferredPath = config.path
this._finderResult = undefined // Reset finder result cache
this._finderResult.value = undefined // Reset finder result cache
}
if (config.protocol) {
if (this._conversionBrowser)
Expand All @@ -62,14 +63,14 @@ export class BrowserManager implements AsyncDisposable {
}

async findBrowser() {
return (this._finderResult ??= await findBrowser(this._finders, {
preferredPath: this._finderPreferredPath,
}))
return this._finderResult.init(() =>
findBrowser(this._finders, { preferredPath: this._finderPreferredPath })
)
}

// Browser for converter
async browserForConversion(): Promise<Browser> {
if (!this._conversionBrowser) {
return this._conversionBrowser.init(async () => {
const { acceptedBrowsers, path } = await this.findBrowser()

const browser =
Expand All @@ -90,33 +91,34 @@ export class BrowserManager implements AsyncDisposable {
debugBrowser('Use browser class for conversion: %o', browser)

// @ts-expect-error ts2511: TS cannot create an instance of an abstract class
this._conversionBrowser = new browser({ path, timeout: this.timeout })
}
return this._conversionBrowser!
return new browser({ path, timeout: this.timeout })
})
}

// Browser for preview window
async browserForPreview(): Promise<ChromeCdpBrowser> {
if (!this._previewBrowser) {
return this._previewBrowser.init(async () => {
const { acceptedBrowsers, path } = await this.findBrowser()

if (!acceptedBrowsers.some((browser) => browser === ChromeCdpBrowser)) {
error('No browser found for preview')
}
debugBrowser('Use browser class for preview: %o', ChromeCdpBrowser)

this._previewBrowser = new ChromeCdpBrowser({
path,
timeout: this.timeout,
})
}
return this._previewBrowser
return new ChromeCdpBrowser({ path, timeout: this.timeout })
})
}

async dispose() {
await Promise.all([
this._conversionBrowser?.close(),
this._previewBrowser?.close(),
(async () => {
await (await this._conversionBrowser.value)?.close()
this._conversionBrowser.value = undefined
})(),
(async () => {
await (await this._previewBrowser.value)?.close()
this._previewBrowser.value = undefined
})(),
])
}

Expand Down
20 changes: 20 additions & 0 deletions src/config.ts
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ interface IMarpCLIArguments {
notes?: boolean
ogImage?: string
output?: string | false
parallel?: number | boolean
pdf?: boolean
pdfNotes?: boolean
pdfOutlines?: boolean
Expand Down Expand Up @@ -94,6 +95,8 @@ export type IMarpCLIConfig = Overwrite<
}
>

export const DEFAULT_PARALLEL = 5

export class MarpCLIConfig {
args: IMarpCLIArguments = {}
conf: IMarpCLIConfig = {}
Expand Down Expand Up @@ -332,10 +335,27 @@ export class MarpCLIConfig {
return scale
})()

const parallel = (() => {
const parseParallel = (value: boolean | number | undefined) => {
if (value === true) return DEFAULT_PARALLEL
if (value === false) return 1
if (typeof value === 'number') return Math.max(1, value)

return undefined
}

return (
parseParallel(this.args.parallel) ??
parseParallel(this.conf.parallel) ??
DEFAULT_PARALLEL
)
})()

return {
imageScale,
inputDir,
output,
parallel,
pdfNotes,
pdfOutlines,
preview,
Expand Down
Loading

0 comments on commit a553d84

Please sign in to comment.