Skip to content

Commit e35fe2b

Browse files
committed
fix fetchRetry to work in commonjs
add polyfill for file writing to work in nodejs and bun; update examples for the extraction and screenshot apis
1 parent f56e69c commit e35fe2b

39 files changed

+792
-940
lines changed

.gitignore

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,3 +6,5 @@ dev
66
cov_profile
77
.vscode
88
.history
9+
examples/**/package.json
10+
examples/**/package-lock.json

README.md

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,8 +24,10 @@ The SDK is distributed through:
2424
3. Start scraping: 🚀
2525

2626
```javascript
27-
// node or bun:
27+
// node
2828
import { ScrapflyClient, ScrapeConfig } from 'scrapfly-sdk';
29+
// bun
30+
import { ScrapflyClient, ScrapeConfig} from '@scrapfly/scrapfly-sdk';
2931
// deno:
3032
import { ScrapflyClient, ScrapeConfig } from 'jsr:@scrapfly/scrapfly-sdk';
3133

__tests__/client/extraction.test.ts

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2,14 +2,15 @@ import * as errors from '../../src/errors.ts';
22
import { ScrapflyClient } from '../../src/client.ts';
33
import { ExtractionConfig } from '../../src/extractionconfig.ts';
44
import { assertEquals, assertRejects } from "https://deno.land/[email protected]/assert/mod.ts";
5-
import { stub } from "https://deno.land/std/testing/mock.ts";
5+
import { stub } from "https://deno.land/std@0.224.0/testing/mock.ts";
66
import { responseFactory } from '../utils.ts';
7+
import type { RequestOptions } from '../../src/utils.ts';
78

89
Deno.test('extract: succeeds', async () => {
910
const KEY = '__API_KEY__';
1011
const client = new ScrapflyClient({ key: KEY });
1112
const html = 'very long html file';
12-
const fetchStub = stub(client, 'fetch', async (config: Request): Promise<Response> => {
13+
const fetchStub = stub(client, 'fetch', async (config: RequestOptions): Promise<Response> => {
1314
const configUrl = new URL(config.url);
1415
const configBody = await new Response(config.body).text();
1516
assertEquals(configUrl.origin + configUrl.pathname, client.HOST + '/extraction');
@@ -51,7 +52,7 @@ Deno.test('extract: fails due to invalid API key', async () => {
5152
const KEY = '__API_KEY__';
5253
const client = new ScrapflyClient({ key: KEY });
5354
const html = 'very long html file';
54-
const fetchStub = stub(client, 'fetch', async (config: Request): Promise<Response> => {
55+
const fetchStub = stub(client, 'fetch', async (config: RequestOptions): Promise<Response> => {
5556
const result = {
5657
status: 'error',
5758
http_code: 401,
@@ -81,7 +82,7 @@ Deno.test('extract: fails due to any extraction related error', async () => {
8182
const KEY = '__API_KEY__';
8283
const client = new ScrapflyClient({ key: KEY });
8384
const html = 'very long html file';
84-
const fetchStub = stub(client, 'fetch', async (config: Request): Promise<Response> => {
85+
const fetchStub = stub(client, 'fetch', async (config: RequestOptions): Promise<Response> => {
8586
const result = {
8687
code: 'ERR::EXTRACTION::CONTENT_TYPE_NOT_SUPPORTED',
8788
error_id: 'f0e9a6af-846a-49ab-8321-e21bb12bf494',

__tests__/client/scrape.test.ts

Lines changed: 19 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ import { ScrapeConfig } from '../../src/scrapeconfig.ts';
44
import { log } from '../../src/logger.ts';
55
import { assertEquals, assertRejects } from "https://deno.land/[email protected]/assert/mod.ts";
66
import { resultFactory, responseFactory } from '../utils.ts';
7+
import type { RequestOptions } from '../../src/utils.ts';
78
import { stub } from "https://deno.land/std/testing/mock.ts";
89

910
log.setLevel('DEBUG');
@@ -12,7 +13,7 @@ Deno.test('scrape: GET success', async () => {
1213
const KEY = '__API_KEY__';
1314
const client = new ScrapflyClient({ key: KEY });
1415

15-
const fetchStub = stub(client, 'fetch', async (config: Request): Promise<Response> => {
16+
const fetchStub = stub(client, 'fetch', async (config: RequestOptions): Promise<Response> => {
1617
const configUrl = new URL(config.url);
1718
assertEquals(configUrl.origin + configUrl.pathname, client.HOST + '/scrape');
1819
assertEquals(config.method, 'GET');
@@ -42,7 +43,7 @@ Deno.test('scrape errors: raises ApiHttpServerError on 500 and success', async (
4243
const KEY = '__API_KEY__';
4344
const client = new ScrapflyClient({ key: KEY });
4445

45-
const fetchStub = stub(client, 'fetch', async (config: Request): Promise<Response> => {
46+
const fetchStub = stub(client, 'fetch', async (config: RequestOptions): Promise<Response> => {
4647
const result = resultFactory({
4748
url: 'https://httpbin.dev/json',
4849
status_code: 500,
@@ -71,7 +72,7 @@ Deno.test('scrape errors: raises BadApiKeyError on 401', async () => {
7172
const KEY = '__API_KEY__';
7273
const client = new ScrapflyClient({ key: KEY });
7374

74-
const fetchStub = stub(client, 'fetch', async (config: Request): Promise<Response> => {
75+
const fetchStub = stub(client, 'fetch', async (config: RequestOptions): Promise<Response> => {
7576
const result = {
7677
status: 'error',
7778
http_code: 401,
@@ -100,7 +101,7 @@ Deno.test('scrape errors: raises TooManyRequests on 429 and success', async () =
100101
const KEY = '__API_KEY__';
101102
const client = new ScrapflyClient({ key: KEY });
102103

103-
const fetchStub = stub(client, 'fetch', async (config: Request): Promise<Response> => {
104+
const fetchStub = stub(client, 'fetch', async (config: RequestOptions): Promise<Response> => {
104105
const result = resultFactory({
105106
url: 'https://httpbin.dev/json',
106107
status_code: 429,
@@ -128,7 +129,7 @@ Deno.test('scrape errors: raises ScrapflyScrapeError on ::SCRAPE:: resource and
128129
const KEY = '__API_KEY__';
129130
const client = new ScrapflyClient({ key: KEY });
130131

131-
const fetchStub = stub(client, 'fetch', async (config: Request): Promise<Response> => {
132+
const fetchStub = stub(client, 'fetch', async (config: RequestOptions): Promise<Response> => {
132133
const result = resultFactory({
133134
url: config.url,
134135
status: 'ERR::SCRAPE::BAD_PROTOCOL',
@@ -155,7 +156,7 @@ Deno.test('scrape errors: raises ScrapflyWebhookError on ::WEBHOOK:: resource an
155156
const KEY = '__API_KEY__';
156157
const client = new ScrapflyClient({ key: KEY });
157158

158-
const fetchStub = stub(client, 'fetch', async (config: Request): Promise<Response> => {
159+
const fetchStub = stub(client, 'fetch', async (config: RequestOptions): Promise<Response> => {
159160
const result = resultFactory({
160161
url: config.url,
161162
status: 'ERR::WEBHOOK::DISABLED ',
@@ -182,7 +183,7 @@ Deno.test('scrape errors: raises ScrapflyProxyError on ERR::PROXY::POOL_NOT_FOUN
182183
const KEY = '__API_KEY__';
183184
const client = new ScrapflyClient({ key: KEY });
184185

185-
const fetchStub = stub(client, 'fetch', async (config: Request): Promise<Response> => {
186+
const fetchStub = stub(client, 'fetch', async (config: RequestOptions): Promise<Response> => {
186187
const result = resultFactory({
187188
url: config.url,
188189
status: 'ERR::PROXY::POOL_NOT_FOUND ',
@@ -209,7 +210,7 @@ Deno.test('scrape errors: raises ScrapflyScheduleError on ERR::SCHEDULE::DISABLE
209210
const KEY = '__API_KEY__';
210211
const client = new ScrapflyClient({ key: KEY });
211212

212-
const fetchStub = stub(client, 'fetch', async (config: Request): Promise<Response> => {
213+
const fetchStub = stub(client, 'fetch', async (config: RequestOptions): Promise<Response> => {
213214
const result = resultFactory({
214215
url: config.url,
215216
status: 'ERR::SCHEDULE::DISABLED',
@@ -236,7 +237,7 @@ Deno.test('scrape errors: raises ScrapflyAspError on ERR::ASP::SHIELD_ERROR reso
236237
const KEY = '__API_KEY__';
237238
const client = new ScrapflyClient({ key: KEY });
238239

239-
const fetchStub = stub(client, 'fetch', async (config: Request): Promise<Response> => {
240+
const fetchStub = stub(client, 'fetch', async (config: RequestOptions): Promise<Response> => {
240241
const result = resultFactory({
241242
url: config.url,
242243
status: 'ERR::ASP::SHIELD_ERROR',
@@ -263,7 +264,7 @@ Deno.test('scrape errors: raises ScrapflySessionError on ERR::SESSION::CONCURREN
263264
const KEY = '__API_KEY__';
264265
const client = new ScrapflyClient({ key: KEY });
265266

266-
const fetchStub = stub(client, 'fetch', async (config: Request): Promise<Response> => {
267+
const fetchStub = stub(client, 'fetch', async (config: RequestOptions): Promise<Response> => {
267268
const result = resultFactory({
268269
url: config.url,
269270
status: 'ERR::SESSION::CONCURRENT_ACCESS',
@@ -290,7 +291,7 @@ Deno.test('scrape errors: raises ApiHttpClientError on success and unknown statu
290291
const KEY = '__API_KEY__';
291292
const client = new ScrapflyClient({ key: KEY });
292293

293-
const fetchStub = stub(client, 'fetch', async (config: Request): Promise<Response> => {
294+
const fetchStub = stub(client, 'fetch', async (config: RequestOptions): Promise<Response> => {
294295
const result = resultFactory({
295296
url: config.url,
296297
status: 'ERR::NEW',
@@ -317,7 +318,7 @@ Deno.test('scrape errors: raises UpstreamHttpServerError on failure, ERR::SCRAPE
317318
const KEY = '__API_KEY__';
318319
const client = new ScrapflyClient({ key: KEY });
319320

320-
const fetchStub = stub(client, 'fetch', async (config: Request): Promise<Response> => {
321+
const fetchStub = stub(client, 'fetch', async (config: RequestOptions): Promise<Response> => {
321322
const result = resultFactory({
322323
url: config.url,
323324
success: false,
@@ -346,7 +347,7 @@ Deno.test('scrape errors: raises UpstreamHttpClientError on failure, ERR::SCRAPE
346347
const KEY = '__API_KEY__';
347348
const client = new ScrapflyClient({ key: KEY });
348349

349-
const fetchStub = stub(client, 'fetch', async (config: Request): Promise<Response> => {
350+
const fetchStub = stub(client, 'fetch', async (config: RequestOptions): Promise<Response> => {
350351
const result = resultFactory({
351352
url: config.url,
352353
success: false,
@@ -385,7 +386,7 @@ Deno.test('scrape errors: raises resource exceptions on failure', async () => {
385386
};
386387

387388
for (const [resource, err] of Object.entries(resourceErrMap)) {
388-
const fetchStub = stub(client, 'fetch', async (config: Request): Promise<Response> => {
389+
const fetchStub = stub(client, 'fetch', async (config: RequestOptions): Promise<Response> => {
389390
const result = resultFactory({
390391
url: config.url,
391392
success: false,
@@ -414,7 +415,7 @@ Deno.test('scrape errors: raises ScrapflyError on unhandled failure', async () =
414415
const KEY = '__API_KEY__';
415416
const client = new ScrapflyClient({ key: KEY });
416417

417-
const fetchStub = stub(client, 'fetch', async (config: Request): Promise<Response> => {
418+
const fetchStub = stub(client, 'fetch', async (config: RequestOptions): Promise<Response> => {
418419
const result = resultFactory({
419420
url: config.url,
420421
success: false,
@@ -443,7 +444,7 @@ Deno.test('scrape errors: account retrieval status unhandled code (e.g. 404)', a
443444
const KEY = '__API_KEY__';
444445
const client = new ScrapflyClient({ key: KEY });
445446

446-
const fetchStub = stub(client, 'fetch', async (config: Request): Promise<Response> => {
447+
const fetchStub = stub(client, 'fetch', async (config: RequestOptions): Promise<Response> => {
447448
return responseFactory(
448449
{},
449450
{
@@ -466,7 +467,7 @@ Deno.test('scrape errors: account retrieval bad api key (status 401)', async ()
466467
const KEY = '__API_KEY__';
467468
const client = new ScrapflyClient({ key: KEY });
468469

469-
const fetchStub = stub(client, 'fetch', async (config: Request): Promise<Response> => {
470+
const fetchStub = stub(client, 'fetch', async (config: RequestOptions): Promise<Response> => {
470471
return responseFactory(
471472
{},
472473
{
@@ -496,7 +497,7 @@ Deno.test('concurrent scrape: success with explicit concurrency', async () => {
496497
const results = [];
497498
const errors = [];
498499

499-
const fetchStub = stub(client, 'fetch', async (config: Request): Promise<Response> => {
500+
const fetchStub = stub(client, 'fetch', async (config: RequestOptions): Promise<Response> => {
500501
await new Promise((resolve) => setTimeout(resolve, 100)); // XXX: NEEDS a delay!
501502
log.error(config.url);
502503
if (config.url.includes('200')) {

__tests__/client/screenshot.test.ts

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3,13 +3,14 @@ import { ScrapflyClient } from '../../src/client.ts';
33
import { ScreenshotConfig } from '../../src/screenshotconfig.ts';
44
import { assertEquals, assertRejects } from "https://deno.land/[email protected]/assert/mod.ts";
55
import { stub } from "https://deno.land/std/testing/mock.ts";
6+
import type { RequestOptions } from '../../src/utils.ts';
67
import { mockedStream, responseFactory } from '../utils.ts';
78

89
Deno.test('screenshot: succeeds', async () => {
910
const KEY = '__API_KEY__';
1011
const client = new ScrapflyClient({ key: KEY });
1112
const url = 'https://web-scraping.dev/';
12-
const fetchStub = stub(client, 'fetch', async (config: Request): Promise<Response> => {
13+
const fetchStub = stub(client, 'fetch', async (config: RequestOptions): Promise<Response> => {
1314
const configUrl = new URL(config.url);
1415
// Ensure the URL matches the pattern
1516
assertEquals(configUrl.origin + configUrl.pathname, client.HOST + '/screenshot');
@@ -42,7 +43,7 @@ Deno.test('screenshot: fails due to failing upstream response', async () => {
4243
const KEY = '__API_KEY__';
4344
const client = new ScrapflyClient({ key: KEY });
4445
const url = 'https://domain.com/down-page/';
45-
const fetchStub = stub(client, 'fetch', async (config: Request): Promise<Response> => {
46+
const fetchStub = stub(client, 'fetch', async (config: RequestOptions): Promise<Response> => {
4647
const body = {
4748
code: 'ERR::SCREENSHOT::UNABLE_TO_TAKE_SCREENSHOT',
4849
error_id: '347bc6cb-1cba-467a-bd06-c932a9e7156d',
@@ -72,7 +73,7 @@ Deno.test('screenshot: fails to non html/text web page', async () => {
7273
const KEY = '__API_KEY__';
7374
const client = new ScrapflyClient({ key: KEY });
7475
const url = 'https://web-scraping.dev/assets/pdf/eula.pdf/';
75-
const fetchStub = stub(client, 'fetch', async (config: Request): Promise<Response> => {
76+
const fetchStub = stub(client, 'fetch', async (config: RequestOptions): Promise<Response> => {
7677
const body = {
7778
code: 'ERR::SCREENSHOT::INVALID_CONTENT_TYPE',
7879
error_id: 'f0e9a6af-846a-49ab-8321-e21bb12bf494',

__tests__/utils.test.ts

Lines changed: 16 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -23,8 +23,7 @@ Deno.test('fetchRetry: succeeds on first attempt', async () => {
2323
return new Response('Success', { status: 200 });
2424
});
2525

26-
const request = new Request('https://example.com');
27-
const response = await fetchRetry(request);
26+
const response = await fetchRetry({"url": "https://scrapfly.io/"});
2827

2928
assertEquals(await response.text(), 'Success');
3029
assertEquals(response.status, 200);
@@ -43,8 +42,7 @@ Deno.test('fetchRetry: retries on 500 and succeeds', async () => {
4342
}
4443
});
4544

46-
const request = new Request('https://example.com');
47-
const response = await fetchRetry(request);
45+
const response = await fetchRetry({"url": "https://scrapfly.io/"});
4846

4947
assertEquals(await response.text(), 'Success');
5048
assertEquals(response.status, 200);
@@ -60,8 +58,7 @@ Deno.test('fetchRetry: does not retry 4xx', async () => {
6058
return new Response('bad request', { status: 422 });
6159
});
6260

63-
const request = new Request('https://example.com');
64-
const response = await fetchRetry(request);
61+
const response = await fetchRetry({"url": "https://scrapfly.io/"});
6562

6663
assertEquals(await response.text(), 'bad request');
6764
assertEquals(response.status, 422);
@@ -77,11 +74,9 @@ Deno.test('fetchRetry: fails after max retries', async () => {
7774
return new Response('Internal Server Error', { status: 500 });
7875
});
7976

80-
const request = new Request('https://example.com');
81-
8277
await assertRejects(
8378
async () => {
84-
await fetchRetry(request, 3);
79+
await fetchRetry({"url": "https://scrapfly.io/"}, 3);
8580
},
8681
Error,
8782
'Fetch failed with status: 500'
@@ -92,6 +87,18 @@ Deno.test('fetchRetry: fails after max retries', async () => {
9287
fetchStub.restore();
9388
});
9489

90+
Deno.test('fetchRetry: fails after max retries on real target', async () => {
91+
await assertRejects(
92+
async () => {
93+
const resp = await fetchRetry({"url": "https://httpbin.dev/status/500"}, 3);
94+
},
95+
Error,
96+
'Fetch failed with status: 500'
97+
);
98+
99+
});
100+
101+
95102
// XXX: should we support built-in timeout?
96103
/*
97104
Deno.test('fetchRetry: fails due to timeout', async () => {

deno.json

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
},
55
"name": "@scrapfly/scrapfly-sdk",
66
"exports": "./src/main.ts",
7-
"version": "0.6.2",
7+
"version": "0.6.3",
88
"description": "SDK for Scrapfly.io API for web scraping, screenshotting and data extraction",
99
"tasks": {
1010
"start": "deno run --allow-net --allow-read src/main.ts",
@@ -26,7 +26,5 @@
2626
"exclude": ["no-explicit-any"]
2727
}
2828
},
29-
"imports": {
30-
"@deno/dnt": "jsr:@deno/dnt@^0.41.2"
31-
}
29+
"imports": { "@deno/dnt": "jsr:@deno/dnt@^0.41.2", "@scrapfly/scrapfly-sdk": "jsr:@scrapfly/scrapfly-sdk@^0.6.2" }
3230
}

0 commit comments

Comments (0)