1
+ import * as errors from '../../src/errors.js' ;
2
+ import { ScrapflyClient } from '../../src/client.js' ;
3
+ import { ExtractionConfig } from '../../src/extractionconfig.js'
4
+ import { describe , it , expect , beforeEach , jest } from '@jest/globals' ;
5
+ import { responseFactory } from '../utils.js' ;
6
+
7
/**
 * Test suite for `ScrapflyClient.extract`.
 *
 * Tests that need an API reply install a jest spy on `client.fetch` and return a
 * canned response built with `responseFactory`; the assertions then check either
 * the parsed result or the error class that `extract` rejects with.
 */
describe('extract', () => {
  const KEY = '__API_KEY__';
  const client = new ScrapflyClient({ key: KEY });
  // Document body submitted with every extraction request in this suite.
  const html = 'very long html file';

  beforeEach(() => {
    // Resets recorded calls only: mockClear() keeps whatever implementation is installed.
    jest.spyOn(client, 'fetch').mockClear();
  });

  it('succeeds', async () => {
    const spy = jest.spyOn(client, 'fetch').mockImplementation(async (config: Request): Promise<any> => {
      // Inspect the outgoing request through the public Fetch API (`url`, `text()`)
      // instead of symbol-keyed internal state picked by position, which is not a
      // stable contract of the runtime.
      const requestUrl = new URL(config.url);
      const requestBody = await config.text();

      // Ensure the URL matches the pattern
      expect(requestUrl.origin + requestUrl.pathname).toEqual(client.HOST + '/extraction');
      expect(config.method).toEqual('POST');
      // toBe rather than toMatch: toMatch with a string argument only checks for a substring.
      expect(requestUrl.searchParams.get('key')).toBe(KEY);
      expect(requestBody).toEqual(html);

      const body = { data: 'a document summary', content_type: 'text/html' };
      return responseFactory(body, {
        status: 200,
      });
    });

    const result = await client.extract(new ExtractionConfig({ body: html, content_type: 'text/html' }));
    expect(result).toBeDefined();
    expect(result.content_type).toBe('text/html');
    expect(result.data).toBe('a document summary');
    expect(spy).toHaveBeenCalledTimes(1);
  });

  it('fails due to failing to invalid config', async () => {
    // Both `template` and `ephemeral_template` are supplied; the expected outcome
    // is a rejection with ExtractionConfigError.
    await expect(
      client.extract(
        new ExtractionConfig({
          body: html,
          content_type: 'text/html',
          ephemeral_template: { source: 'html' },
          template: 'template',
        }),
      ),
    ).rejects.toThrow(errors.ExtractionConfigError);
  });

  it('fails to invalid API key', async () => {
    jest.spyOn(client, 'fetch').mockImplementation(async (): Promise<any> => {
      // Canned 401 reply returned in place of a real API call.
      const result = {
        status: 'error',
        http_code: 401,
        reason: 'Unauthorized',
        error_id: '301e2d9e-b4f5-4289-85ea-e452143338df',
        message: 'Invalid API key',
      };
      return responseFactory(result, {
        status: 401,
        headers: {
          'Content-Type': 'application/json',
        },
      });
    });

    await expect(client.extract(new ExtractionConfig({ body: html, content_type: 'text/html' }))).rejects.toThrow(
      errors.BadApiKeyError,
    );
  });

  it('fails to any extraction related error', async () => {
    jest.spyOn(client, 'fetch').mockImplementation(async (): Promise<any> => {
      // Canned 422 reply carrying an ERR::EXTRACTION::* code.
      const result = {
        code: 'ERR::EXTRACTION::CONTENT_TYPE_NOT_SUPPORTED',
        error_id: 'f0e9a6af-846a-49ab-8321-e21bb12bf494',
        http_code: 422,
        links: {
          'Related Error Doc':
            'https://scrapfly.io/docs/extraction-api/error/ERR::EXTRACTION::CONTENT_TYPE_NOT_SUPPORTED',
        },
        message: 'ERR::EXTRACTION::CONTENT_TYPE_NOT_SUPPORTED',
      };
      return responseFactory(result, {
        status: 422,
        headers: {
          'Content-Type': 'application/json',
        },
      });
    });

    await expect(client.extract(new ExtractionConfig({ body: html, content_type: 'text/html' }))).rejects.toThrow(
      errors.ExtractionApiError,
    );
  });
});
0 commit comments