forked from adamschwartz/web.scraper.workers.dev
-
Notifications
You must be signed in to change notification settings - Fork 0
/
index.js
61 lines (47 loc) · 1.63 KB
/
index.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
import html from './html.js'
import contentTypes from './content-types.js'
import Scraper from './scraper.js'
import { generateJSONResponse, generateErrorJSONResponse } from './json-response.js'
// Register the Worker's fetch listener: each incoming request is answered
// with whatever handleRequest produces for it.
addEventListener('fetch', (event) => {
  const response = handleRequest(event.request)
  event.respondWith(response)
})
/**
 * Entry point for every request: decides between the scraping API and the
 * static site based on the query string.
 *
 * Query parameters read: `url`, `selector`, `attr`, `spaced` (adds spaces
 * between tags) and `pretty`. All values are forwarded as the raw strings
 * (or null) returned by URLSearchParams.
 *
 * @param {Request} request - Incoming request; only `request.url` is read here.
 * @returns {Promise<*>} Whatever handleSiteRequest or handleAPIRequest resolves to.
 */
async function handleRequest(request) {
  const { searchParams: query } = new URL(request.url)
  const target = query.get('url')
  const selector = query.get('selector')

  // Both a target URL and a selector are required for an API call; anything
  // else is treated as a request for the site itself.
  if (!target || !selector) {
    return handleSiteRequest(request)
  }

  // Targets given without a `scheme://` prefix default to plain http.
  const hasScheme = /^[a-zA-Z]+:\/\//.test(target)

  return handleAPIRequest({
    url: hasScheme ? target : 'http://' + target,
    selector,
    attr: query.get('attr'),
    spaced: query.get('spaced'),
    pretty: query.get('pretty'),
  })
}
/**
 * Serves the non-API side of the Worker: the imported `html` document at the
 * root path, and a plain-text 404 for every other path.
 *
 * @param {Request} request - Incoming request; only `request.url` is read.
 * @returns {Promise<Response>} The HTML page or a "Not found" response with status 404.
 */
async function handleSiteRequest(request) {
  const { pathname } = new URL(request.url)
  const isRoot = pathname === '/' || pathname === ''

  if (!isRoot) {
    return new Response('Not found', { status: 404 })
  }

  return new Response(html, {
    headers: { 'content-type': contentTypes.html },
  })
}
/**
 * Runs one scrape: fetches `url` through Scraper, selects `selector`, and
 * returns either the matched text or the value of attribute `attr` wrapped in
 * a JSON response.
 *
 * Any error thrown while fetching or querying is turned into an error JSON
 * response rather than propagated.
 *
 * @param {Object} options
 * @param {string} options.url - Address handed to Scraper#fetch.
 * @param {string} options.selector - Selector handed to querySelector.
 * @param {?string} options.attr - When truthy, read this attribute instead of text.
 * @param {?string} options.spaced - Forwarded to getText as `{ spaced }`.
 * @param {?string} options.pretty - Forwarded to the JSON response helpers.
 * @returns {Promise<*>} Result of generateJSONResponse or generateErrorJSONResponse.
 */
async function handleAPIRequest({ url, selector, attr, spaced, pretty }) {
  let result

  try {
    const scraper = await new Scraper().fetch(url)
    const matched = scraper.querySelector(selector)

    result = attr
      ? await matched.getAttribute(attr)
      : await matched.getText({ spaced })
  } catch (error) {
    return generateErrorJSONResponse(error, pretty)
  }

  // Kept outside the try so a failure while building the success response
  // is not reported through the error-response path.
  return generateJSONResponse({ result }, pretty)
}