-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathexample.js
36 lines (33 loc) · 1.18 KB
/
example.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
import Scraper from './src/index'
/**
 * Example configuration object handed to `Scraper().setConfig(...)`.
 *
 * - `uri`: an absolute URL (presumably the page scraping starts from — confirm
 *   against the Scraper implementation).
 * - `pagination.selector`: CSS selector string for the prev/next link container.
 * - `pagination.handler`: computes the URL of the next page (see below).
 * - `selectors`: label -> CSS selector string; the labels here are illustrative.
 */
const config = {
  uri: 'https://www.example.com/',
  pagination: {
    selector: 'div.prev_next',
    /**
     * Pick the "Next" link out of `uris` and turn it into an absolute URL.
     *
     * @param {string} currentUri - Absolute URL used as the base for resolution.
     * @param {Object<string, string>} uris - Links keyed by a label; the first
     *   key containing "Next" is used. (Presumably link text -> href; verify
     *   against what the Scraper passes in.)
     * @returns {Promise<string|undefined>} Absolute URL of the next page, or a
     *   falsy value when no "Next" entry exists.
     * @throws {TypeError} If `currentUri` is not a valid absolute URL.
     */
    handler: async (currentUri, uris) => {
      const base = new URL(currentUri);
      const nextKey = Object.keys(uris).find((key) => key.includes('Next'));
      const newPath = nextKey === undefined ? undefined : uris[nextKey];
      if (!newPath) {
        return newPath;
      }
      // Resolve against the current page instead of concatenating onto the
      // origin: concatenation breaks for hrefs without a leading slash and for
      // hrefs that are already absolute.
      return new URL(newPath, base).href;
    },
  },
  selectors: {
    id: 'el#id',
    class: 'el.class',
    attr: 'el[attr="val"]',
    specifictychainsaresafehere: 'ul.news > li.first > p.title span[data-why-would-this-exist="..."]',
  },
};
// Example entry point: creates a scraper from `config`, registers
// console.error via errors() and console.log via listen(), runs autoScrape()
// to completion, then reports when stop() returns a truthy value.
(async function runExample() {
  const scraper = Scraper().setConfig(config);
  scraper.errors(console.error);
  scraper.listen(console.log);

  // To scrape a single page instead, call `scraper.scrape()`.
  await scraper.autoScrape();

  const stopped = scraper.stop();
  if (!stopped) {
    return;
  }
  console.info('done scraping');
})();
/* Parse a markup string using DOMParser (note: "text/xml" below selects the XML parser; pass "text/html" to parse as HTML)
var xmlString = "<div id='foo'><a href='#'>Link</a><span></span></div>";
doc = new DOMParser().parseFromString(xmlString, "text/xml");
console.log(doc.firstChild.innerHTML); // => <div id="foo">...
console.log(doc.firstChild.firstChild.innerHTML); // => <a href="#">..
*/