Skip to content

Commit

Permalink
Adds broken links support
Browse files Browse the repository at this point in the history
  • Loading branch information
Rafael Alfaro committed Oct 12, 2018
1 parent b289e46 commit 6f78b16
Show file tree
Hide file tree
Showing 4 changed files with 83 additions and 1 deletion.
15 changes: 15 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,5 +9,20 @@ Get Broken images and screenshot of page with broken images from a list of page
4. (HT_USER and HT_PASS are optional, use only if your site is password protected).
5. npm start

## Instructions (Broken Links)
1. npm install.
2. create an .env file with content like [this](env.example)
3. SAME_SITE_LINKS is a comma-separated list of URL paths from the original domain (without the domain or the leading slash)
4. (HT_USER and HT_PASS are optional, use only if your site is password protected).
5. (NEW_DOMAIN is required and is the new domain to check if those urls exist)
6. npm run broken-links

## Instructions (Featured Image)
1. npm install.
2. create an .env file with content like [this](env.example)
3. SAME_SITE_LINKS is a comma separated list of urls to test
4. (HT_USER and HT_PASS are optional, use only if your site is password protected).
5. npm run featured-image

### To Do
* Code Refactoring
65 changes: 65 additions & 0 deletions broken-links.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
require('dotenv').config()
const puppeteer = require('puppeteer');
var fs = require('fs');

/**
 * Runs `callback` for every element of `array`, one element at a time,
 * awaiting each call before starting the next.
 *
 * @param {Array} array - Items to visit in index order.
 * @param {Function} callback - Invoked as `callback(item, index, array)`; may return a promise.
 * @returns {Promise<void>} Resolves after the last callback has settled.
 */
async function asyncForEach(array, callback) {
  let position = 0;
  while (position < array.length) {
    const item = array[position];
    await callback(item, position, array);
    position += 1;
  }
}

/**
 * Opens `url` (a path relative to NEW_DOMAIN) in a headless browser and, when
 * the loaded page has a `body.error404` element, logs the full URL and appends
 * it to broken-links.txt.
 *
 * Reads from the environment: HT_USER / HT_PASS (optional HTTP credentials)
 * and NEW_DOMAIN (required).
 *
 * @param {string} url - Path to check; appended to NEW_DOMAIN after a single slash.
 * @param {number} [index=0] - Position of the link in the list; currently unused.
 * @returns {Promise<void>} Resolves once the check has finished and the browser is closed.
 * @throws {Error} If NEW_DOMAIN is not set (checked before any browser is launched).
 */
async function getImages(url , index=0){
  const username = process.env.HT_USER;
  const password = process.env.HT_PASS;
  const new_domain = process.env.NEW_DOMAIN;
  // Fail early with a clear message instead of a TypeError after Chromium has started.
  if (!new_domain) {
    throw new Error('NEW_DOMAIN is not set; it is required to build the URLs to check');
  }
  // Drop one trailing slash from the domain so the join never yields "//".
  const new_url = new_domain.replace(/\/$/, '') + '/' + url;

  const browser = await puppeteer.launch();
  try {
    const page = await browser.newPage();
    if (username && password) {
      await page.authenticate({username, password});
    }

    await page.goto(new_url, {
      waitUntil: 'networkidle2',
      // 0 disables the navigation timeout.
      timeout: 0
    });
    // A page is treated as broken when <body> carries the error404 class.
    const not_found_body = await page.$('body.error404');
    if (not_found_body !== null) {
      console.log(new_url);

      // Synchronous append: a write failure lands in the catch below and
      // entries are written in the order the links were checked.
      fs.appendFileSync('broken-links.txt', new_url+'\n');
      console.log('Saved!');
    }
  }
  catch (e){
    // Best effort: a failing page is reported but must not abort the whole run.
    console.error(e);
  }
  finally {
    // Always release the browser, even when navigation or authentication failed.
    await browser.close();
  }
}

/**
 * Entry point: empties broken-links.txt, then checks every entry of the
 * comma-separated SAME_SITE_LINKS environment variable, one link at a time.
 * Errors are logged rather than thrown, so one bad link does not stop the run.
 */
(async() => {
  try {
    // Truncate synchronously so the report is empty before any check can
    // append to it, and so a write failure is caught here.
    fs.writeFileSync('broken-links.txt', '');
    console.log('Replaced!');

    const raw_links = process.env.SAME_SITE_LINKS;
    if (!raw_links) {
      throw new Error('SAME_SITE_LINKS is not set; nothing to check');
    }
    // Tolerate spaces around commas and skip empty entries (e.g. a trailing comma).
    const same_site_links = raw_links
      .split(',')
      .map((link) => link.trim())
      .filter((link) => link !== '');

    await asyncForEach(same_site_links, async (link, index) => {
      try {
        await getImages(link, index);
      }
      catch (e){
        console.error(e);
      }
    });
  }
  catch (e){
    console.error(e);
  }
})();
1 change: 1 addition & 0 deletions env.example
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
HT_USER=your_user
HT_PASS=your_password
SAME_SITE_LINKS=https://your_link.com,https://your_link_2.com
NEW_DOMAIN=https://google.com
3 changes: 2 additions & 1 deletion package.json
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,8 @@
"scripts": {
"test": "echo \"Error: no test specified\" && exit 1",
"start": "node index.js",
"featured-image": "node featured-images.js"
"featured-image": "node featured-images.js",
"broken-links": "node broken-links.js"
},
"repository": {
"type": "git",
Expand Down

0 comments on commit 6f78b16

Please sign in to comment.