-
Notifications
You must be signed in to change notification settings - Fork 0
/
app.js
50 lines (38 loc) · 1.68 KB
/
app.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
const puppeteer = require("puppeteer");
const { getNextPage, scrapeData } = require("./src/scrape");
const { initOutputDirectory, downloadAndSaveImage } = require("./src/io");
const { getURL, constructFileName } = require("./src/utils");
// Page number handed to getURL() to build the first URL that gets scraped.
const START_PAGE = 1;
// Directory name passed to initOutputDirectory() and downloadAndSaveImage().
const OUTPUT_DIRECTORY = "komixxy";
/**
 * Script entry point: launches a headless browser, walks the site page by
 * page starting at START_PAGE, and saves every scraped image into
 * OUTPUT_DIRECTORY. The browser is closed when the run ends, whether it
 * finished normally or failed part-way through.
 */
(async () => {
  const browser = await puppeteer.launch({ headless: true });

  // Safety net for promises that nothing awaits: log them and shut the browser down.
  process.on("unhandledRejection", (reason, promise) => {
    console.error("Unhandled Rejection at: Promise", promise, "reason:", reason);
    process.exitCode = 1;
    // close() is asynchronous too; handle its failure here so it cannot re-trigger this handler.
    browser.close().catch((closeError) => {
      console.error("Failed to close the browser:", closeError);
    });
  });

  /**
   * Loads a single page, saves its images and works out where to go next.
   *
   * @param {object} page - Puppeteer page used for navigation.
   * @param {string} url - URL to load; its last "/"-separated segment is read as the page number.
   * @returns {Promise<*>} The next URL to visit (from getNextPage or getURL); a falsy value ends pagination.
   * @throws {Error} When the page is faulty and no page number can be read from `url`.
   */
  const scrapePage = async (page, url) => {
    const pageNumber = Number(url.split("/").pop());
    await page.goto(url, { waitUntil: "networkidle2" });

    const isFaulty = Boolean(await page.$("div#framework_error"));
    if (isFaulty) {
      // Skipping relies on pageNumber + 1; without a number there is no next URL to build.
      if (Number.isNaN(pageNumber)) {
        throw new Error(`Cannot skip faulty page: no page number at the end of ${url}`);
      }
      console.log("\x1b[31m%s\x1b[0m", `#${pageNumber} is faulty. Skipping to ${pageNumber + 1}...`);
      return getURL(pageNumber + 1);
    }

    const data = await scrapeData(page);
    // Downloads run concurrently, but all of them must settle before the page is
    // reported as saved and before the browser is allowed to close.
    await Promise.all(
      data.map((entry) =>
        downloadAndSaveImage(entry.image_src, constructFileName(entry), OUTPUT_DIRECTORY)
      )
    );
    console.log("\x1b[32m%s\x1b[0m", `#${pageNumber} page scraped and saved!`);
    return getNextPage(page);
  };

  /**
   * Visits pages one after another, starting at `startUrl`, until no next page is reported.
   * Iterative on purpose, so the number of pending calls does not grow with the page count.
   *
   * @param {object} page - Puppeteer page reused for every navigation.
   * @param {string} startUrl - First URL to scrape.
   * @returns {Promise<void>}
   */
  const paginate = async (page, startUrl) => {
    let nextPage = startUrl;
    do {
      nextPage = await scrapePage(page, nextPage);
    } while (nextPage);
  };

  try {
    const page = await browser.newPage();
    // scrape and save
    // NOTE(review): awaited in case initOutputDirectory is asynchronous; harmless if it is synchronous.
    await initOutputDirectory(OUTPUT_DIRECTORY);
    await paginate(page, getURL(START_PAGE));
  } finally {
    await browser.close();
  }
})().catch((error) => {
  console.error(error);
  process.exitCode = 1;
});