100 lines
2.6 KiB
JavaScript
100 lines
2.6 KiB
JavaScript
const puppeteer = require('puppeteer');
|
|
const mysql = require('mysql2/promise');
|
|
const config = require("./config.json");
|
|
|
|
/**
 * Sequentially runs an async callback over every element of an array.
 *
 * Each invocation is awaited before the next one starts, so the callbacks
 * never overlap. If a callback rejects, iteration stops and the rejection
 * propagates to the caller.
 *
 * @param {Array} array - Items to iterate over (anything with `length` and index access).
 * @param {function(*, number, Array): (Promise|*)} callback - Called as
 *   `callback(item, index, args)` for each element.
 * @param {Array} [args=[]] - Extra value forwarded unchanged as the third callback argument.
 * @returns {Promise<void>} Resolves once every callback has completed.
 */
async function asyncForEach(array, callback, args = []) {
  // Index loop (rather than for...of) so array-like inputs keep working and
  // `length` is re-read on every step, exactly like a plain for loop.
  for (let index = 0; index < array.length; index++) {
    await callback(array[index], index, args);
  }
}
|
|
|
|
/**
 * Scraper that collects thread links from the urq.borda.ru index pages and
 * stores them in a MySQL table named `<DB_PREFIX>bordathreads`.
 *
 * Relies on the module-level `puppeteer`, `mysql` (mysql2/promise) and
 * `config` bindings.
 */
class Urq {
  /**
   * Builds the MySQL connection options from `config.json`.
   */
  constructor() {
    this.dbopts = {
      host: 'localhost',
      database: config.DB_NAME,
      user: config.DB_USER,
      password: config.DB_PASSWORD,
      // NOTE(review): `protocol` and `query` do not look like mysql2
      // connection options — confirm whether they can be dropped.
      protocol: 'mysql',
      port: '3306',
      query: {pool: true}
    };
  }

  /**
   * Entry point: opens the database connection and a headless browser,
   * makes sure the target table exists, scrapes the thread index and then
   * releases both resources.
   *
   * The browser and the database connection are closed in `finally` blocks
   * so they are released even when table creation or scraping throws.
   *
   * @returns {Promise<void>}
   * @throws Rejects if connecting, launching the browser or creating the
   *   table fails.
   */
  async scrape() {
    this.db = await mysql.createConnection(this.dbopts);
    try {
      this.browser = await puppeteer.launch({
        "headless": true,
        "args": [
          "--disable-web-security",
          "--no-sandbox",
          "--disable-dev-shm-usage"
        ]
      });
      try {
        await this.db.execute(`
          CREATE TABLE IF NOT EXISTS \`${config.DB_PREFIX}bordathreads\` (
            \`id\` Int( 255 ) UNSIGNED AUTO_INCREMENT NOT NULL,
            \`url\` Varchar(255) UNIQUE NOT NULL,
            \`name\` Varchar(255) NOT NULL,
            PRIMARY KEY ( \`id\` )
          );`);
        await this.scrape_pages();
      } finally {
        await this.browser.close();
      }
    } finally {
      // Without this the open connection keeps the Node process alive.
      await this.db.end();
    }
  }

  /**
   * Walks the index pages (offsets 0..400 in steps of 20), collects every
   * `.font3 a` link as `{url, name}` and inserts the results into the
   * threads table.
   *
   * Errors are logged rather than rethrown: a failure while paging aborts
   * the run, while a failed INSERT (e.g. a duplicate `url`, which is UNIQUE)
   * only skips that row.
   *
   * Expects `this.browser` and `this.db` to have been set up by `scrape()`.
   *
   * @returns {Promise<void>}
   */
  async scrape_pages() {
    let page;
    try {
      console.log("Scraping threads.");
      page = await this.browser.newPage();
      const threads = [];

      for (let offset = 0; offset <= 400; offset += 20) {
        console.log(`Offset ${offset}`);
        await page.goto(`http://urq.borda.ru?0-0-${offset}`, {
          "waitUntil": "load",
          "timeout": 60000
        });

        // Runs inside the page context; any DOM error yields an empty list
        // for this page instead of aborting the whole scrape.
        const pagethreads = await page.evaluate(() => {
          try {
            const links = document.querySelectorAll(".font3 a");
            const href = [];
            for (let i = 0; i < links.length; i++) {
              href.push({
                "url": links[i].getAttribute("href"),
                "name": links[i].innerHTML,
              });
            }
            return href;
          } catch (e) {
            return [];
          }
        });
        console.log(pagethreads);
        threads.push(...pagethreads);
        // All pages are collected before inserting; the loop must not exit
        // the process here, or the INSERT phase below never runs.
      }

      for (const thread of threads) {
        try {
          await this.db.execute(
            `INSERT INTO \`${config.DB_PREFIX}bordathreads\` (url, name) VALUES( ?, ? )`,
            [
              thread.url,
              thread.name,
            ]
          );
        } catch (e) {
          console.log(e);
        }
      }
    } catch (e) {
      console.log(e);
    } finally {
      // Close the tab even when navigation or evaluation failed.
      if (page) {
        try {
          await page.close();
        } catch (e) {
          console.log(e);
        }
      }
    }
  }
}
|
|
// CommonJS export: the module's public value is the Urq scraper class.
module.exports = Urq;
|