// File: backend/parser/quote.js
// Provenance: introduced by the patch "added parser of site quote"
// (commit bd305cb7ec95e329e787056f2cf58975b42dc568, author SoulNaturalist).

'use strict';

const puppeteer = require('puppeteer');

// Pages whose <article> elements are scraped, visited in order.
const CITATION_URLS = [
  'https://mybook.ru/author/mihail-bulgakov/master-i-margarita-1/citations/',
];

/**
 * Navigates `page` to `url` and collects the trimmed text content of every
 * <article> element found in the loaded document.
 *
 * @param {import('puppeteer').Page} page - An open Puppeteer page to reuse.
 * @param {string} url - Address of the page to load.
 * @returns {Promise<string[]>} One trimmed string per <article> element.
 */
async function scrapeArticleTexts(page, url) {
  await page.goto(url);
  // The callback is executed in the browser page context, so it must be
  // self-contained and may only return serializable values.
  return page.evaluate(() =>
    Array.from(document.querySelectorAll('article'), (article) =>
      article.textContent.trim(),
    ),
  );
}

/**
 * Launches a headless browser, prints the article texts of every configured
 * URL to stdout, and always shuts the browser down afterwards.
 *
 * @returns {Promise<void>}
 */
async function main() {
  const browser = await puppeteer.launch({ headless: true });
  try {
    const page = await browser.newPage();
    for (const url of CITATION_URLS) {
      const texts = await scrapeArticleTexts(page, url);
      console.log(texts);
    }
  } finally {
    // Close the browser even when navigation or evaluation fails, so a
    // failed run does not skip the shutdown step.
    await browser.close();
  }
}

main().catch((err) => {
  // Report the failure explicitly instead of leaving the promise floating.
  console.error(err);
  process.exitCode = 1;
});