Releases: marcomontalbano/html-miner
Releases · marcomontalbano/html-miner
0.0.1-beta-3
Example
Given the following HTML snippet, we want to extract some information from it.
<h1>Hello, world!</h1>
<div class="articles">
<div class="article">
<h2>Heading 1</h2>
<p>Lorem ipsum dolor sit amet, consectetur adipiscing elit.</p>
</div>
<div class="article">
<h2>Heading 2</h2>
<p>Donec maximus ipsum quis est tempor, sit amet laoreet libero bibendum.</p>
</div>
<div class="article">
<h2>Heading 3</h2>
<p>Suspendisse viverra convallis risus, vitae molestie est tincidunt eget.</p>
</div>
</div>
<footer>
<p>© <span>Company</span> 2017</p>
</footer>
// Usage example: extract structured data from the HTML snippet shown above.
// NOTE: `html` is not defined in this snippet; it is assumed to hold that
// HTML markup as a string.
const htmlMiner = require('html-miner');
// The second argument maps output keys to selectors, nested objects, or functions.
let json = htmlMiner(html, {
// A selector with a single match yields a string ('Hello, world!').
title: "h1",
// A selector with several matches yields an array of strings.
h2: "h2",
articles: {
// `_each_` produces one result object per element matching the selector;
// judging by the expected output, `title` and `content` are resolved
// within each matched element.
_each_: '.articles .article',
title: 'h2',
content: 'p',
},
// A nested object yields a nested result object with the same keys.
footer: {
copyright: 'footer',
company: 'footer span',
// A function receives `$` (unused here) and `scopeData`; here `scopeData`
// exposes the sibling `copyright` value, from which the first run of digits
// is taken. `match` returns null when there are no digits, so `[0]` would
// throw for a footer without a year.
year: ($, scopeData) => { return scopeData.copyright.match(/[0-9]+/)[0] },
},
// A function's return value is used as the result for its key.
greet: $ => { return 'Hi!' }
});
// Print the extracted data; the expected output is listed below.
console.log( json );
// {
// title: 'Hello, world!',
// h2: ['Heading 1', 'Heading 2', 'Heading 3'],
// articles: [
// {
// title: 'Heading 1',
// content: 'Lorem ipsum dolor sit amet, consectetur adipiscing elit.',
// },
// {
// title: 'Heading 2',
// content: 'Donec maximus ipsum quis est tempor, sit amet laoreet libero bibendum.',
// },
// {
// title: 'Heading 3',
// content: 'Suspendisse viverra convallis risus, vitae molestie est tincidunt eget.',
// }
// ],
// footer: {
// copyright: '© Company 2017',
// company: 'Company',
// year: '2017'
// },
// greet: 'Hi!'
// }
0.0.1-beta-2
Example
Fetch the HTML from http://getbootstrap.com/docs/4.0/examples/jumbotron/ and store it in a variable, e.g. `let html = ' ... ';`.
// Usage example: extract a few values from the fetched page.
// NOTE: `html` is not defined in this snippet; it is assumed to hold the
// page's HTML as a string, as described above.
const htmlMiner = require('html-miner');
// The second argument maps output keys to selectors or functions.
let json = htmlMiner(html, {
// A selector with a single match yields a string.
title : 'h1',
// A selector with several matches yields an array of strings.
headings : 'h2',
// A function's return value is used as the result for its key.
greet : $ => { return 'Hi!' }
});
// Print the extracted data; the expected output is listed below.
console.log( json );
// {
// title : 'Hello, world!',
// headings : ['Heading', 'Heading', 'Heading'],
// greet : 'Hi!'
// }
0.0.1-beta-1
HTML Miner
A powerful miner that will scrape HTML pages for you. `HTML Scraper`