Skip to content

Commit

Permalink
chore: add tests and docs
Browse files Browse the repository at this point in the history
  • Loading branch information
Kikobeats committed Jun 24, 2024
1 parent 5d5136f commit a4b3306
Show file tree
Hide file tree
Showing 3 changed files with 46 additions and 7 deletions.
6 changes: 6 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -264,6 +264,12 @@ Type: `String`

The HTML markup for extracting the content.

##### htmlDom

Type: `object`

The DOM representation of the HTML markup. When it's not provided, it's derived from the `html` parameter.

#### rules

Type: `Array`
Expand Down
7 changes: 3 additions & 4 deletions packages/metascraper/src/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -14,22 +14,21 @@ module.exports = rules => {
return async ({
url,
html = '',
dom,
htmlDom,
rules: inlineRules,
validateUrl = true,
...props
} = {}) => {
if (validateUrl && !isUrl(url)) {
throw new MetascraperError({
message: 'Need to provide a valid URL.',
code: 'INVALID_URL'
})
}
const htmlDom = dom || load(html, { baseURI: url })

return getData({
url,
htmlDom,
htmlDom: htmlDom ?? load(html, { baseURI: url }),
rules: mergeRules(inlineRules, loadedRules),
...props
})
Expand Down
40 changes: 37 additions & 3 deletions packages/metascraper/test/unit/interface.js
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ const test = require('ava')

const createMetascraper = require('../..')
const titleRules = require('metascraper-title')()
const { load } = require('cheerio')

test('`url` is required', async t => {
t.plan(9)
Expand Down Expand Up @@ -31,7 +32,7 @@ test('`url` is required', async t => {
}
})

test('Disable URL validation using `validateUrl`', async t => {
test('passing `{ validateUrl: false }`', async t => {
const metascraper = createMetascraper([titleRules])

const html = `
Expand Down Expand Up @@ -66,7 +67,7 @@ test('Disable URL validation using `validateUrl`', async t => {
t.is(metadata.title, 'Document')
})

test('load extra `rules`', async t => {
test('passing `rules`', async t => {
const url = 'https://microlink.io'

const html = `
Expand Down Expand Up @@ -104,7 +105,7 @@ test('load extra `rules`', async t => {
t.is(metadata.foo, 'bar')
})

test('associate test function with rules', async t => {
test('skip `rules` via `test` function', async t => {
const url = 'https://microlink.io'

const html = `
Expand Down Expand Up @@ -148,3 +149,36 @@ test('associate test function with rules', async t => {
t.is(metadata.foo, null)
t.true(isCalled)
})

// A caller-supplied `htmlDom` must win over `html` when both are passed.
test('passing `htmlDom`', async t => {
  const metascraper = createMetascraper([titleRules])

  // Raw markup carrying a different <title>; the assertion below expects it
  // to be ignored in favour of the pre-parsed DOM.
  const html = `
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<meta http-equiv="X-UA-Compatible" content="ie=edge">
<title>Original HTML</title>
</head>
<body>
<div class="logos">
<img class="logo" href="https://microlink.io/logo.png">
<img class="logo" href="https://microlink.io/logo.png">
<img class="logo" href="https://microlink.io/logo.png">
<img class="logo" href="https://microlink.io/logo.png">
</div>
<img class="main-logo" href="https://microlink.io/logo.png">
<p>Hello World </p>
</body>
</html>
`

  // Pre-parsed DOM whose <title> is the value the test expects back.
  const htmlDom = load('<title>htmlDom</title>')
  const url = 'https://microlink.io'

  const { title } = await metascraper({ url, htmlDom, html })
  t.is(title, 'htmlDom')
})

0 comments on commit a4b3306

Please sign in to comment.