Skip to content

Commit

Permalink
chore: Inline test fixtures (#683)
Browse files Browse the repository at this point in the history
Not to be confused with extractor fixtures, which are snapshots of a webpage.

This change removes the pattern of separate JS files that provide "fixtures" for tests, i.e. the input strings and expected strings that the tests use. They were inconsistent and disorganized, and generally just served to add indirection to test files. Now all of those strings are defined where they are used, in their respective tests.
  • Loading branch information
johnholdun authored Aug 16, 2022
1 parent 0d2bad5 commit 112846f
Show file tree
Hide file tree
Showing 64 changed files with 1,260 additions and 2,995 deletions.
15 changes: 0 additions & 15 deletions src/cleaners/fixtures/html.js

This file was deleted.

3 changes: 1 addition & 2 deletions src/cleaners/lead-image-url.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,7 @@ describe('clean(leadImageUrl)', () => {
});

it('returns null if the url is not valid', () => {
const url = 'this is not a valid url';
assert.equal(clean(url), null);
assert.equal(clean('this is not a valid url'), null);
});

it('trims whitespace', () => {
Expand Down
18 changes: 13 additions & 5 deletions src/cleaners/title.test.js
Original file line number Diff line number Diff line change
@@ -1,27 +1,35 @@
import assert from 'assert';
import cheerio from 'cheerio';

import HTML from './fixtures/html';
import { cleanTitle } from './index';

describe('cleanTitle(title, { url, $ })', () => {
it('only uses h1 if there is only one on the page', () => {
const title = 'Too Short';
const $ = cheerio.load(HTML.docWith2H1s);
const $ = cheerio.load(`
<div>
<h1>This Is the Real Title</h1>
<h1>This Is the Real Title</h1>
</div>
`);

assert.equal(cleanTitle(title, { url: '', $ }), title);
});

it('removes HTML tags from titles', () => {
const $ = cheerio.load(HTML.docWithTagsInH1.before);
const $ = cheerio.load(
'<div><h1>This Is the <em>Real</em> Title</h1></div>'
);
const title = $('h1').html();

assert.equal(cleanTitle(title, { url: '', $ }), HTML.docWithTagsInH1.after);
assert.equal(cleanTitle(title, { url: '', $ }), 'This Is the Real Title');
});

it('trims extraneous spaces', () => {
const title = " This Is a Great Title That You'll Love ";
const $ = cheerio.load(HTML.docWithTagsInH1.before);
const $ = cheerio.load(
'<div><h1>This Is the <em>Real</em> Title</h1></div>'
);

assert.equal(cleanTitle(title, { url: '', $ }), title.trim());
});
Expand Down
10 changes: 4 additions & 6 deletions src/extractors/detect-by-html.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -5,17 +5,15 @@ import detectByHtml from './detect-by-html';

describe('detectByHtml', () => {
it('detects a medium post from the html', () => {
const html = '<head><meta name="al:ios:app_name" value="Medium" /></head>';

const $ = cheerio.load(html);
const $ = cheerio.load(
'<head><meta name="al:ios:app_name" value="Medium" /></head>'
);

assert.equal(detectByHtml($).domain, 'medium.com');
});

it('returns nothing if no match is found', () => {
const html = '<div></div>';

const $ = cheerio.load(html);
const $ = cheerio.load('<div></div>');

assert.equal(detectByHtml($), null);
});
Expand Down
29 changes: 22 additions & 7 deletions src/extractors/generic/author/extractor.test.js
Original file line number Diff line number Diff line change
@@ -1,39 +1,54 @@
import assert from 'assert';
import cheerio from 'cheerio';

import HTML from './fixtures/html';
import GenericAuthorExtractor from './extractor';

describe('GenericAuthorExtractor', () => {
describe('extract($, cachedMeta)', () => {
it('extracts author from meta tags', () => {
const $ = cheerio.load(HTML.authorMeta.test);
const $ = cheerio.load(`
<html>
<meta name="dc.author" value="Adam" />
</html>
`);
const result = GenericAuthorExtractor.extract({
$,
metaCache: ['dc.author', 'something-else'],
});

assert.equal(result, HTML.authorMeta.result);
assert.equal(result, 'Adam');
});

it('extracts author from author selectors', () => {
const $ = cheerio.load(HTML.authorSelectors.test);
const $ = cheerio.load(`
<div>
<div class="byline">
<a href="/author/adam">Adam</a>
</div>
</div>
`);
const result = GenericAuthorExtractor.extract({
$,
metaCache: ['dc.author', 'something-else'],
});

assert.equal(result, HTML.authorSelectors.result);
assert.equal(result, 'Adam');
});

it('extracts author with regex selectors', () => {
const $ = cheerio.load(HTML.authorRegSelectors.test);
const $ = cheerio.load(`
<div>
<div class="byline">
<span>By Adam</span>
</div>
</div>
`);
const result = GenericAuthorExtractor.extract({
$,
metaCache: ['dc.author', 'something-else'],
});

assert.equal(result, HTML.authorRegSelectors.result);
assert.equal(result, 'Adam');
});

it('returns null if no author found', () => {
Expand Down
32 changes: 0 additions & 32 deletions src/extractors/generic/author/fixtures/html.js

This file was deleted.

10 changes: 4 additions & 6 deletions src/extractors/generic/content/extract-best-node.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -7,14 +7,12 @@ const fs = require('fs');
describe('extractBestNode($, flags)', () => {
it('scores the dom nodes and returns the best option', () => {
const html = fs.readFileSync('./fixtures/latimes.html', 'utf-8');
const opts = {
stripUnlikelyCandidates: true,
weightNodes: true,
};

const $ = cheerio.load(html);

const bestNode = extractBestNode($, opts);
const bestNode = extractBestNode($, {
stripUnlikelyCandidates: true,
weightNodes: true,
});

assert(typeof bestNode, 'object');
});
Expand Down
10 changes: 4 additions & 6 deletions src/extractors/generic/content/scoring/add-score.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -7,17 +7,15 @@ describe('Scoring utils', () => {
describe('addScore(node, $, amount)', () => {
it("adds the specified amount to a node's score", () => {
const $ = cheerio.load('<p score="25">Foo</p>');
let $node = $('p').first();

$node = addScore($node, $, 25);
const $node = $('p').first();
addScore($node, $, 25);
assert.equal(getScore($node), 50);
});

it('adds score if score not yet set (assumes score is 0)', () => {
const $ = cheerio.load('<p>Foo</p>');
let $node = $('p').first();

$node = addScore($node, $, 25);
const $node = $('p').first();
addScore($node, $, 25);
assert.equal(getScore($node), 25);
});
});
Expand Down
7 changes: 2 additions & 5 deletions src/extractors/generic/content/scoring/add-to-parent.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,8 @@ import { addToParent, getScore } from './index';
describe('Scoring utils', () => {
describe('addToParent(node, $, amount)', () => {
it("adds 1/4 of a node's score it its parent", () => {
const html = '<div score="25"><p score="40">Foo</p></div>';
const $ = cheerio.load(html);
let $node = $('p').first();

$node = addToParent($node, $, 40);
const $ = cheerio.load('<div score="25"><p score="40">Foo</p></div>');
const $node = addToParent($('p').first(), $, 40);

assert.equal(getScore($node.parent()), 35);
assert.equal(getScore($node), 40);
Expand Down
40 changes: 29 additions & 11 deletions src/extractors/generic/content/scoring/find-top-candidate.test.js
Original file line number Diff line number Diff line change
@@ -1,47 +1,65 @@
import assert from 'assert';
import cheerio from 'cheerio';

import HTML from './fixtures/html';

import { getScore, findTopCandidate, scoreContent } from './index';

const fs = require('fs');

describe('findTopCandidate($)', () => {
it('finds the top candidate from simple case', () => {
const $ = cheerio.load(HTML.findDom1);
const $ = cheerio.load(`
<div score="100">
<p score="1">Lorem ipsum etc</p>
</div>
`);

const $$topCandidate = findTopCandidate($);

assert.equal(getScore($$topCandidate), 100);
});

it('finds the top candidate from a nested case', () => {
const $ = cheerio.load(HTML.findDom2);
const $ = cheerio.load(`
<div score="10">
<article score="50">
<p score="1">Lorem ipsum etc</p>
</article>
</div>
`);

const $$topCandidate = findTopCandidate($);

// this is wrapped in a div so checking
// the score of the first child
// this is wrapped in a div so checking the score of the first child
assert.equal(getScore($$topCandidate.first()), 50);
});

it('ignores tags like BR', () => {
const $ = cheerio.load(HTML.findDom3);
const $ = cheerio.load(`
<article score="50">
<p score="1">Lorem ipsum br</p>
<br score="1000" />
</article>
`);

const $topCandidate = findTopCandidate($);

assert.equal(getScore($topCandidate), 50);
});

it('returns BODY if no candidates found', () => {
const $ = cheerio.load(HTML.topBody);
const $ = cheerio.load(`
<body>
<article>
<p>Lorem ipsum etc</p>
<br />
</article>
<body>
`);

const $topCandidate = findTopCandidate($);

// browser won't allow body tag to be placed
// arbitrarily/loaded on the page, so we tranform
// it in cheerio-query, so this test would fail.
// A browser won't allow a body tag to be placed arbitrarily/loaded on the page,
// so we transform it in cheerio-query; this test would therefore fail there.
if (!$.browser) {
assert.equal($topCandidate.get(0).tagName, 'body');
}
Expand Down
Loading

0 comments on commit 112846f

Please sign in to comment.