From ce5cd2dd0db932ba2175fa2c9edadef4baeca3c5 Mon Sep 17 00:00:00 2001 From: kik0220 <4346449+kik0220@users.noreply.github.com> Date: Fri, 12 Apr 2019 23:18:47 +0900 Subject: [PATCH] feat: add phpspot.org custom parser (#369) * feat: add phpspot.org custom parser * fix: date_published selector --- fixtures/phpspot.org/1554622277068.html | 263 ++++++++++++++++++ src/extractors/custom/index.js | 1 + src/extractors/custom/phpspot.org/index.js | 29 ++ .../custom/phpspot.org/index.test.js | 106 +++++++ 4 files changed, 399 insertions(+) create mode 100644 fixtures/phpspot.org/1554622277068.html create mode 100644 src/extractors/custom/phpspot.org/index.js create mode 100644 src/extractors/custom/phpspot.org/index.test.js diff --git a/fixtures/phpspot.org/1554622277068.html b/fixtures/phpspot.org/1554622277068.html new file mode 100644 index 000000000..2129df5f7 --- /dev/null +++ b/fixtures/phpspot.org/1554622277068.html @@ -0,0 +1,263 @@ + + + + + + + + + + + +マインドマップ風のツリーを描画できる「Treeviz」:phpspot開発日誌 + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + +
+ + + +
+ +
+ + + +
+前の記事 ≪:CSS3とjQueryで作ったテキストアニメーション「sahansira/Text-Slider」
+ + +
+ + +

マインドマップ風のツリーを描画できる「Treeviz」

+

2019年04月04日-はてなブックマーク

+ +
+スポンサード リンク
+ + +
+[PR] 英単語を忘却曲線アプリを使って超効率よく記憶する方法
+
+

Treeviz
+
+マインドマップ風のツリーを描画できる「Treeviz」
+次のような図形を描画できるcodepenでのサンプルです
+
+
+関連エントリ

+ + + +
+ + + +
+
+
+
+
+
+
+
+
+スポンサード リンク
+ + +

By.KJ : 2019年04月04日 09:00 livedoor Readerで購読 Twitterに投稿

間違いの指摘をしていただける方はメール、あるいはTwitter/FBでお願いします(クリック)

+
+ + + + + +
+ +
+ +
+ + + +
+ + + + + + +
+ + +
+ + +
+ +
+
+ + + + + + + + + + + + + + \ No newline at end of file diff --git a/src/extractors/custom/index.js b/src/extractors/custom/index.js index eddc96973..e48948c9c 100644 --- a/src/extractors/custom/index.js +++ b/src/extractors/custom/index.js @@ -105,3 +105,4 @@ export * from './scan.netsecurity.ne.jp'; export * from './jvndb.jvn.jp'; export * from './genius.com'; export * from './www.jnsa.org'; +export * from './phpspot.org'; diff --git a/src/extractors/custom/phpspot.org/index.js b/src/extractors/custom/phpspot.org/index.js new file mode 100644 index 000000000..041157337 --- /dev/null +++ b/src/extractors/custom/phpspot.org/index.js @@ -0,0 +1,29 @@ +export const PhpspotOrgExtractor = { + domain: 'phpspot.org', + + title: { + selectors: ['h3.hl'], + }, + + author: null, + + date_published: { + selectors: ['h4.hl'], + format: 'YYYY年MM月DD日', + timezone: 'Asia/Tokyo', + }, + + dek: null, + + lead_image_url: null, + + content: { + selectors: ['div.entrybody'], + + defaultCleaner: false, + + transforms: {}, + + clean: [], + }, +}; diff --git a/src/extractors/custom/phpspot.org/index.test.js b/src/extractors/custom/phpspot.org/index.test.js new file mode 100644 index 000000000..97ad53261 --- /dev/null +++ b/src/extractors/custom/phpspot.org/index.test.js @@ -0,0 +1,106 @@ +import assert from 'assert'; +import URL from 'url'; +import cheerio from 'cheerio'; + +import Mercury from 'mercury'; +import getExtractor from 'extractors/get-extractor'; +import { excerptContent } from 'utils/text'; + +const fs = require('fs'); + +describe('PhpspotOrgExtractor', () => { + describe('initial test case', () => { + let result; + let url; + beforeAll(() => { + url = 'http://phpspot.org/blog/archives/2019/04/treeviz.html'; + const html = fs.readFileSync('./fixtures/phpspot.org/1554622277068.html'); + result = Mercury.parse(url, { + html, + fallback: false, + }); + }); + + it('is selected properly', () => { + // This test should be passing by default. + // It sanity checks that the correct parser + // is being selected for URLs from this domain + const extractor = getExtractor(url); + assert.equal(extractor.domain, URL.parse(url).hostname); + }); + + it('returns the title', async () => { + // To pass this test, fill out the title selector + // in ./src/extractors/custom/phpspot.org/index.js. + const { title } = await result; + + // Update these values with the expected values from + // the article. + assert.equal(title, `マインドマップ風のツリーを描画できる「Treeviz」`); + }); + + it('returns the author', async () => { + // To pass this test, fill out the author selector + // in ./src/extractors/custom/phpspot.org/index.js. + const { author } = await result; + + // Update these values with the expected values from + // the article. + assert.equal(author, null); + }); + + it('returns the date_published', async () => { + // To pass this test, fill out the date_published selector + // in ./src/extractors/custom/phpspot.org/index.js. + const { date_published } = await result; + + // Update these values with the expected values from + // the article. + assert.equal(date_published, '2019-04-03T15:00:00.000Z'); + }); + + it('returns the dek', async () => { + // To pass this test, fill out the dek selector + // in ./src/extractors/custom/phpspot.org/index.js. + const { dek } = await result; + + // Update these values with the expected values from + // the article. + assert.equal(dek, null); + }); + + it('returns the lead_image_url', async () => { + // To pass this test, fill out the lead_image_url selector + // in ./src/extractors/custom/phpspot.org/index.js. + const { lead_image_url } = await result; + + // Update these values with the expected values from + // the article. + assert.equal(lead_image_url, null); + }); + + it('returns the content', async () => { + // To pass this test, fill out the content selector + // in ./src/extractors/custom/phpspot.org/index.js. + // You may also want to make use of the clean and transform + // options. + const { content } = await result; + + const $ = cheerio.load(content || ''); + + const first13 = excerptContent( + $('*') + .first() + .text(), + 2 + ); + + // Update these values with the expected values from + // the article. + assert.equal( + first13, + 'Treeviz マインドマップ風のツリーを描画できる「Treeviz」' + ); + }); + }); +});