diff --git a/package.json b/package.json
index 282c44c..0acd13d 100644
--- a/package.json
+++ b/package.json
@@ -21,7 +21,8 @@
"dependencies": {
"babel-polyfill": "^6.9.0",
"isomorphic-fetch": "^2.2.1",
- "underscore": "^1.8.3"
+ "underscore": "^1.8.3",
+ "wiki-infobox-parser-core": "0.0.1"
},
"devDependencies": {
"babel-cli": "^6.9.0",
diff --git a/src/parser/index.js b/src/parser/index.js
deleted file mode 100644
index 7185399..0000000
--- a/src/parser/index.js
+++ /dev/null
@@ -1,225 +0,0 @@
-var utils = require('./utils');
-
-/**
- * Main markup syndax parser
- * @param {string} data markup text
- * @param {Function} callback callback function
- * @return {undefined} return undefined when error occurs
- */
-module.exports = function(data, callback) {
-
- var content;
-
- /**********************************
- * Parse scraping result, *
- * which is in the format of JSON *
- **********************************/
-
- try {
- content = JSON.parse(data);
- } catch(e) {
- callback(e);
- return;
- }
-
- if (!content.query) { callback(new Error('Query Not Found')); return; }
-
- /**
- * Get JSON data
- */
- var json = content.query.pages;
- var key = Object.keys(json);
-
- if (key.indexOf('-1') === 0) {
- callback(new Error('Page Index Not Found'));
- return;
- } else if(!json[key]){
- callback(new Error('Malformed Response Payload'));
- return;
- } else if (json[key].revisions[0]['*'].indexOf('REDIRECT') > -1) {
- callback(new Error(json[key].revisions[0]['*']));
- return;
- }
-
- /**
- * Get the JSON data that contains infobox section
- */
- var reg = new RegExp('{{[Ii]nfobox(.|\n)*}}', 'g');
- var text = reg.exec(json[key].revisions[0]['*']);
- if (!text) { callback(new Error('Infobox Not Found')); return; }
- text = text[0];
-
-
- /************************
- * Remove useless marks *
- ************************/
-
- /*
- * Remove comments
- */
- text = utils.replaceAll('<!--.*-->', '', text);
- /*
- * Remove reference
- * TODO: support reference in advanced model
- */
- text = utils.replaceAll('<ref.*(/>|>.*</ref>)', '', text);
- /*
- * Remove all HTML tags like '<br>', etc.
- */
- text = utils.replaceAll('<[^>]+>', '', text);
- /*
- * Remove footnote
- * TODO; support footnote in advanced model
- */
- text = utils.replaceAll('\{\{refn[^\}\}]*?\}\}', '', text);
-
- /*
- * Merge order, bulleted, unbulleted, Pagelist
- * list items to one line
- */
- var lists = text.match(/\{\{(order|bulleted|unbulleted|Pagelist)(.*\n)*?\}\}/g);
- if (lists && lists.length) {
- lists.forEach(function(l) {
- text = text.replace(l, l.replace('{{', '').replace('}}', '')
- .replace(/(order|bulleted|unbulleted)\slist\n\|/g, '')
- .split('\n|').join(', '));
- });
- }
-
- /*
- * Parse URL
- */
- lists = text.match(/\{\{(URL)(.*)\}\}/g);
- if (lists && lists.length) {
- lists.forEach(function(l) {
- var tmp = l.replace('{{', '').replace('}}', '').split('|');
- text = (tmp && tmp.length > 0) ? text.replace(l, tmp[tmp.length - 1]) : text;
- });
- }
-
- /*
- * Parse Start date
- */
- lists = text.match(/\{\{(Start\sdate)(.*)\}\}/g);
- if (lists && lists.length) {
- lists.forEach(function(l) {
- var tmp = l.replace('{{', '').replace('}}', '').split('|');
- /* Pop first element: 'Start date' */
- tmp.shift();
- text = (tmp) ? text.replace(l, tmp.join('/')) : text;
- });
- }
-
- /*****************************
- * Analyze each line of text *
- *****************************/
-
- var result = {};
- text.split('\n|').forEach(function(item) {
- /**
- * Extract {item_name, item_content} from each item
- */
- var itemIndex = item.indexOf('=');
- if (itemIndex != -1) {
- var item_name = item.substr(0, itemIndex).trim();
- var item_content = item.substr(itemIndex + 1).trim().split('\n')[0];
-
- /*
- * Extract all simple texts inside '[[ ]]'
- * such as [[France]], [[Language French|French]], etc.
- */
- var find = item_content.match(/\[\[.*?\]\]/g);
- if (find) {
- find.forEach(function(substring) {
- var barestring = substring.replace('[[', '').replace(']]', '');
- var arr = barestring.split('|');
- /**
- * TODO: support link.
- * Reference: https://en.wikipedia.org/wiki/Help:Wiki_markup#Links_and_URLs
- */
- item_content = item_content.replace(substring, arr[arr.length - 1]);
- });
- }
-
- /*
- * Remove font style
- * {{fake clarify}}
- * {{fake citation needed}}
- * {{fake elucidate}}
- * {{fake heading}}
- * {{fake notes and references}}
- * {{dummy ref}}
- * {{dummy backlink}}
- * {{dummy footnote}}
- * {{break}}
- * {{break|5}}
- * {{clear}}
- * {{clear|left}}
- * {{clear|right}}
- * {{plainlist}}
- * {{startflatlist}}
- * {{flatlist}}
- * {{hlist|first item|second item|third item|...}}
- * {{bulleted list |item1 |item2 |...}}
- * {{pagelist}}
- * {{nowrap}}
- * {{italics}}
- * {{smallcaps|small caps}}
- * {{pad|4.0em}}
- */
- while (item_content.indexOf('{{nowrap|') !== -1) {
- item_content = item_content.replace('{{nowrap|', '');
- item_content = item_content.replace('}}', '');
- }
-
- while (item_content.indexOf('{{small|') !== -1) {
- item_content = item_content.replace('{{small|', '');
- item_content = item_content.replace('}}', '');
- }
-
- if (item_content.indexOf('{{native') !== -1) {
- find = item_content.match(/\{\{native[^\}\}]*?\}\}/g);
- find && find.forEach(function(substring) {
- item_content = item_content.replace(substring, substring.split('|')[2]);
- });
- }
-
- /* Remove simple vertical list tag */
- if (item_content.indexOf('{{vunblist') !== -1 &&
- item_content.split('{{').length < 3) {
-
- find = item_content.match(/\{\{vunblist[^\}\}]*?\}\}/g);
- find && find.forEach(function(substring) {
- var tmp = substring.split('|');
- tmp.shift();
- item_content = item_content.replace(substring, tmp.join(',').replace('}}', ''));
- });
- }
-
- /* Remove horizon list tag */
- if (item_content.indexOf('{{hlist') !== -1) {
- find = item_content.match(/\{\{hlist[^\}\}]*?\}\}/g);
- find && find.forEach(function(substring) {
- var tmp = substring.split('|');
- tmp.shift();
- item_content = item_content.replace(substring, tmp.join(',').replace('}}', ''));
- });
- }
-
- /* Remove efn tag */
- if (item_content.indexOf('{{efn') !== -1) {
- find = item_content.match(/\{\{efn[^\}\}]*?\}\}/g);
- find && find.forEach(function(substring) {
- item_content = item_content.replace(substring, '');
- });
- }
-
- item_content = utils.replaceAll('&nbsp;', ' ', item_content);
- item_content = utils.replaceAll('\n\}\}', '', item_content);
- result[item_name] = item_content;
- }
- });
-
- callback(null, JSON.stringify(result));
- return;
-};
diff --git a/src/parser/utils.js b/src/parser/utils.js
deleted file mode 100644
index 0d6e6bd..0000000
--- a/src/parser/utils.js
+++ /dev/null
@@ -1,39 +0,0 @@
-/**
- * Collection of utils
- * @type {Object}
- */
-var utils = {
- /**
- * Relace all target strings
- * @method function
- * @param {string} find target snippet
- * @param {string} replace new snippet
- * @param {string} str original string
- * @return {string} new string with new snippet
- */
- replaceAll: function(find, replace, str) {
- if(str) {
- return str.replace(new RegExp(find, 'gm'), replace).trim();
- } else {
- return null;
- }
- },
-
- /**
- * Valid JSON format
- * @method function
- * @param {string} text JSON string
- * @return {boolean} boolean valid or not
- */
- checkJson: function(text) {
- if (text && /^[\],:{}\s]*$/.test(text.replace(/\\["\\\/bfnrtu]/g, '@').
- replace(/"[^"\\\n\r]*"|true|false|null|-?\d+(?:\.\d*)?(?:[eE][+\-]?\d+)?/g, ']').
- replace(/(?:^|:|,)(?:\s*\[)+/g, ''))) {
- return true;
- } else {
- return false;
- }
- }
-};
-
-module.exports = utils;
diff --git a/src/wiki.js b/src/wiki.js
index f40750e..20adc7e 100644
--- a/src/wiki.js
+++ b/src/wiki.js
@@ -3,7 +3,7 @@
import 'babel-polyfill';
import fetch from 'isomorphic-fetch';
import _ from 'underscore';
-import wikiInfoboxParser from './parser';
+import wikiInfoboxParser from 'wiki-infobox-parser-core';
import querystring from 'querystring';
/**