Skip to content

Commit

Permalink
fix(parser): date parser issue for Chinese in older w issues
Browse files Browse the repository at this point in the history
  • Loading branch information
rhahao committed Sep 17, 2024
1 parent 55768fb commit 6ded852
Show file tree
Hide file tree
Showing 3 changed files with 13 additions and 48 deletions.
4 changes: 2 additions & 2 deletions package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

10 changes: 2 additions & 8 deletions src/common/date_parser.ts
Original file line number Diff line number Diff line change
@@ -1,11 +1,5 @@
import { JWEPUBParserError } from '../classes/error.js';
import {
WDateParsing,
WDateParsingResult,
LangRegExp,
MWBDateParsingResult,
MWBDateParsing,
} from '../types/index.js';
import { WDateParsing, WDateParsingResult, LangRegExp, MWBDateParsingResult, MWBDateParsing } from '../types/index.js';
import { getMonthNames } from './language_rules.js';

const dateRangeSeparator = `\\s? bis |[-–—]| do | — | – \\s?`;
Expand Down Expand Up @@ -179,7 +173,7 @@ option2 = `(${wordWithDiacritics}) (\\d{1,2}), (\\d{4})`;
const wDatePatternE = `${option1}|${option2}`;

// date like: 1) 2024年12月16-22日; or 2) 2024年12月30日-2025年1月5日
option1 = `(\\d{4})年(\\d{1,2})月(\\d{1,2})[-–](\\d{1,2})日`;
option1 = `(\\d{4})年(\\d{1,2})月(\\d{1,2})[-–](\\d{1,2})日`;
option2 = `(\\d{4})年(\\d{1,2})月(\\d{1,2})日`;
const wDatePatternJ = `${option1}|${option2}`;

Expand Down
47 changes: 9 additions & 38 deletions src/node/index.ts
Original file line number Diff line number Diff line change
@@ -1,43 +1,14 @@
import './utils.node.js';
import { parseMWBSchedule, parseWSchedule, startParse } from '../common/parser.js';
import { startParse } from '../common/parser.js';
import { validateInput } from '../common/epub_validation.js';
import { HTMLParse } from '../common/html_validation.js';

/**
 * Validates the supplied EPUB input and then parses it.
 *
 * @param epubInput - The EPUB source: a string, a `Blob`, or an object carrying a `url`.
 * @returns The value resolved by `startParse`. When validation or parsing
 * throws, the error is logged with `console.error` and `undefined` is returned.
 */
export const loadEPUB = async (epubInput: string | Blob | { url: string }) => {
  try {
    validateInput(epubInput);

    // Await inside the try block so a rejected parse is handled by the catch below.
    return await startParse(epubInput);
  } catch (error) {
    console.error(error);
  }
};

/**
 * Parses an MWB schedule out of an HTML string.
 *
 * @param htmlString - Markup that is converted with `HTMLParse`; its first `<article>` element is the one parsed.
 * @param mwbYear - Year forwarded unchanged to `parseMWBSchedule`.
 * @param mwbLang - Language forwarded unchanged to `parseMWBSchedule`.
 * @returns Whatever `parseMWBSchedule` returns. If anything throws, the error
 * is logged with `console.error` and `undefined` is returned.
 */
export const parseMWB = (htmlString: string, mwbYear: number, mwbLang: string) => {
  try {
    // Convert the raw markup and pick out its <article> element.
    // NOTE(review): the non-null assertion assumes an <article> is always present — confirm against callers.
    const articleElement = HTMLParse(htmlString).querySelector('article')!;

    return parseMWBSchedule(articleElement, mwbYear, mwbLang);
  } catch (error) {
    console.error(error);
  }
};

/**
 * Parses a W schedule from two HTML strings.
 *
 * @param articleString - Markup converted with `HTMLParse` and passed as the first argument to `parseWSchedule`.
 * @param contentString - Markup converted with `HTMLParse` and passed as the second argument to `parseWSchedule`.
 * @param wLang - Language forwarded unchanged to `parseWSchedule`.
 * @returns Whatever `parseWSchedule` returns. If anything throws, the error
 * is logged with `console.error` and `undefined` is returned.
 */
export const parseW = (articleString: string, contentString: string, wLang: string) => {
  try {
    // Arguments are evaluated left to right, so the article is converted before the content.
    return parseWSchedule(HTMLParse(articleString), HTMLParse(contentString), wLang);
  } catch (error) {
    console.error(error);
  }
};
try {
validateInput(epubInput);

const data = await startParse(epubInput);
return data;
} catch (err) {
console.error(err);
}
};

0 comments on commit 6ded852

Please sign in to comment.