Skip to content

Commit

Permalink
🪪 Nest external identifiers in frontmatter
Browse files Browse the repository at this point in the history
  • Loading branch information
fwkoch committed Oct 17, 2024
1 parent ec48a72 commit 2f7cb6b
Show file tree
Hide file tree
Showing 8 changed files with 200 additions and 35 deletions.
5 changes: 5 additions & 0 deletions .changeset/great-steaks-talk.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
---
'myst-frontmatter': patch
---

Nest external identifiers in frontmatter
3 changes: 2 additions & 1 deletion packages/myst-frontmatter/src/page/page.yml
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,8 @@ cases:
name: University B
label: example.md
doi: 10.1000/abcd/efg012
arxiv: https://arxiv.org/example
identifiers:
arxiv: https://arxiv.org/example
open_access: true
license:
content:
Expand Down
94 changes: 85 additions & 9 deletions packages/myst-frontmatter/src/project/project.yml
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,8 @@ cases:
name: University A
date: '2021-12-14'
doi: 10.1000/abcd/efg012
arxiv: https://arxiv.org/example
identifiers:
arxiv: https://arxiv.org/example
open_access: true
license:
content:
Expand Down Expand Up @@ -238,26 +239,101 @@ cases:
errors: 1
- title: pmcid validates
raw:
pmcid: PMC123
identifiers:
pmcid: PMC123
normalized:
pmcid: PMC123
identifiers:
pmcid: PMC123
- title: invalid pmcid errors
raw:
pmcid: '123'
identifiers:
pmcid: '123'
normalized: {}
errors: 1
- title: pmid string validates
raw:
pmid: '123'
identifiers:
pmid: '123'
normalized:
pmid: 123
identifiers:
pmid: 123
- title: pmid number validates
raw:
pmid: 123
identifiers:
pmid: 123
normalized:
pmid: 123
identifiers:
pmid: 123
- title: invalid pmid errors
raw:
pmid: 'abc'
identifiers:
pmid: 'abc'
normalized: {}
errors: 1
- title: doi is lifted from identifiers
raw:
identifiers:
doi: 10.1000/abc123
pmid: 123
normalized:
doi: 10.1000/abc123
identifiers:
pmid: 123
warnings: 1
- title: unknown string identifier passes
raw:
identifiers:
unknown: '123'
normalized:
identifiers:
unknown: '123'
- title: unknown numeric identifier passes
raw:
identifiers:
unknown: 123
normalized:
identifiers:
unknown: 123
- title: known identifiers nest under identifiers
raw:
zenodo: https://zenodo.org/records/13942262
pmcid: PMC123
pmid: 123
arxiv: https://arxiv.org/example
normalized:
identifiers:
zenodo: https://zenodo.org/records/13942262
pmcid: PMC123
pmid: 123
arxiv: https://arxiv.org/example
- title: invalid zenodo errors
raw:
identifiers:
zenodo: '13942262'
normalized: {}
errors: 1
- title: identifier coerces to identifiers
raw:
identifier:
unknown: 123
normalized:
identifiers:
unknown: 123
- title: duplicate identifiers error
raw:
pmcid: PMC456
identifier:
pmcid: PMC123
normalized:
identifiers:
pmcid: PMC123
errors: 1
- title: duplicate dois error
raw:
doi: 10.1000/abc456
identifier:
doi: 10.1000/abc123
normalized:
doi: 10.1000/abc456
errors: 1

11 changes: 5 additions & 6 deletions packages/myst-frontmatter/src/project/types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -20,12 +20,12 @@ export const PAGE_KNOWN_PARTS = [
'acknowledgments',
];

/**
 * Names of external identifiers that are recognized by key.
 *
 * These keys are also spread into `PROJECT_AND_PAGE_FRONTMATTER_KEYS`, so they are accepted
 * at the top level of project/page frontmatter; during validation such top-level values are
 * collected under the `identifiers` object.
 */
export const KNOWN_EXTERNAL_IDENTIFIERS = ['arxiv', 'pmid', 'pmcid', 'zenodo'];

export const PROJECT_AND_PAGE_FRONTMATTER_KEYS = [
'date',
'doi',
'arxiv',
'pmid',
'pmcid',
'identifiers',
'open_access',
'license',
'binder',
Expand All @@ -45,6 +45,7 @@ export const PROJECT_AND_PAGE_FRONTMATTER_KEYS = [
'settings', // We maybe want to move this into site frontmatter in the future
'parts',
...PAGE_KNOWN_PARTS,
...KNOWN_EXTERNAL_IDENTIFIERS,
// Do not add any project specific keys here!
...SITE_FRONTMATTER_KEYS,
];
Expand All @@ -63,9 +64,7 @@ export const PROJECT_FRONTMATTER_KEYS = [
export type ProjectAndPageFrontmatter = SiteFrontmatter & {
date?: string;
doi?: string;
arxiv?: string;
pmid?: number;
pmcid?: string;
identifiers?: Record<string, string | number>;
open_access?: boolean;
license?: Licenses;
binder?: string;
Expand Down
103 changes: 84 additions & 19 deletions packages/myst-frontmatter/src/project/validators.ts
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ import {
validateString,
validateUrl,
validationError,
validationWarning,
} from 'simple-validators';
import { validateTOC } from 'myst-toc';
import { validatePublicationMeta } from '../biblio/validators.js';
Expand All @@ -22,12 +23,53 @@ import { validateExternalReferences } from '../references/validators.js';
import { validateSiteFrontmatterKeys } from '../site/validators.js';
import { validateThebe } from '../thebe/validators.js';
import { validateDoi, validateStringOrNumber } from '../utils/validators.js';
import { PAGE_KNOWN_PARTS, PROJECT_FRONTMATTER_KEYS } from './types.js';
import { KNOWN_EXTERNAL_IDENTIFIERS, PAGE_KNOWN_PARTS, PROJECT_FRONTMATTER_KEYS } from './types.js';
import type { ProjectAndPageFrontmatter, ProjectFrontmatter } from './types.js';
import { validateProjectAndPageSettings } from '../settings/validators.js';
import { FRONTMATTER_ALIASES } from '../site/types.js';
import { validateMathMacroObject } from '../math/validators.js';

/**
 * Select the validation function for one entry of the `identifiers` frontmatter object.
 *
 * Dispatch by key:
 * - `arxiv`  → `validateUrl` with `includes: 'arxiv.org'`
 * - `pmid`   → `validateNumber` with `integer: true` and `min: 1`
 * - `pmcid`  → `validateString` with `regex: '^PMC[0-9]+$'`
 * - `zenodo` → `validateUrl` with `includes: 'zenodo.org'`
 * - anything else → `validateStringOrNumber`
 *
 * @param key - Name of the identifier as it appears under `identifiers`.
 * @returns A function that takes the raw value plus the parent validation options. The
 *   options are passed through `incrementOptions` with the identifier key before being
 *   handed to the underlying validator, and that validator's result is returned as-is.
 */
function getExternalIdentifierValidator(
  key: string,
): (value: any, opts: ValidationOptions) => string | number | undefined {
  switch (key) {
    case 'arxiv':
      return (raw: any, parentOpts: ValidationOptions) =>
        validateUrl(raw, {
          ...incrementOptions('arxiv', parentOpts),
          includes: 'arxiv.org',
        });
    case 'pmid':
      return (raw: any, parentOpts: ValidationOptions) =>
        validateNumber(raw, {
          ...incrementOptions('pmid', parentOpts),
          integer: true,
          min: 1,
        });
    case 'pmcid':
      return (raw: any, parentOpts: ValidationOptions) =>
        validateString(raw, {
          ...incrementOptions('pmcid', parentOpts),
          regex: '^PMC[0-9]+$',
        });
    case 'zenodo':
      return (raw: any, parentOpts: ValidationOptions) =>
        validateUrl(raw, {
          ...incrementOptions('zenodo', parentOpts),
          includes: 'zenodo.org',
        });
    default:
      // Unrecognized identifiers are kept, as long as they are a string or a number.
      return (raw: any, parentOpts: ValidationOptions) =>
        validateStringOrNumber(raw, incrementOptions(key, parentOpts));
  }
}

export function validateProjectAndPageFrontmatterKeys(
value: Record<string, any>,
opts: ValidationOptions,
Expand All @@ -36,27 +78,50 @@ export function validateProjectAndPageFrontmatterKeys(
if (defined(value.date)) {
output.date = validateDate(value.date, incrementOptions('date', opts));
}
if (defined(value.doi)) {
output.doi = validateDoi(value.doi, incrementOptions('doi', opts));
const identifiersOpts = incrementOptions('identifiers', opts);
let identifiers: Record<string, string | number> | undefined;
if (defined(value.identifiers)) {
identifiers = validateObjectKeys(
value.identifiers,
{ optional: KNOWN_EXTERNAL_IDENTIFIERS },
{ keepExtraKeys: true, suppressWarnings: true, ...identifiersOpts },
);
}
if (defined(value.arxiv)) {
output.arxiv = validateUrl(value.arxiv, {
...incrementOptions('arxiv', opts),
includes: 'arxiv.org',
});
KNOWN_EXTERNAL_IDENTIFIERS.forEach((identifierKey) => {
if (defined(value[identifierKey])) {
identifiers ??= {};
if (identifiers[identifierKey]) {
validationError(`duplicate value for identifier ${identifierKey}`, identifiersOpts);
} else {
identifiers[identifierKey] = value[identifierKey];
}
}
});
if (identifiers?.doi) {
if (defined(value.doi)) {
validationError(`duplicate value for DOI`, identifiersOpts);
} else {
value.doi = identifiers.doi;
validationWarning(
"DOI should be defined directly on the project frontmatter, not under 'identifiers'",
identifiersOpts,
);
}
delete identifiers.doi;
}
if (defined(value.pmid)) {
output.pmid = validateNumber(value.pmid, {
...incrementOptions('pmid', opts),
integer: true,
min: 1,
});
if (identifiers) {
const identifiersEntries = Object.entries(identifiers)
.map(([k, v]) => {
const validator = getExternalIdentifierValidator(k);
return [k, validator(v, identifiersOpts)];
})
.filter((entry): entry is [string, string | number] => entry[1] != null);
if (identifiersEntries.length > 0) {
output.identifiers = Object.fromEntries(identifiersEntries);
}
}
if (defined(value.pmcid)) {
output.pmcid = validateString(value.pmcid, {
...incrementOptions('pmcid', opts),
regex: '^PMC[0-9]+$',
});
if (defined(value.doi)) {
output.doi = validateDoi(value.doi, incrementOptions('doi', opts));
}
if (defined(value.open_access)) {
output.open_access = validateBoolean(value.open_access, incrementOptions('open_access', opts));
Expand Down
1 change: 1 addition & 0 deletions packages/myst-frontmatter/src/site/types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@ export const FRONTMATTER_ALIASES = {
key_points: 'keypoints',
'key-points': 'keypoints',
image: 'thumbnail',
identifier: 'identifiers',
};

export type SiteFrontmatter = {
Expand Down
11 changes: 11 additions & 0 deletions packages/myst-frontmatter/src/utils/fillPageFrontmatter.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -727,6 +727,17 @@ describe('fillPageFrontmatter', () => {
options: { a: 'b' },
});
});
it('project and page identifiers combine', async () => {
  // Both inputs define `other`; the expectation below keeps the first argument's value
  // ('abc') while still picking up the keys that only one side defines.
  const firstFrontmatter = { identifiers: { pmcid: 'PMC123', other: 'abc' } };
  const secondFrontmatter = {
    identifiers: { other: 'def', arxiv: 'https://arxiv.org/example' },
  };
  const combined = fillPageFrontmatter(firstFrontmatter, secondFrontmatter, opts);
  expect(combined).toEqual({
    identifiers: { pmcid: 'PMC123', other: 'abc', arxiv: 'https://arxiv.org/example' },
  });
});
});

describe('fillSiteFrontmatter', () => {
Expand Down
7 changes: 7 additions & 0 deletions packages/myst-frontmatter/src/utils/fillPageFrontmatter.ts
Original file line number Diff line number Diff line change
Expand Up @@ -211,6 +211,13 @@ export function fillProjectFrontmatter(
};
}

if (filler.identifiers || base.identifiers) {
frontmatter.identifiers = {
...(filler.identifiers ?? {}),
...(base.identifiers ?? {}),
};
}

if (!trimUnused) {
if (filler.bibliography || base.bibliography) {
frontmatter.bibliography = [
Expand Down

0 comments on commit 2f7cb6b

Please sign in to comment.