Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

🪪 Nest external identifiers in frontmatter #1589

Merged
merged 1 commit into from
Oct 17, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions .changeset/great-steaks-talk.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
---
'myst-frontmatter': patch
---

Nest external identifiers in frontmatter
3 changes: 2 additions & 1 deletion packages/myst-frontmatter/src/page/page.yml
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,8 @@ cases:
name: University B
label: example.md
doi: 10.1000/abcd/efg012
arxiv: https://arxiv.org/example
identifiers:
arxiv: https://arxiv.org/example
open_access: true
license:
content:
Expand Down
94 changes: 85 additions & 9 deletions packages/myst-frontmatter/src/project/project.yml
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,8 @@ cases:
name: University A
date: '2021-12-14'
doi: 10.1000/abcd/efg012
arxiv: https://arxiv.org/example
identifiers:
arxiv: https://arxiv.org/example
open_access: true
license:
content:
Expand Down Expand Up @@ -238,26 +239,101 @@ cases:
errors: 1
- title: pmcid validates
raw:
pmcid: PMC123
identifiers:
pmcid: PMC123
normalized:
pmcid: PMC123
identifiers:
pmcid: PMC123
- title: invalid pmcid errors
raw:
pmcid: '123'
identifiers:
pmcid: '123'
normalized: {}
errors: 1
- title: pmid string validates
raw:
pmid: '123'
identifiers:
pmid: '123'
normalized:
pmid: 123
identifiers:
pmid: 123
- title: pmid number validates
raw:
pmid: 123
identifiers:
pmid: 123
normalized:
pmid: 123
identifiers:
pmid: 123
- title: invalid pmid errors
raw:
pmid: 'abc'
identifiers:
pmid: 'abc'
normalized: {}
errors: 1
- title: doi is lifted from identifiers
raw:
identifiers:
doi: 10.1000/abc123
pmid: 123
normalized:
doi: 10.1000/abc123
identifiers:
pmid: 123
warnings: 1
- title: unknown string identifier passes
raw:
identifiers:
unknown: '123'
normalized:
identifiers:
unknown: '123'
- title: unknown numeric identifier passes
raw:
identifiers:
unknown: 123
normalized:
identifiers:
unknown: 123
- title: known identifiers nest under identifiers
raw:
zenodo: https://zenodo.org/records/13942262
pmcid: PMC123
pmid: 123
arxiv: https://arxiv.org/example
normalized:
identifiers:
zenodo: https://zenodo.org/records/13942262
pmcid: PMC123
pmid: 123
arxiv: https://arxiv.org/example
- title: invalid zenodo errors
raw:
identifiers:
zenodo: '13942262'
normalized: {}
errors: 1
- title: identifier coerces to identifiers
raw:
identifier:
unknown: 123
normalized:
identifiers:
unknown: 123
- title: duplicate identifiers error
raw:
pmcid: PMC456
identifier:
pmcid: PMC123
normalized:
identifiers:
pmcid: PMC123
errors: 1
- title: duplicate dois error
raw:
doi: 10.1000/abc456
identifier:
doi: 10.1000/abc123
normalized:
doi: 10.1000/abc456
errors: 1

11 changes: 5 additions & 6 deletions packages/myst-frontmatter/src/project/types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -10,12 +10,12 @@ import type { SiteFrontmatter } from '../site/types.js';
import { SITE_FRONTMATTER_KEYS } from '../site/types.js';
import type { ExpandedThebeFrontmatter } from '../thebe/types.js';

export const KNOWN_EXTERNAL_IDENTIFIERS = ['arxiv', 'pmid', 'pmcid', 'zenodo'];

export const PROJECT_AND_PAGE_FRONTMATTER_KEYS = [
'date',
'doi',
'arxiv',
'pmid',
'pmcid',
'identifiers',
'open_access',
'license',
'binder',
Expand All @@ -33,6 +33,7 @@ export const PROJECT_AND_PAGE_FRONTMATTER_KEYS = [
'exports',
'downloads',
'settings', // We maybe want to move this into site frontmatter in the future
...KNOWN_EXTERNAL_IDENTIFIERS,
// Do not add any project specific keys here!
...SITE_FRONTMATTER_KEYS,
];
Expand All @@ -51,9 +52,7 @@ export const PROJECT_FRONTMATTER_KEYS = [
export type ProjectAndPageFrontmatter = SiteFrontmatter & {
date?: string;
doi?: string;
arxiv?: string;
pmid?: number;
pmcid?: string;
identifiers?: Record<string, string | number>;
open_access?: boolean;
license?: Licenses;
binder?: string;
Expand Down
106 changes: 86 additions & 20 deletions packages/myst-frontmatter/src/project/validators.ts
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@ import {
validateObjectKeys,
validateString,
validateUrl,
validationError,
validationWarning,
} from 'simple-validators';
import { validateTOC } from 'myst-toc';
import { validatePublicationMeta } from '../biblio/validators.js';
Expand All @@ -21,12 +23,53 @@ import { validateExternalReferences } from '../references/validators.js';
import { validateSiteFrontmatterKeys } from '../site/validators.js';
import { validateThebe } from '../thebe/validators.js';
import { validateDoi, validateStringOrNumber } from '../utils/validators.js';
import { PROJECT_FRONTMATTER_KEYS } from './types.js';
import { KNOWN_EXTERNAL_IDENTIFIERS, PROJECT_FRONTMATTER_KEYS } from './types.js';
import type { ProjectAndPageFrontmatter, ProjectFrontmatter } from './types.js';
import { validateProjectAndPageSettings } from '../settings/validators.js';
import { FRONTMATTER_ALIASES } from '../site/types.js';
import { validateMathMacroObject } from '../math/validators.js';

/**
 * Pick the validation routine for one entry of the `identifiers` object.
 *
 * Dispatch is purely on the identifier name:
 * - `arxiv`  -> `validateUrl` with `includes: 'arxiv.org'`
 * - `pmid`   -> `validateNumber` with `integer: true` and `min: 1`
 * - `pmcid`  -> `validateString` with the regex `^PMC[0-9]+$`
 * - `zenodo` -> `validateUrl` with `includes: 'zenodo.org'`
 * - anything else -> `validateStringOrNumber`
 *
 * In every case the options handed to the underlying validator are first
 * passed through `incrementOptions` with the identifier name.
 *
 * @param key - Name of the identifier (the key inside `identifiers`).
 * @returns A function taking the raw value and the validation options of the
 *   enclosing object, and returning whatever the selected validator returns.
 */
function getExternalIdentifierValidator(
  key: string,
): (value: any, opts: ValidationOptions) => string | number | undefined {
  switch (key) {
    case 'arxiv':
      return (value: any, opts: ValidationOptions) =>
        validateUrl(value, {
          ...incrementOptions('arxiv', opts),
          includes: 'arxiv.org',
        });
    case 'pmid':
      return (value: any, opts: ValidationOptions) =>
        validateNumber(value, {
          ...incrementOptions('pmid', opts),
          integer: true,
          min: 1,
        });
    case 'pmcid':
      return (value: any, opts: ValidationOptions) =>
        validateString(value, {
          ...incrementOptions('pmcid', opts),
          regex: '^PMC[0-9]+$',
        });
    case 'zenodo':
      return (value: any, opts: ValidationOptions) =>
        validateUrl(value, {
          ...incrementOptions('zenodo', opts),
          includes: 'zenodo.org',
        });
    default:
      // Identifiers without a dedicated rule only need to be a string or a number.
      return (value: any, opts: ValidationOptions) =>
        validateStringOrNumber(value, incrementOptions(key, opts));
  }
}

export function validateProjectAndPageFrontmatterKeys(
value: Record<string, any>,
opts: ValidationOptions,
Expand All @@ -35,27 +78,50 @@ export function validateProjectAndPageFrontmatterKeys(
if (defined(value.date)) {
output.date = validateDate(value.date, incrementOptions('date', opts));
}
if (defined(value.doi)) {
output.doi = validateDoi(value.doi, incrementOptions('doi', opts));
}
if (defined(value.arxiv)) {
output.arxiv = validateUrl(value.arxiv, {
...incrementOptions('arxiv', opts),
includes: 'arxiv.org',
});
const identifiersOpts = incrementOptions('identifiers', opts);
let identifiers: Record<string, string | number> | undefined;
if (defined(value.identifiers)) {
identifiers = validateObjectKeys(
value.identifiers,
{ optional: KNOWN_EXTERNAL_IDENTIFIERS },
{ keepExtraKeys: true, suppressWarnings: true, ...identifiersOpts },
);
}
if (defined(value.pmid)) {
output.pmid = validateNumber(value.pmid, {
...incrementOptions('pmid', opts),
integer: true,
min: 1,
});
KNOWN_EXTERNAL_IDENTIFIERS.forEach((identifierKey) => {
if (defined(value[identifierKey])) {
identifiers ??= {};
if (identifiers[identifierKey]) {
validationError(`duplicate value for identifier ${identifierKey}`, identifiersOpts);
} else {
identifiers[identifierKey] = value[identifierKey];
}
}
});
if (identifiers?.doi) {
if (defined(value.doi)) {
validationError(`duplicate value for DOI`, identifiersOpts);
} else {
value.doi = identifiers.doi;
validationWarning(
"DOI should be defined directly on the project frontmatter, not under 'identifiers'",
identifiersOpts,
);
}
delete identifiers.doi;
}
if (identifiers) {
const identifiersEntries = Object.entries(identifiers)
.map(([k, v]) => {
const validator = getExternalIdentifierValidator(k);
return [k, validator(v, identifiersOpts)];
})
.filter((entry): entry is [string, string | number] => entry[1] != null);
if (identifiersEntries.length > 0) {
output.identifiers = Object.fromEntries(identifiersEntries);
}
}
if (defined(value.pmcid)) {
output.pmcid = validateString(value.pmcid, {
...incrementOptions('pmcid', opts),
regex: '^PMC[0-9]+$',
});
if (defined(value.doi)) {
output.doi = validateDoi(value.doi, incrementOptions('doi', opts));
}
if (defined(value.open_access)) {
output.open_access = validateBoolean(value.open_access, incrementOptions('open_access', opts));
Expand Down
1 change: 1 addition & 0 deletions packages/myst-frontmatter/src/site/types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,7 @@ export const FRONTMATTER_ALIASES = {
key_points: 'keypoints',
'key-points': 'keypoints',
image: 'thumbnail',
identifier: 'identifiers',
};

export type SiteFrontmatter = {
Expand Down
11 changes: 11 additions & 0 deletions packages/myst-frontmatter/src/utils/fillPageFrontmatter.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -727,6 +727,17 @@ describe('fillPageFrontmatter', () => {
options: { a: 'b' },
});
});
it('project and page identifiers combine', async () => {
expect(
fillPageFrontmatter(
{ identifiers: { pmcid: 'PMC123', other: 'abc' } },
{ identifiers: { other: 'def', arxiv: 'https://arxiv.org/example' } },
opts,
),
).toEqual({
identifiers: { pmcid: 'PMC123', other: 'abc', arxiv: 'https://arxiv.org/example' },
});
});
});

describe('fillSiteFrontmatter', () => {
Expand Down
7 changes: 7 additions & 0 deletions packages/myst-frontmatter/src/utils/fillPageFrontmatter.ts
Original file line number Diff line number Diff line change
Expand Up @@ -211,6 +211,13 @@ export function fillProjectFrontmatter(
};
}

if (filler.identifiers || base.identifiers) {
frontmatter.identifiers = {
...(filler.identifiers ?? {}),
...(base.identifiers ?? {}),
};
}

if (!trimUnused) {
if (filler.bibliography || base.bibliography) {
frontmatter.bibliography = [
Expand Down
Loading