Skip to content

Commit

Permalink
#1986 – SMILES: Pasting structure with small letters throws an error (#…
Browse files Browse the repository at this point in the history
  • Loading branch information
Nitvex committed Dec 23, 2022
1 parent d9bb502 commit 94cc3ce
Show file tree
Hide file tree
Showing 6 changed files with 28 additions and 13 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,12 @@ const formatProperties: FormatPropertiesMap = {
ChemicalMimeType.CDX,
['.cdx'],
true
),
unknown: new SupportedFormatProperties(
'Unknown',
ChemicalMimeType.UNKNOWN,
['.'],
true
)
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,7 @@ export class FormatterFactory {
case SupportedFormat.smarts:
case SupportedFormat.cdxml:
case SupportedFormat.cdx:
case SupportedFormat.unknown:
default:
formatter = new ServerFormatter(
this.#structService,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,10 @@ export function identifyStructFormat(
return SupportedFormat.rxn
}

if (sanitizedString.indexOf('V2000') !== -1) {
return SupportedFormat.mol
}

if (sanitizedString.indexOf('V3000') !== -1) {
return SupportedFormat.molV3000
}
Expand All @@ -55,12 +59,16 @@ export function identifyStructFormat(
return SupportedFormat.cml
}

const clearStr = sanitizedString.replace(/\s/g, '')
const anyLetterAnyDigitContainsSlashesEndsWithEqualSign =
/^[a-zA-Z0-9+/]*={0,2}$/
const clearStr = sanitizedString
.replace(/\s/g, '')
.replace(/(\\r)|(\\n)/g, '')
const isBase64String =
/^([0-9a-zA-Z+/]{4})*(([0-9a-zA-Z+/]{2}==)|([0-9a-zA-Z+/]{3}=))?$/
const cdxHeader = 'VjCD0100'
if (
anyLetterAnyDigitContainsSlashesEndsWithEqualSign.test(clearStr) &&
clearStr.length % 4 === 0
clearStr.length % 4 === 0 &&
isBase64String.test(clearStr) &&
window.atob(clearStr).startsWith(cdxHeader)
) {
return SupportedFormat.cdx
}
Expand All @@ -69,17 +77,14 @@ export function identifyStructFormat(
return SupportedFormat.inChI
}

if (
sanitizedString.indexOf('\n') === -1 &&
sanitizedString === sanitizedString.toUpperCase()
) {
if (sanitizedString.indexOf('\n') === -1) {
// TODO: smiles regexp
return SupportedFormat.smiles
}

if (sanitizedString.indexOf('<CDXML') !== -1) {
return SupportedFormat.cdxml
}
// Molfile by default as Indigo does
return SupportedFormat.mol

return SupportedFormat.unknown
}
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,8 @@ export enum SupportedFormat {
cml = 'cml',
ket = 'ket',
cdxml = 'cdxml',
cdx = 'cdx'
cdx = 'cdx',
unknown = 'unknown'
}

export type FormatterFactoryOptions = Partial<
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,8 @@ export enum ChemicalMimeType {
CDX = 'chemical/x-cdx',
CDXML = 'chemical/x-cdxml',
CML = 'chemical/x-cml',
KET = 'chemical/x-indigo-ket'
KET = 'chemical/x-indigo-ket',
UNKNOWN = 'chemical/x-unknown'
}

export interface WithStruct {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,7 @@ function convertMimeTypeToOutputFormat(
format = SupportedFormat.CDX
break
}
case ChemicalMimeType.UNKNOWN:
default: {
throw new Error('Unsupported chemical mime type')
}
Expand Down

0 comments on commit 94cc3ce

Please sign in to comment.