From cedf2a96e838f33062a4adc74b4abb5a2f8125b0 Mon Sep 17 00:00:00 2001 From: Nikita Date: Wed, 15 Feb 2023 15:27:46 +0300 Subject: [PATCH] Backmerge: #2142 s groups that combine multiple molecules get lost when saving as ket file (#2219) (#2225) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Revert "#2142: s groups get lost when saving as ket file (#2191)" This reverts commit 289295397f769404afcf0519a540fa38ecf9c197. * Fix S-Groups which combines multiple molecules getting lost when saving as ket file * #2142 – updated indigo version to 1.10.0-rc.3 --------- Co-authored-by: Yulei Chen --- .../__tests__/domain/entities/pile.test.ts | 41 ++++ .../serializers/ket/KetSerializer.test.ts | 27 ++- .../ketcher-core/src/domain/entities/pile.ts | 41 ++++ .../ket/fromKet/moleculeToStruct.ts | 1 + .../domain/serializers/ket/ketSerializer.ts | 20 +- .../src/domain/serializers/ket/schema.json | 216 ++++++++---------- .../serializers/ket/toKet/moleculeToKet.ts | 8 +- .../domain/serializers/ket/toKet/prepare.ts | 116 +++++++--- packages/ketcher-standalone/package.json | 2 +- yarn.lock | 10 +- 10 files changed, 300 insertions(+), 182 deletions(-) create mode 100644 packages/ketcher-core/__tests__/domain/entities/pile.test.ts diff --git a/packages/ketcher-core/__tests__/domain/entities/pile.test.ts b/packages/ketcher-core/__tests__/domain/entities/pile.test.ts new file mode 100644 index 0000000000..ca3ffbc3d4 --- /dev/null +++ b/packages/ketcher-core/__tests__/domain/entities/pile.test.ts @@ -0,0 +1,41 @@ +import { Pile } from 'domain/entities/pile' + +describe('unionIntersections', () => { + it('unions multiple sets which have intersections', () => { + const setA = new Pile([0, 1]) + const setB = new Pile([1, 2, 3]) + const setC = new Pile([2, 3]) + + const union = Pile.unionIntersections([setA, setB, setC]) + + expect(union).toHaveLength(1) + expect(union[0]).toEqual(new Pile([0, 1, 2, 3])) + }) + + it('does not union sets which have no 
intersections', () => { + const setA = new Pile([0, 1]) + const setB = new Pile([2, 3]) + + const union = Pile.unionIntersections([setA, setB]) + + expect(union).toHaveLength(2) + expect(union[0]).toEqual(setA) + expect(union[1]).toEqual(setB) + }) + + // Combines above two situations + it('unions multiple sets which have intersections, and skips sets without intersections', () => { + const setA = new Pile([0, 1]) + const setB = new Pile([1, 2, 3]) + const setC = new Pile([2, 3]) + const setD = new Pile([4, 5]) + const setE = new Pile([6]) + + const union = Pile.unionIntersections([setA, setB, setC, setD, setE]) + + expect(union).toHaveLength(3) + expect(union[0]).toEqual(new Pile([0, 1, 2, 3])) + expect(union[1]).toEqual(setD) + expect(union[2]).toEqual(setE) + }) +}) diff --git a/packages/ketcher-core/__tests__/domain/serializers/ket/KetSerializer.test.ts b/packages/ketcher-core/__tests__/domain/serializers/ket/KetSerializer.test.ts index 1d53b9ba35..f0a318ee79 100644 --- a/packages/ketcher-core/__tests__/domain/serializers/ket/KetSerializer.test.ts +++ b/packages/ketcher-core/__tests__/domain/serializers/ket/KetSerializer.test.ts @@ -205,20 +205,6 @@ describe('serialize (ToKet)', () => { const plusKet = parsedPrepareContent.root.nodes[3] expect(structPlus).toEqual(plusKet) }) - it('correct work with sgroups', () => { - const parsedSgroupStruct = JSON.parse(ket.serialize(moleculeSgroupStruct)) - expect(parsedSgroupStruct.root.nodes[6].data.type).toEqual('GEN') - expect(parsedSgroupStruct.root.nodes[7].data.type).toEqual('MUL') - expect(parsedSgroupStruct.root.nodes[7].data.mul).toEqual(1) - expect(parsedSgroupStruct.root.nodes[8].data.type).toEqual('SRU') - expect(parsedSgroupStruct.root.nodes[8].data.subscript).toEqual('n') - expect(parsedSgroupStruct.root.nodes[8].data.connectivity).toEqual('HT') - expect(parsedSgroupStruct.root.nodes[9].data.type).toEqual('MUL') - expect(parsedSgroupStruct.root.nodes[9].data.mul).toEqual(1) - 
expect(parsedSgroupStruct.root.nodes[10].data.type).toEqual('SUP') - expect(parsedSgroupStruct.root.nodes[11].data.subscript).toEqual('n') - expect(parsedSgroupStruct.root.nodes[11].data.connectivity).toEqual('HT') - }) it('moleculeToKet', () => { const spy = jest.spyOn(moleculeToKet, 'moleculeToKet') ket.serialize(moleculeContentStruct) @@ -244,6 +230,19 @@ describe('serialize (ToKet)', () => { expect( spy.mock.results[1].value.bonds.filter((bond) => bond.type === 2).length ).toEqual(3) + // sgroups + ket.serialize(moleculeSgroupStruct) + expect(spy.mock.results[2].value.sgroups[0].type).toEqual('GEN') + expect(spy.mock.results[2].value.sgroups[1].type).toEqual('MUL') + expect(spy.mock.results[2].value.sgroups[1].mul).toEqual(1) + expect(spy.mock.results[2].value.sgroups[2].type).toEqual('SRU') + expect(spy.mock.results[2].value.sgroups[2].subscript).toEqual('n') + expect(spy.mock.results[2].value.sgroups[2].connectivity).toEqual('HT') + expect(spy.mock.results[2].value.sgroups[3].type).toEqual('MUL') + expect(spy.mock.results[2].value.sgroups[3].mul).toEqual(1) + expect(spy.mock.results[2].value.sgroups[4].type).toEqual('SUP') + expect(spy.mock.results[2].value.sgroups[5].subscript).toEqual('n') + expect(spy.mock.results[2].value.sgroups[5].connectivity).toEqual('HT') }) it('rgroupToKet', () => { const spy = jest.spyOn(rgroupToKet, 'rgroupToKet') diff --git a/packages/ketcher-core/src/domain/entities/pile.ts b/packages/ketcher-core/src/domain/entities/pile.ts index 9962425722..b5c7359ba9 100644 --- a/packages/ketcher-core/src/domain/entities/pile.ts +++ b/packages/ketcher-core/src/domain/entities/pile.ts @@ -48,4 +48,45 @@ export class Pile extends Set { return union } + + intersection(setB: Pile): Pile { + const thisSet = new Pile(this) + return new Pile([...thisSet].filter((item) => setB.has(item))) + } + + /** + * Union multiple sets which have intersections + * @example ``` + * const setA = new Pile([0, 1]) + * const setB = new Pile([1, 2]) + * const setC = 
new Pile([2, 3]) + * const setD = new Pile([4, 5]) + * console.log(Pile.unionIntersections([setA, setB, setC, setD])) + * // [{0, 1, 2, 3}, {4, 5}] + * ``` + */ + static unionIntersections(sets: Array>): Array> { + let unionized = false + + // Merge each set into every earlier set it intersects + const setsToReturn = sets.reduce((prevSets, curSet) => { + let isCurSetMerged = false + + const newSets = prevSets.map((set) => { + const intersec = set.intersection(curSet) + if (intersec.size > 0) { + unionized = true + isCurSetMerged = true + return set.union(curSet) + } + return set + }) + + if (!isCurSetMerged) newSets.push(curSet) + return newSets + }, new Array>()) + + // Repeat until a pass merges nothing, so chains of overlapping sets collapse into one + return unionized ? Pile.unionIntersections(setsToReturn) : setsToReturn + } } diff --git a/packages/ketcher-core/src/domain/serializers/ket/fromKet/moleculeToStruct.ts b/packages/ketcher-core/src/domain/serializers/ket/fromKet/moleculeToStruct.ts index c6a4ef0f0b..54ad35040b 100644 --- a/packages/ketcher-core/src/domain/serializers/ket/fromKet/moleculeToStruct.ts +++ b/packages/ketcher-core/src/domain/serializers/ket/fromKet/moleculeToStruct.ts @@ -49,6 +49,7 @@ export function moleculeToStruct(ketItem: any): Struct { struct.initHalfBonds() struct.initNeighbors() struct.markFragments() + struct.bindSGroupsToFunctionalGroups() return struct } diff --git a/packages/ketcher-core/src/domain/serializers/ket/ketSerializer.ts b/packages/ketcher-core/src/domain/serializers/ket/ketSerializer.ts index 8ea8b55c1f..05e280c083 100644 --- a/packages/ketcher-core/src/domain/serializers/ket/ketSerializer.ts +++ b/packages/ketcher-core/src/domain/serializers/ket/ketSerializer.ts @@ -19,8 +19,8 @@ import { arrowToKet, plusToKet } from './toKet/rxnToKet' import { Serializer } from '../serializers.types' import { headerToKet } from './toKet/headerToKet' -import { moleculeToKet, sgroupToKet } from './toKet/moleculeToKet' -import { moleculeToStruct, sgroupToStruct } from './fromKet/moleculeToStruct' +import
{ moleculeToKet } from './toKet/moleculeToKet' +import { moleculeToStruct } from './fromKet/moleculeToStruct' import { prepareStructForKet } from './toKet/prepare' import { rgroupToKet } from './toKet/rgroupToKet' import { rgroupToStruct } from './fromKet/rgroupToStruct' @@ -64,10 +64,6 @@ function parseNode(node: any, struct: any) { textToStruct(node, struct) break } - case 'sgroup': { - struct.sgroups.add(sgroupToStruct(node.data)) - break - } default: break } @@ -85,7 +81,6 @@ export class KetSerializer implements Serializer { else if (nodes[i].$ref) parseNode(ket[nodes[i].$ref], resultingStruct) }) resultingStruct.name = ket.header ? ket.header.moleculeName : null - resultingStruct.bindSGroupsToFunctionalGroups() return resultingStruct } @@ -107,13 +102,13 @@ export class KetSerializer implements Serializer { switch (item.type) { case 'molecule': { result.root.nodes.push({ $ref: `mol${moleculeId}` }) - result[`mol${moleculeId++}`] = moleculeToKet(item.fragment) + result[`mol${moleculeId++}`] = moleculeToKet(item.fragment!) 
break } case 'rgroup': { result.root.nodes.push({ $ref: `rg${item.data!.rgnumber}` }) result[`rg${item.data!.rgnumber}`] = rgroupToKet( - item.fragment, + item.fragment!, item.data ) break @@ -134,13 +129,6 @@ export class KetSerializer implements Serializer { result.root.nodes.push(textToKet(item)) break } - case 'sgroup': { - result.root.nodes.push({ - type: item.type, - data: sgroupToKet(struct, item.data) - }) - break - } default: break } diff --git a/packages/ketcher-core/src/domain/serializers/ket/schema.json b/packages/ketcher-core/src/domain/serializers/ket/schema.json index 7eb103aed5..072335ebf9 100644 --- a/packages/ketcher-core/src/domain/serializers/ket/schema.json +++ b/packages/ketcher-core/src/domain/serializers/ket/schema.json @@ -26,9 +26,6 @@ { "$ref": "#/definitions/plus" }, - { - "$ref": "#/definitions/sgroup" - }, { "type": "object", "required": ["$ref"], @@ -382,132 +379,117 @@ ] } }, - "sgroupData": { - "type": "object", - "required": ["atoms", "type"], - "properties": { - "atoms": { - "type": "array", - "items": { - "type": "integer", - "minimum": 0 - } - }, - "type": { - "type": "string", - "enum": ["GEN", "MUL", "SRU", "SUP", "DAT"] - } - }, - "if": { + "sgroups": { + "type": "array", + "items": { + "required": ["atoms", "type"], + "type": "object", "properties": { + "atoms": { + "type": "array", + "items": { + "type": "integer", + "minimum": 0 + } + }, "type": { - "const": "MUL" + "type": "string", + "enum": ["GEN", "MUL", "SRU", "SUP", "DAT"] } - } - }, - "then": { - "required": ["mul"], - "properties": { - "mul": { - "type": "integer", - "minimum": 1, - "maximum": 1000 + }, + "if": { + "properties": { + "type": { + "const": "MUL" + } } - } - }, - "if": { - "properties": { - "type": { - "const": "SRU" + }, + "then": { + "required": ["mul"], + "properties": { + "mul": { + "type": "integer", + "minimum": 1, + "maximum": 1000 + } } - } - }, - "then": { - "required": ["subscript", "connectivity"], - "properties": { - "subscript": { - 
"type": "string", - "pattern": "^[a-zA-Z]$" - }, - "connectivity": { - "type": "string", - "enum": ["HT", "HH", "EU"] + }, + "if": { + "properties": { + "type": { + "const": "SRU" + } } - } - }, - "if": { - "properties": { - "type": { - "const": "SUP" + }, + "then": { + "required": ["subscript", "connectivity"], + "properties": { + "subscript": { + "type": "string", + "pattern": "^[a-zA-Z]$" + }, + "connectivity": { + "type": "string", + "enum": ["HT", "HH", "EU"] + } } - } - }, - "then": { - "required": ["type"], - "properties": { - "type": { - "type": "string", - "minLength": 1 - }, - "expanded": { - "type": "boolean" - }, - "id": { - "type": "number" + }, + "if": { + "properties": { + "type": { + "const": "SUP" + } } - } - }, - "if": { - "properties": { - "type": { - "const": "DAT" + }, + "then": { + "required": ["type"], + "properties": { + "type": { + "type": "string", + "minLength": 1 + }, + "expanded": { + "type": "boolean" + }, + "id": { + "type": "number" + } } - } - }, - "then": { - "required": ["fieldName"], - "properties": { - "context": { - "enum": ["Fragment", "Multifragment", "Bond", "Atom", "Group"] - }, - "fieldName": { - "type": "string" - }, - "fieldValue": { - "type": "string", - "minLength": 1 - }, - "display": { - "type": "boolean" - }, - "placement": { - "type": "boolean" - }, - "bonds": { - "type": "array", - "items": { - "type": "integer", - "minimum": 0 + }, + "if": { + "properties": { + "type": { + "const": "DAT" } } - } - } - }, - "sgroups": { - "type": "array", - "items": { - "$ref": "#/definitions/sgroupData" - } - }, - "sgroup": { - "type": "object", - "required": ["type", "data"], - "properties": { - "type": { - "const": "sgroup" }, - "data": { - "$ref": "#/definitions/sgroupData" + "then": { + "required": ["fieldName"], + "properties": { + "context": { + "enum": ["Fragment", "Multifragment", "Bond", "Atom", "Group"] + }, + "fieldName": { + "type": "string" + }, + "fieldValue": { + "type": "string", + "minLength": 1 + }, + "display": 
{ + "type": "boolean" + }, + "placement": { + "type": "boolean" + }, + "bonds": { + "type": "array", + "items": { + "type": "integer", + "minimum": 0 + } + } + } } } }, diff --git a/packages/ketcher-core/src/domain/serializers/ket/toKet/moleculeToKet.ts b/packages/ketcher-core/src/domain/serializers/ket/toKet/moleculeToKet.ts index 7fd5e0470c..f2591a8d35 100644 --- a/packages/ketcher-core/src/domain/serializers/ket/toKet/moleculeToKet.ts +++ b/packages/ketcher-core/src/domain/serializers/ket/toKet/moleculeToKet.ts @@ -44,6 +44,12 @@ export function moleculeToKet(struct: Struct): any { body.bonds = Array.from(struct.bonds.values()).map(bondToKet) } + if (struct.sgroups.size !== 0) { + body.sgroups = Array.from(struct.sgroups.values()).map((sGroup) => + sgroupToKet(struct, sGroup) + ) + } + const fragment = struct.frags.get(0) if (fragment) { ifDef(body, 'stereoFlagPosition', fragment.stereoFlagPosition, null) @@ -118,7 +124,7 @@ function bondToKet(source) { return result } -export function sgroupToKet(struct, source) { +function sgroupToKet(struct, source) { const result = {} ifDef(result, 'type', source.type) diff --git a/packages/ketcher-core/src/domain/serializers/ket/toKet/prepare.ts b/packages/ketcher-core/src/domain/serializers/ket/toKet/prepare.ts index 2ac3cca15e..d3a83848e4 100644 --- a/packages/ketcher-core/src/domain/serializers/ket/toKet/prepare.ts +++ b/packages/ketcher-core/src/domain/serializers/ket/toKet/prepare.ts @@ -13,11 +13,17 @@ * See the License for the specific language governing permissions and * limitations under the License. 
***************************************************************************/ -import { Pile, SGroup, Struct, Vec2 } from 'domain/entities' +import { Pile, Pool, SGroup, Struct, Vec2 } from 'domain/entities' + +type KetNode = { + type: string + fragment?: Struct + center: Vec2 + data?: any +} export function prepareStructForKet(struct: Struct) { - const ketNodes: any = [] - const atomIdMap: Map = initAtomIdMap(struct) + const ketNodes: KetNode[] = [] const rgFrags = new Set() // skip this when writing molecules for (const [rgnumber, rgroup] of struct.rgroups.entries()) { @@ -37,16 +43,10 @@ export function prepareStructForKet(struct: Struct) { }) } - Array.from(struct.frags.keys()) - .filter((fid) => !rgFrags.has(fid)) - .forEach((fid) => { - const fragAtoms = struct.getFragmentIds(fid) - ketNodes.push({ - type: 'molecule', - fragment: struct.clone(fragAtoms), - center: getFragmentCenter(struct, fragAtoms) - }) - }) + const filteredFragmentIds = Array.from(struct.frags.keys()).filter( + (fid) => !rgFrags.has(fid) + ) + addMolecules(ketNodes, filteredFragmentIds, struct) struct.rxnArrows.forEach((item) => { ketNodes.push({ @@ -91,20 +91,13 @@ export function prepareStructForKet(struct: Struct) { }) }) - struct.sgroups.forEach((sgroup) => { - ketNodes.push({ - type: 'sgroup', - data: SGroup.clone(sgroup, atomIdMap) - }) - }) - ketNodes.forEach((ketNode) => { if (ketNode.fragment) { const sgroups: SGroup[] = Array.from(ketNode.fragment.sgroups.values()) const filteredSGroups = sgroups.filter((sg: SGroup) => sg.atoms.every((atom) => atom !== undefined) ) - const filteredSGroupsMap = new Map() + const filteredSGroupsMap = new Pool() filteredSGroups.forEach((sg, index) => { filteredSGroupsMap.set(index, sg) }) @@ -122,14 +115,81 @@ function getFragmentCenter(struct, atomSet) { return Vec2.centre(bb.min, bb.max) } -function initAtomIdMap(struct: Struct): Map { - const atomIdMap = new Map() +/** + * Merge fragments which are in the same S-Groups to one fragment(molecule) + 
* and add new fragments(molecules) to KetNodes + * See: https://github.com/epam/ketcher/issues/2142 + */ +function addMolecules( + ketNodes: KetNode[], + fragmentIds: number[], + struct: Struct +) { + const sGroupFragmentsMap = generateSGroupFragmentsMap( + ketNodes, + fragmentIds, + struct + ) + const mergedFragments = Pile.unionIntersections( + Array.from(sGroupFragmentsMap.values()) + ) + + mergedFragments.forEach((fragments) => { + let atomSet = new Pile() + fragments.forEach((fragmentId) => { + atomSet = atomSet.union(struct.getFragmentIds(fragmentId)) + }) + ketNodes.push({ + type: 'molecule', + fragment: struct.clone(atomSet), + center: getFragmentCenter(struct, atomSet) + }) + }) +} + +/** + * @example Given `fragmentIds` of `[0, 1]`, + * where S-Group0 includes fragment0 and fragment1, + * and S-Group1 includes fragment1, + * the returned map should be + * ``` + * { + * 0: [0, 1], + * 1: [1] + * } + * ``` + */ +function generateSGroupFragmentsMap( + ketNodes: KetNode[], + fragmentIds: number[], + struct: Struct +) { + const sGroupFragmentsMap = new Map>() + + fragmentIds.forEach((fragmentId) => { + const atomsInFragment = struct.getFragmentIds(fragmentId) + + let hasAtomInSGroup = false + atomsInFragment.forEach((atomId) => { + struct.atoms.get(atomId)?.sgs.forEach((sGroupId) => { + hasAtomInSGroup = true + const fragmentSet = sGroupFragmentsMap.get(sGroupId) + if (fragmentSet) { + fragmentSet.add(fragmentId) + } else { + sGroupFragmentsMap.set(sGroupId, new Pile([fragmentId])) + } + }) + }) - let index = 0 - struct.atoms.forEach((_value, key) => { - atomIdMap.set(key, index) - index++ + if (!hasAtomInSGroup) { + ketNodes.push({ + type: 'molecule', + fragment: struct.clone(atomsInFragment), + center: getFragmentCenter(struct, atomsInFragment) + }) + } }) - return atomIdMap + return sGroupFragmentsMap } diff --git a/packages/ketcher-standalone/package.json b/packages/ketcher-standalone/package.json index 6b5789947a..d20a933701 100644 ---
a/packages/ketcher-standalone/package.json +++ b/packages/ketcher-standalone/package.json @@ -39,7 +39,7 @@ }, "dependencies": { "@babel/runtime": "^7.17.9", - "indigo-ketcher": "1.10.0-rc.2", + "indigo-ketcher": "1.10.0-rc.3", "ketcher-core": "workspace:*" }, "devDependencies": { diff --git a/yarn.lock b/yarn.lock index 78ed375de6..5ff3530582 100644 --- a/yarn.lock +++ b/yarn.lock @@ -9450,12 +9450,12 @@ __metadata: languageName: node linkType: hard -"indigo-ketcher@npm:1.10.0-rc.2": - version: 1.10.0-rc.2 - resolution: "indigo-ketcher@npm:1.10.0-rc.2" +"indigo-ketcher@npm:1.10.0-rc.3": + version: 1.10.0-rc.3 + resolution: "indigo-ketcher@npm:1.10.0-rc.3" dependencies: looks-same: ^8.1.0 - checksum: 2625c64d81b452308e68fe1e578bdd68d1b306e7d37bd35f669f89a546a31a2ceab8f510c27181e2a51b68968745b9bb20600a1cd6d6f79099a37e4b6ab7ff1a + checksum: 00bad7a075afb0ce3984a390c0c15e83fb1423e4daa03eaa3e3b4abcfc4fcc04777fe03c9779ef5d7e4970af03ed23f4de19aee6b9283fad0d7bdb2da21504de languageName: node linkType: hard @@ -11652,7 +11652,7 @@ __metadata: cross-env: ^7.0.3 eslint: ^8.4.1 eslint-plugin-jest: ^25.3.0 - indigo-ketcher: 1.10.0-rc.2 + indigo-ketcher: 1.10.0-rc.3 jest: 26.6.0 ketcher-core: "workspace:*" npm-run-all: ^4.1.5