# Entry point registered under the `nomad.plugin` entry-point group as
# `nomad_tadf_molecules.apps:tadf_molecules`; it exposes the search app
# defined in `nomad_tadf_molecules.apps.tadf_molecules`.
tadf_molecules = AppEntryPoint(
    app=tadf_molecules_app,
    description='Search information about thermally activated delayed fluorescent (TADF) molecules.',
    name='TADF Molecules',
)
# Fully qualified name of the section definition this app is restricted to.
# The class is defined in nomad_tadf_molecules/schema_packages/tadf_molecules.py,
# so the module component of the path must be `tadf_molecules`; a path that
# does not match the real definition makes every `#{schema}` quantity below
# unresolvable.
schema = 'nomad_tadf_molecules.schema_packages.tadf_molecules.TADFMolecule'

tadf_molecules_app = App(
    label='Fluorescent Molecules',
    path='fluorescent-molecules',
    description='Search for fluorescent molecule data',
    category='Experiment',
    # Make all quantities of the custom schema available as filters.
    filters=Filters(include=[f'*#{schema}']),
    # Restrict the app to entries that contain a section of the schema above.
    filters_locked={'section_defs.definition_qualified_name': schema},
    # Result table: formula, the four measured properties and the DOI.
    columns=Columns(
        selected=[
            'results.material.chemical_formula_hill',
            f'data.photoluminescence_quantum_yield#{schema}',
            f'data.peak_emission_wavelength#{schema}',
            f'data.delayed_lifetime#{schema}',
            f'data.singlet_triplet_energy_splitting#{schema}',
            f'data.DOI_number#{schema}',
        ],
        options={
            'results.material.chemical_formula_hill': Column(),
            f'data.photoluminescence_quantum_yield#{schema}': Column(),
            f'data.peak_emission_wavelength#{schema}': Column(),
            f'data.delayed_lifetime#{schema}': Column(),
            f'data.singlet_triplet_energy_splitting#{schema}': Column(),
            f'data.DOI_number#{schema}': Column(),
        },
    ),
    filter_menus=FilterMenus(
        options={
            'material': FilterMenu(label='Material'),
            'elements': FilterMenu(label='Elements / Formula', level=1, size='xl'),
            'custom_quantities': FilterMenu(label='Custom Quantities', size='l'),
        }
    ),
    # Dashboard layout on the 'lg' breakpoint. The first row (y=0) is filled
    # by widths 11 + 7 + 6 = 24, the second row (y=7) by two histograms of
    # width 12 placed side by side.
    dashboard=Dashboard(
        widgets=[
            WidgetPeriodicTable(
                type='periodictable',
                scale='linear',
                quantity='results.material.elements',
                layout={'lg': Layout(w=11, h=7, x=0, y=0)},
            ),
            WidgetScatterPlot(
                type='scatterplot',
                layout={'lg': Layout(w=7, h=7, x=11, y=0)},
                x=Axis(quantity=f'data.peak_emission_wavelength#{schema}', unit='nm'),
                y=Axis(quantity=f'data.photoluminescence_quantum_yield#{schema}'),
            ),
            WidgetTerms(
                type='terms',
                layout={'lg': Layout(w=6, h=7, x=18, y=0)},
                # Same quantity as the first result column (the previous value
                # 'chemical_fomula_fill' was a misspelling of it).
                quantity='results.material.chemical_formula_hill',
                scale='linear',
            ),
            WidgetHistogram(
                type='histogram',
                # Left half of the second row; previously this widget used the
                # same x/y as the next histogram and the two overlapped.
                layout={'lg': Layout(w=12, h=4, x=0, y=7)},
                autorange=True,
                nbins=30,
                scale='linear',
                quantity=f'data.delayed_lifetime#{schema}',
            ),
            WidgetHistogram(
                type='histogram',
                # Right half of the second row.
                layout={'lg': Layout(w=12, h=4, x=12, y=7)},
                autorange=True,
                nbins=30,
                scale='linear',
                quantity=f'data.singlet_triplet_energy_splitting#{schema}',
            ),
        ]
    ),
)
class TADFMoleculesParser(MatchingParser):
    '''
    Parser that reads a JSON mainfile describing one molecule and stores the
    contents as a `TADFMolecule` section in `archive.data`.
    '''

    def parse(
        self,
        mainfile: str,
        archive: EntryArchive,
        logger: BoundLogger,
        child_archives: dict[str, EntryArchive] = None,
    ) -> None:
        '''
        Populate `archive.data` from the JSON file at `mainfile`.

        Args:
            mainfile: Path of the JSON file to read.
            archive: Archive whose `data` section is replaced by the parsed
                `TADFMolecule` instance.
            logger: Logger used to report fields missing from the file.
            child_archives: Accepted for compatibility with callers that pass
                child archives (the parser this class replaced declared the
                same optional parameter); it is not used here.

        Raises:
            OSError: If the file cannot be opened.
            json.JSONDecodeError: If the file is not valid JSON.
        '''
        # Read as UTF-8 explicitly so the result does not depend on the
        # locale of the machine doing the processing.
        with open(mainfile, encoding='utf-8') as file:
            raw = json.load(file)

        schema_instance = TADFMolecule()

        # Descriptive metadata: JSON key -> name of the schema quantity.
        # A missing key is reported and skipped instead of aborting the whole
        # entry with a KeyError.
        metadata_fields = {
            'doi': 'DOI_number',
            'abbreviated_name': 'name',
            'iupac_name': 'iupac_name',
            'smiles': 'smile',
        }
        for key, quantity_name in metadata_fields.items():
            value = raw.get(key)
            if value is None:
                logger.warning('Missing metadata field in mainfile.', field=key)
                continue
            setattr(schema_instance, quantity_name, value)

        # The four measured properties are stored in the file as a
        # `<name>_value` / `<name>_unit` pair.
        for name in (
            'photoluminescence_quantum_yield',
            'peak_emission_wavelength',
            'delayed_lifetime',
            'singlet_triplet_energy_splitting',
        ):
            value = raw.get(f'{name}_value')
            if value is None:
                continue
            # A missing or empty unit leaves the number as is (this is the
            # expected case for a unit-less ratio such as the quantum yield)
            # instead of raising on the absent key.
            unit = raw.get(f'{name}_unit')
            setattr(
                schema_instance,
                name,
                (value * ureg(unit)) if unit else value,
            )

        # Save schema instance into archive.data
        archive.data = schema_instance
class TADFMoleculesSchemaPackageEntryPoint(SchemaPackageEntryPoint):
    '''Entry point that provides the TADF molecules schema package.'''

    def load(self):
        '''
        Import the schema module on demand and return its `m_package`.

        The import happens at call time, so importing this package alone does
        not import the schema module.
        '''
        # NOTE(review): the submodule imported here and the entry point
        # instance defined below share the name `tadf_molecules`. Python binds
        # a loaded submodule as an attribute of its parent package, so after
        # this import the package attribute `tadf_molecules` refers to the
        # module rather than to the entry point instance. Confirm the entry
        # point is always resolved before the submodule is first imported, or
        # rename one of the two.
        from nomad_tadf_molecules.schema_packages.tadf_molecules import (
            m_package as schema_package,
        )

        return schema_package


tadf_molecules = TADFMoleculesSchemaPackageEntryPoint(
    description='Schema package for thermally activated delayed fluorescent (TADF) molecules.',
    name='TADF molecules',
)
m_package = SchemaPackage()


class TADFMolecule(Schema, PureSubstanceSection, PublicationReference):
    '''
    A schema describing a thermally activated delayed fluorescent molecule with
    information extracted from the literature.
    '''

    photoluminescence_quantum_yield = Quantity(
        type=np.float64,
        description='''
        The photoluminescence quantum yield defined as the ratio of the number of photons
        emitted to the number of photons absorbed.
        ''',
        a_eln=ELNAnnotation(
            component='NumberEditQuantity',
        ),
    )
    peak_emission_wavelength = Quantity(
        type=np.float64,
        unit='nanometer',
        description='The wavelength at which the emission intensity is at a maximum.',
        a_eln=ELNAnnotation(
            component='NumberEditQuantity',
        ),
    )
    delayed_lifetime = Quantity(
        type=np.float64,
        unit='microsecond',
        description='''
        The time interval between the absorption of photons (excitation) and the emission
        of light (fluorescence).
        ''',
        a_eln=ELNAnnotation(
            component='NumberEditQuantity',
        ),
    )
    singlet_triplet_energy_splitting = Quantity(
        type=np.float64,
        unit='electron_volt',
        description='Difference in the singlet and triplet state energy levels.',
        a_eln=ELNAnnotation(
            component='NumberEditQuantity',
        ),
    )

    def normalize(self, archive, logger) -> None:
        '''
        Derive composition and a 3D structure from the SMILES string.

        When `self.smile` is set, the molecular formula and exact mass are
        stored on this section, the composition is written to
        `archive.results.material`, and a force-field optimised 3D
        reconstruction of the molecule is stored as the single system of
        `archive.results.material.topology`.

        Args:
            archive: The archive this section belongs to; its
                `results.material` is created if absent and then populated.
            logger: Logger used to report SMILES strings that cannot be
                parsed or embedded.
        '''
        # Local import: `Results` is only needed to create the container
        # section when the archive does not have one yet.
        from nomad.datamodel.results import Results

        # Trigger base class normalization first
        super().normalize(archive, logger)

        if not self.smile:
            return

        # Convert the SMILES string to an RDKit molecule. RDKit returns None
        # (instead of raising) when the string cannot be parsed.
        rdkit_mol = Chem.MolFromSmiles(self.smile)
        if rdkit_mol is None:
            logger.warning(
                'Could not parse SMILES, skipping structure normalization.',
                smiles=self.smile,
            )
            return

        # Add hydrogens, store molecule formula and mass
        rdkit_mol = Chem.AddHs(rdkit_mol)
        self.molecular_formula = Chem.rdMolDescriptors.CalcMolFormula(rdkit_mol)
        self.molecular_mass = Chem.rdMolDescriptors.CalcExactMolWt(rdkit_mol)

        # Save the composition into archive.results.material, creating the
        # parent sections if nothing has created them yet.
        if not archive.results:
            archive.results = Results()
        if not archive.results.material:
            archive.results.material = Material()
        formula = Formula(self.molecular_formula)
        formula.populate(archive.results.material)

        # Embed the molecule in 3D space. A fixed seed makes the generated
        # coordinates reproducible when the entry is processed again.
        # EmbedMolecule returns -1 when no conformer could be generated; in
        # that case there is no structure to store, but the composition
        # written above is kept.
        if AllChem.EmbedMolecule(rdkit_mol, randomSeed=42) < 0:
            logger.warning(
                'Could not generate a 3D structure from SMILES.',
                smiles=self.smile,
            )
            return
        AllChem.MMFFOptimizeMolecule(rdkit_mol)

        # Convert the RDKit molecule to an ASE atoms object
        positions = rdkit_mol.GetConformer().GetPositions()
        atomic_numbers = [atom.GetAtomicNum() for atom in rdkit_mol.GetAtoms()]
        ase_atoms = Atoms(numbers=atomic_numbers, positions=positions)

        # Create a System: this is a NOMAD specific data structure for storing
        # structural and chemical information that is suitable for both
        # experiments and simulations.
        system = System(
            atoms=nomad_atoms_from_ase_atoms(ase_atoms),
            label='Molecule reconstruction',
            description='3D reconstruction of the molecule generated from SMILES.',
            structural_type='molecule',
            dimensionality='0D',
        )

        # archive.results.material.topology can be used to represent relations
        # between systems, e.g. "System A is part of System B". In our case
        # there is only a single system.
        topology = {}
        add_system_info(system, topology)
        add_system(system, topology)
        archive.results.material.topology = list(topology.values())


m_package.__init_metainfo__()