diff --git a/.gitignore b/.gitignore index 756f9b9..ef54fa2 100644 --- a/.gitignore +++ b/.gitignore @@ -2,6 +2,12 @@ **/*.egg-info/ **/*~ src/pysonata/sonata/tests/tmp +src/pysonata/build +src/pysonata/dist +src/pysonata/.pytest_cache +**/mechanisms/x86_64 +examples/**/output/log.txt +examples/**/output/config.json .idea .cache -**/mechanisms/x86_64 + diff --git a/docs/SONATA_DEVELOPER_GUIDE.md b/docs/SONATA_DEVELOPER_GUIDE.md index 8037e08..c7a2b5a 100644 --- a/docs/SONATA_DEVELOPER_GUIDE.md +++ b/docs/SONATA_DEVELOPER_GUIDE.md @@ -113,10 +113,7 @@ The format used is SWC ( [http://www.neuronland.org/NLMorphologyConverter/Morpho - -* Point soma, interpreted as a sphere with radius from column 6 (**Cylindrical somas are not used**) - - Point soma will be interpreted in NEURON as cylinder with L=radius +* Single sample point soma, interpreted as a sphere with radius from column 6 (**Cylindrical somas are not used**). * Types: The following types are currently used. @@ -157,7 +154,12 @@ It is recommended, but not required, that morphologies in a network have a stand ### Representing ion channel, point neuron and synapse models -To represent point neuron models, synapses and ion channels NEURON MOD files are used. Models provided as standard by NEURON are also valid, such as ExpSyn, IntFire1. +Representation of point neuron models, synapses and ion channels depends on +the target simulator. + +For NEURON, NEURON MOD files are used. Models provided as standard by NEURON +are also valid, such as ExpSyn, IntFire1. For NEST and PyNN, the names of +built-in/standard point neuron and synapse models are used. To support reproducible random numbers in NEURON, it is required to define conventions for the configuration of random number generators, and how they are assigned to channel models, synapses random number generators. To this end, NEURON mechanisms should follow idioms in the MOD files, so that a uniform and automated approach to random number configuration can be employed. Such an approach was out of scope of the current format specification, and will be the subject of future versions. @@ -401,7 +403,7 @@ The details of how node and edge populations are defined and represented are des In general, populations of neurons are heterogeneous in the types of cell models describing each node, implying heterogeneousequations and sets of parameters. We define a node group as a set of nodes with a homogeneous parameter namespace implying a uniform tabular layout. A population is defined as the union of one or more groups, which need not have uniform tabular layout among them, and further defines some indexing datasets. A population provides then a uniform view on a collection of nodes which have heterogeneous parameterization namespaces. -A model_type attribute allows nodes to be configured as "biophysical", “point_soma”, etc. and also “virtual” one may be provided to specify external (or “virtual”) nodes that are not explicitly simulated but provide inputs to the network. +A model_type attribute allows nodes to be configured as `biophysical`, `point_neuron`, etc. and also `virtual` one may be provided to specify external (or `virtual`) nodes that are not explicitly simulated but provide inputs to the network. A typical network may include multiple simulated populations as well as multiple populations of external input nodes. @@ -486,7 +488,6 @@ The HDF5 nodes file layout is designed to store multiple named populations that - Table 1: The Layout of the HDF5 file format for describing nodes. ##### Nodes - Required Attributes @@ -497,31 +498,40 @@ Table 1: The Layout of the HDF5 file format for describing nodes. **node_type_id** - This is a unique integer for every node type used to associate a node to a node type. A node type has associated attributes, and a node inherits attributes from its node type. Attributes associated with a node override attributes inherited from the node type CSV. node_type_id is a unique integer associated with each node type and is used to index all the node type properties associated with a given node with known node_type_id. These need not be ordered or contiguous, but must be unique. -**model_type** - Has four valid values: , "biophysical"”,“virtual”, “single_compartment”, “point_process”. In the future, more model_types may be defined. The meaning of each of these model types is as follows. +**model_type** - Has 4 valid values: `biophysical`, `virtual`, `single_compartment`, and `point_neuron`. In the future, more `model_types` may be defined. The meaning of each of these model types is as follows. + +The `model_type`=`single_compartment` is a single-compartment model of a neuron. A cylindrical compartment is created with length equal to diameter, and the diameter being defined by an additional expected dynamics_param “D”, which if not specified defaults to 1 micron. The voltage of the neuron is defined by the voltage of the compartment. Further, the passive mechanism is inserted, and the additional mechanism named in the “model_template” required attribute. Note that a single compartment of length = diameter has the same effective area as that of a sphere of the same diameter (see [NEURON documentation](https://www.neuron.yale.edu/neuron/static/docs/help/neuron/neuron/geometry.html)). -The model_type=*"single_compartment"”* is a single-compartment model of a neuron. A cylindrical compartment is created with length equal to diameter, and the diameter being defined by an additional expected dynamics_param “D”, which if not specified defaults to 1 micron. The voltage of the neuron is defined by the voltage of the compartment. Further, the passive mechanism is inserted, and the additional mechanism named in the “model_template” required attribute. Note that a single compartment of length = diameter has the same effective area as that of a sphere of the same diameter (see [NEURON documentation](https://www.neuron.yale.edu/neuron/static/docs/help/neuron/neuron/geometry.html)). +The `model_type`=`point_neuron` results in a point process neuron. The actual +model type is defined by the `model_template` required attribute. In NEURON the +the `model_template` will point to an NMOD file. For the NEST and PyNN, +`model_template` will provide the name of a built-in neuron model. See below +for details. -The model_type=*"point_process"* results in a point process neuron, i.e. a NEURON artificial cell. The point process model is named in the “model_template” required attribute. +The `model_type`=`virtual` results in a placeholder neuron, which is not otherwise simulated, but can be the source of spikes which result in post-synaptic events. -The model_type=*"virtual"* results in a placeholder neuron, which is not otherwise simulated, but can be the source of spikes which result in post-synaptic events. +The `model_type`=`biophysical` results in a compartmental neuron. The attribute **morphology** must be defined, either via the node or node_type. -The model_type=*"biophysical"* results in a compartmental neuron. The attribute **morphology** must be defined, either via the node or node_type. **model_template** - Used to reference a template or class describing the electrophysical properties and mechanisms of the node(s). Its value and interpretation is context-dependent on the corresponding ‘model_type’. When there is no applicable model template for a given model type (i.e. model_type=virtual) it is assigned a value of NULL. Otherwise it uses a colon-separated string-pair with the following syntax: -**:** +*`schema`*:*`resource`* -** is a keyword used to identify the type of template being specified. Reserved options include: +*`schema`* is a keyword used to identify the type of template being specified. Reserved options include: -nml: Template is described by a NeuroML file. Valid for biophysical model types. +- **nml**: Template is described by a NeuroML file. Valid for biophysical model types. -actdb: Template is described using a pre-generated hoc template specifically designed to run AIBS cell type models. Valid for both biophysical and single_compartment model types +- **ctdb**: Template is described using a pre-generated hoc template specifically designed to run AIBS cell type models. Valid for both biophysical and single_compartment model types. -hoc: Template is described using a customized hoc file. Valid for both biophysical and single_compartmentmodel types +- **hoc**: Template is described using a customized hoc file. Valid for both biophysical and single_compartment model types. -nrn: Valid for both point_process and single_compartment model types. For a point_process model type, should specify the name of NEURON simulator point_process (i.e. IntFire1, IntFire2). For a single_compartment type, should specify the name of the mechanism to insert. +- **nrn**: Valid for both `point_neuron` and `single_compartment` model types. For a `point_neuron` model type, should specify the name of NEURON simulator `point_neuron` (i.e. IntFire1, IntFire2). For a `single_compartment` type, should specify the name of the mechanism to insert. -** is a reference to the template file-name or class. For file names if a full-path or url is not specified the interpreter is expected to use the "components" in the config file to find the full path (see below). +- **nest**: Indicates that the model is a built-in NEST model. + +- **pynn**: Indicates that the model is a standard PyNN model. + +*`resource`* is a reference to the template file-name or class. For file names if a full-path or url is not specified the interpreter is expected to use the "components" in the config file to find the full path (see below). **node_group_id** - Assigns each node to a specific group of nodes. @@ -547,78 +557,21 @@ Each morphology is first moved from its original coordinates in SWC to such a lo **recenter** [INT8] - Optional reserved attribute, if the value is set to 0, morphology would _not_ be moved to (0, 0, 0) prior to rotation / translation. -**dynamics_params** - Define parameter overrides for nodes. This attribute can exist in the node_types.csv file in which case a .json file is referenced, which should contain a dictionary of keys and values. A key should be a valid name in the namespace of parameters of the model, and the value specifies the assigned parameter override. Alternatively, dynamics_params overrides can be specified for each individual node in a group, in the corresponding nodes HDF5. In this case, a dynamics_params HDF5 group contains datasets named according to the parameter of the model to override in the namespace of parameters of the model (see Table 1). The length of such datasets is the number of nodes in the group. +**dynamics_params** - Define parameter overrides for nodes. This attribute can exist in the node_types.csv file in which case a .json file is referenced, which should contain a dictionary of keys and values. A key should be a valid name in the namespace of parameters of the model, and the value specifies the assigned parameter override. Alternatively, dynamics_params overrides can be specified for each individual node in a group in the corresponding H5 dataset. In this case, a dynamics_params HDF5 group contains datasets named according to the parameter of the model to override in the namespace of parameters of the model (see Table 1). The length of such datasets is the number of nodes in the group. Note that if an override is defined for a given name in both the nodes HDF5 file and the nodes_types CSV file, then the HDF5 file will override the latter. The namespace of parameters depends on model_type, and are defined as follows. -**For point_soma models**, it is the namespace of the NEURON Section containing the "pas" and user requested soma mechanism. - -**For point_process models**, it is the namespace of the point_process/artificial cell mechanism. - -**For biophysical models** defined according to the *nml *schema (see above), names take the form ".", where is the id of an element and an attribute of said element in the nml file defining the biophysical model. For example “g_pas_apic.erev” refers to the “erev” attribute of the “g_pas_apic” element of the nml biophysics block defining the channel composition of the model. It is worth noting that namespaces defined in this way apply equally to dynamics_params overrides at the node_types and node levels for all model types. - -**For biophysical models** defined according to the *bmtk *(see above), the namespace definition is to be filled in by the Allen folks. +**For `single_compartment` models**, it is the namespace of the NEURON Section containing the "pas" and user requested soma mechanism. -**For biophysical models** defined according to the *hoc *(see above), the namespace definition is to be filled in by the Allen folks. +**For `point_neuron` models**, it is the namespace of the point neuron model. -For a conceptual schematic of the architecture relating node attributes *model_type*,* model*, and *dynamics_params* overrides and their namespaces at the node_type and nodes level, see Table 2. - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Model Type
point_processpoint_somabiophysical
Parameter -OverrideNode -leveldynamics_params HDF5 group
Node_type -leveldynamics_params .json
Model -ObjectParameter -Namespacepoint processSection ∪ pas ∪ -user mechanism. -∈ .nml file
Modelpoint process namemechanism name.nml file
+**For `biophysical` models** defined according to the *nml* schema (see above), names take the form **id**.**attribute**, where **id** is the id of an element and **attribute** an attribute of said element in the nml file defining the biophysical model. For example “g_pas_apic.erev” refers to the “erev” attribute of the “g_pas_apic” element of the nml biophysics block defining the channel composition of the model. It is worth noting that namespaces defined in this way apply equally to dynamics_params overrides at the node_types and node levels for all model types. +For `biophysical` models defined according to the *bmtk* (see above), the namespace definition is to be filled in by the Allen folks. -Table 2: A conceptual schematic of the architecture relating node attributes* model*, and *dynamics_params* overrides at the node_type and nodes level for each model_type. The namespace of both node_type and node level parameter overrides are given uniquely by the model object parameter namespace. Node level parameter overrides take precedence over node_type level parameter overrides. +For `biophysical` models defined according to the *hoc* (see above), the namespace definition is to be filled in by the Allen folks. ####
Representing Edges @@ -758,7 +711,7 @@ The edge populations as defined above only allow fast enumeration of the edges f The indexing has been designed for networks where if two nodes are connected, they tend to have multiple edges connecting them (multi-synapse connections in detailed morphology networks). For single edge networks this index has excessive overhead. For maximum space and time efficiency, edges connecting two nodes should be contiguous in the edge population. In any case, the index allows edges between different nodes to be placed in any order in the population datasets. -The indexing is rooted at a single group called indices which is a subgroup of an edge population group. For convenience, the prefix /edges/**/indices/ is stripped from the following layout description: +The indexing is rooted at a single group called indices which is a subgroup of an edge population group. For convenience, the prefix /edges/**population_name**/indices/ is stripped from the following layout description: @@ -911,7 +864,7 @@ overwritten. The default behaviour is for simulators to produce spike data (a series of gid, timestamp pairs). By default the name of the file is "spikes.h5" and it -is written to . The name of the output file for spikes can be +is written to "output_dir". The name of the output file for spikes can be configured with the optional attribute "spikes_file" (using a relative or absolute path in spikes_file has undefined behaviour) @@ -1401,7 +1354,7 @@ See CodeJam talks on this topic: model_processing is a string attribute of nodes allowing the specification of alternative -processing approaches in the model construction behaviour of biophysical neurons. The following values are currently defined (more will be defined in the future, as required). It is currently not valid to specify this attribute for model_type != "biophysical”. +processing approaches in the model construction behaviour of biophysical neurons. The following values are currently defined (more will be defined in the future, as required). It is currently not valid to specify this attribute for model_type != `biophysical`. For model_processing=*"fullaxon",* the biophysical neuron will construct and simulate the full axon. This is the default behaviour if model_processing is undefined for a given node. diff --git a/examples/300_intfire/build_network.py b/examples/300_intfire/build_network.py index dd6341f..30692a3 100644 --- a/examples/300_intfire/build_network.py +++ b/examples/300_intfire/build_network.py @@ -7,12 +7,12 @@ # Step 1: Create a v1 mock network of 14 cells (nodes) with across 7 different cell "types" net = NetworkBuilder("v1") -net.add_nodes(N=240, pop_name='LIF_exc', location='VisL4', ei='e', +net.add_nodes(N=240, model_name='LIF_exc', location='VisL4', ei='e', model_type='point_process', # use point_process to indicate were are using point model cells model_template='nrn:IntFire1', # Tell the simulator to use the NEURON built-in IntFire1 type cell dynamics_params='IntFire1_exc_1.json') -net.add_nodes(N=60, pop_name='LIF_inh', location='VisL4', ei='i', +net.add_nodes(N=60, model_name='LIF_inh', location='VisL4', ei='i', model_type='point_process', model_template='nrn:IntFire1', dynamics_params='IntFire1_inh_1.json') @@ -46,7 +46,7 @@ def recurrent_connections(src_cells, trg_cell, n_syns): dynamics_params='instanteneousInh.json') -net.add_edges(source={'ei': 'e'}, target={'pop_name': 'LIF_inh'}, +net.add_edges(source={'ei': 'e'}, target={'model_name': 'LIF_inh'}, iterator='all_to_one', connection_rule=recurrent_connections, connection_params={'n_syns': 10}, @@ -56,7 +56,7 @@ def recurrent_connections(src_cells, trg_cell, n_syns): dynamics_params='instanteneousExc.json') -net.add_edges(source={'ei': 'e'}, target={'pop_name': 'LIF_exc'}, +net.add_edges(source={'ei': 'e'}, target={'model_name': 'LIF_exc'}, iterator='all_to_one', connection_rule=recurrent_connections, connection_params={'n_syns': 10}, @@ -78,33 +78,33 @@ def generate_positions(N, x0=0.0, x1=300.0, y0=0.0, y1=100.0): def select_source_cells(src_cells, trg_cell, n_syns): if np.random.random() > 0.1: - synapses = [n_syns if src['pop_name'] == 'tON' or src['pop_name'] == 'tOFF' else 0 for src in src_cells] + synapses = [n_syns if src['model_name'] == 'tON' or src['model_name'] == 'tOFF' else 0 for src in src_cells] else: - synapses = [n_syns if src['pop_name'] == 'tONOFF' else 0 for src in src_cells] + synapses = [n_syns if src['model_name'] == 'tONOFF' else 0 for src in src_cells] return synapses lgn = NetworkBuilder("lgn") pos_x, pos_y = generate_positions(30) -lgn.add_nodes(N=30, pop_name='tON', ei='e', location='LGN', +lgn.add_nodes(N=30, model_name='tON', ei='e', location='LGN', x=pos_x, y=pos_y, model_type='virtual') pos_x, pos_y = generate_positions(30) -lgn.add_nodes(N=30, pop_name='tOFF', ei='e', location='LGN', +lgn.add_nodes(N=30, model_name='tOFF', ei='e', location='LGN', x=pos_x, y=pos_y, model_type='virtual') pos_x, pos_y = generate_positions(30) -lgn.add_nodes(N=30, pop_name='tONOFF', ei='e', location='LGN', +lgn.add_nodes(N=30, model_name='tONOFF', ei='e', location='LGN', x=pos_x, y=pos_y, model_type='virtual') -lgn.add_edges(source=lgn.nodes(), target=net.nodes(pop_name='LIF_exc'), +lgn.add_edges(source=lgn.nodes(), target=net.nodes(model_name='LIF_exc'), iterator='all_to_one', connection_rule=select_source_cells, connection_params={'n_syns': 10}, @@ -113,7 +113,7 @@ def select_source_cells(src_cells, trg_cell, n_syns): delay=2.0, dynamics_params='instanteneousExc.json') -lgn.add_edges(source=lgn.nodes(), target=net.nodes(pop_name='LIF_inh'), +lgn.add_edges(source=lgn.nodes(), target=net.nodes(model_name='LIF_inh'), iterator='all_to_one', connection_rule=select_source_cells, connection_params={'n_syns': 10}, @@ -127,16 +127,16 @@ def select_source_cells(src_cells, trg_cell, n_syns): tw = NetworkBuilder("tw") -tw.add_nodes(N=30, pop_name='TW', ei='e', location='TW', model_type='virtual') +tw.add_nodes(N=30, model_name='TW', ei='e', location='TW', model_type='virtual') -tw.add_edges(source=tw.nodes(), target=net.nodes(pop_name='LIF_exc'), +tw.add_edges(source=tw.nodes(), target=net.nodes(model_name='LIF_exc'), connection_rule=5, syn_weight=0.01, weight_function='wmax', delay=2.0, dynamics_params='instanteneousExc.json') -tw.add_edges(source=tw.nodes(), target=net.nodes(pop_name='LIF_inh'), +tw.add_edges(source=tw.nodes(), target=net.nodes(model_name='LIF_inh'), connection_rule=5, syn_weight=0.02, weight_function='wmax', diff --git a/examples/300_intfire/network/lgn_node_types.csv b/examples/300_intfire/network/lgn_node_types.csv index 2ead28a..bf27cc6 100644 --- a/examples/300_intfire/network/lgn_node_types.csv +++ b/examples/300_intfire/network/lgn_node_types.csv @@ -1,4 +1,4 @@ -node_type_id model_type ei location pop_name +node_type_id model_type ei location model_name 100 virtual e LGN tON 101 virtual e LGN tOFF 102 virtual e LGN tONOFF diff --git a/examples/300_intfire/network/lgn_v1_edge_types.csv b/examples/300_intfire/network/lgn_v1_edge_types.csv index 21d9f24..7bce56b 100644 --- a/examples/300_intfire/network/lgn_v1_edge_types.csv +++ b/examples/300_intfire/network/lgn_v1_edge_types.csv @@ -1,3 +1,3 @@ edge_type_id target_query source_query delay weight_function syn_weight dynamics_params -100 pop_name=='LIF_exc' * 2.0 wmax 0.0045 instanteneousExc.json -101 pop_name=='LIF_inh' * 2.0 wmax 0.0015 instanteneousExc.json +100 model_name=='LIF_exc' * 2.0 wmax 0.0045 instanteneousExc.json +101 model_name=='LIF_inh' * 2.0 wmax 0.0015 instanteneousExc.json diff --git a/examples/300_intfire/network/tw_node_types.csv b/examples/300_intfire/network/tw_node_types.csv index e9ad70a..a76afaf 100644 --- a/examples/300_intfire/network/tw_node_types.csv +++ b/examples/300_intfire/network/tw_node_types.csv @@ -1,2 +1,2 @@ -node_type_id model_type ei location pop_name +node_type_id model_type ei location model_name 100 virtual e TW TW diff --git a/examples/300_intfire/network/tw_v1_edge_types.csv b/examples/300_intfire/network/tw_v1_edge_types.csv index b3dbdd4..14068d3 100644 --- a/examples/300_intfire/network/tw_v1_edge_types.csv +++ b/examples/300_intfire/network/tw_v1_edge_types.csv @@ -1,3 +1,3 @@ edge_type_id target_query source_query delay weight_function syn_weight dynamics_params -100 pop_name=='LIF_exc' * 2.0 wmax 0.01 instanteneousExc.json -101 pop_name=='LIF_inh' * 2.0 wmax 0.02 instanteneousExc.json +100 model_name=='LIF_exc' * 2.0 wmax 0.01 instanteneousExc.json +101 model_name=='LIF_inh' * 2.0 wmax 0.02 instanteneousExc.json diff --git a/examples/300_intfire/network/v1_node_types.csv b/examples/300_intfire/network/v1_node_types.csv index ad51c61..900fe10 100644 --- a/examples/300_intfire/network/v1_node_types.csv +++ b/examples/300_intfire/network/v1_node_types.csv @@ -1,3 +1,3 @@ -node_type_id ei pop_name location model_template model_type dynamics_params +node_type_id ei model_name location model_template model_type dynamics_params 100 e LIF_exc VisL4 nrn:IntFire1 point_process IntFire1_exc_1.json 101 i LIF_inh VisL4 nrn:IntFire1 point_process IntFire1_inh_1.json diff --git a/examples/300_intfire/network/v1_v1_edge_types.csv b/examples/300_intfire/network/v1_v1_edge_types.csv index 269e0f1..4e83484 100644 --- a/examples/300_intfire/network/v1_v1_edge_types.csv +++ b/examples/300_intfire/network/v1_v1_edge_types.csv @@ -1,5 +1,5 @@ edge_type_id target_query source_query delay weight_function syn_weight dynamics_params 100 model_type=='point_process'&ei=='i' ei=='i' 2.0 wmax 0.01 instanteneousInh.json 101 model_type=='point_process'&ei=='e' ei=='i' 2.0 wmax 0.15 instanteneousInh.json -102 pop_name=='LIF_inh' ei=='e' 2.0 wmax 0.3 instanteneousExc.json -103 pop_name=='LIF_exc' ei=='e' 2.0 wmax 0.002 instanteneousExc.json +102 model_name=='LIF_inh' ei=='e' 2.0 wmax 0.3 instanteneousExc.json +103 model_name=='LIF_exc' ei=='e' 2.0 wmax 0.002 instanteneousExc.json diff --git a/examples/300_intfire/plot_spikes.py b/examples/300_intfire/plot_spikes.py index 785cbfe..23557d5 100644 --- a/examples/300_intfire/plot_spikes.py +++ b/examples/300_intfire/plot_spikes.py @@ -1,3 +1,3 @@ from bmtk.analyzer.visualization.spikes import plot_spikes -plot_spikes('network/v1_nodes.h5', 'network/v1_node_types.csv', 'output/spikes.h5', group_key='pop_name') +plot_spikes('network/v1_nodes.h5', 'network/v1_node_types.csv', 'output/spikes.h5', group_key='model_name') diff --git a/src/pysonata/setup.py b/src/pysonata/setup.py index 70baf3c..d997fb5 100644 --- a/src/pysonata/setup.py +++ b/src/pysonata/setup.py @@ -15,11 +15,19 @@ def prepend_find_packages(*roots): return packages + +with open('README.md', 'r') as fhandle: + long_description = fhandle.read() + + setup( name='sonata', - version=0.1, + version='0.0.1', description='SONATA Data Format', - package_data={'': ['*.md', '*.txt', '*.cfg', '**/*.json', '**/*.hoc', '**/*.h5', '**/*.csv']}, + long_description=long_description, + long_description_content_type='text/markdown', + url='https://github.com/AllenInstitute/bmtk', + package_data={'': ['*.md', '*.txt', '*.cfg', '**/*.json', '**/*.hoc']}, tests_require=['pytest'], install_requires=[ 'jsonschema', diff --git a/src/pysonata/sonata/io/edge.py b/src/pysonata/sonata/io/edge.py index 0f91a7a..75a3d00 100644 --- a/src/pysonata/sonata/io/edge.py +++ b/src/pysonata/sonata/io/edge.py @@ -41,12 +41,13 @@ def next(self): class Edge(object): - def __init__(self, src_node_id, trg_node_id, source_pop, target_pop, group_props, edge_types_props): + def __init__(self, src_node_id, trg_node_id, source_pop, target_pop, group_id, group_props, edge_types_props): self._src_node_id = src_node_id self._trg_node_id = trg_node_id self._source_population = source_pop self._target_population = target_pop self._group_props = group_props + self._group_id = group_id self._edge_type_props = edge_types_props @property @@ -65,6 +66,14 @@ def source_population(self): def target_population(self): return self._target_population + @property + def group_id(self): + return self._group_id + + @property + def edge_type_id(self): + return self._edge_type_props['edge_type_id'] + @property def dynamics_params(self): raise NotImplementedError @@ -75,4 +84,7 @@ def __getitem__(self, prop_key): elif prop_key in self._edge_type_props: return self._edge_type_props[prop_key] else: - raise KeyError \ No newline at end of file + raise KeyError('Property {} not found in edge.'.format(prop_key)) + + def __contains__(self, prop_key): + return prop_key in self._group_props or prop_key in self._edge_type_props diff --git a/src/pysonata/sonata/io/file.py b/src/pysonata/sonata/io/file.py index de8c729..d70f66a 100644 --- a/src/pysonata/sonata/io/file.py +++ b/src/pysonata/sonata/io/file.py @@ -20,20 +20,34 @@ # WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. # -from sonata.io import utils -from sonata.io.file_root import NodesRoot, EdgesRoot +from . import utils +from .file_root import NodesRoot, EdgesRoot class File(object): - def __init__(self, data_files, data_type_files, mode='r', gid_table=None): + def __init__(self, data_files, data_type_files, mode='r', gid_table=None, require_magic=True): if mode != 'r': raise Exception('Currently only read mode is supported.') self._data_files = utils.listify(data_files) self._data_type_files = utils.listify(data_type_files) - # file handles - self._h5_file_handles = [(f, utils.load_h5(f, mode)) for f in self._data_files] + # Open and check HDF5 file(s) + self._h5_file_handles = [utils.load_h5(f, mode) for f in self._data_files] + if require_magic: + map(utils.check_magic, self._h5_file_handles) # Check magic attribute in h5 files + + # Check version number + avail_versions = set(map(utils.get_version, self._h5_file_handles)) + if len(avail_versions) == 1: + self._version = list(avail_versions)[0] + elif len(avail_versions) > 1: + # TODO: log as warning + print('Warning: Passing in multiple hdf5 files of different version') + self._version = ','.join(avail_versions) + else: + self._version = utils.VERSION_NA + self._csv_file_handles = [(f, utils.load_csv(f)) for f in self._data_type_files] self._has_nodes = False @@ -75,6 +89,10 @@ def edges(self): def has_edges(self): return self._has_edges + @property + def version(self): + return self._version + def _sort_types_file(self): # TODO: node/edge type_id columnn names should not be hardcoded for filename, df in self._csv_file_handles: @@ -92,11 +110,11 @@ def _sort_types_file(self): print('Warning: Could not determine if file {} was an edge-types or node-types file. Ignoring'.format(filename)) def _sort_h5_files(self): - for filename, h5 in self._h5_file_handles: + for h5 in self._h5_file_handles: has_nodes = '/nodes' in h5 has_edges = '/edges' in h5 if not (has_nodes or has_edges): - print('File {} contains neither nodes nor edges. Ignoring'.format(filename)) + print('File {} contains neither nodes nor edges. Ignoring'.format(h5.filename)) else: if has_nodes: self._nodes_groups.append(h5) diff --git a/src/pysonata/sonata/io/group.py b/src/pysonata/sonata/io/group.py index 4966152..32a4387 100644 --- a/src/pysonata/sonata/io/group.py +++ b/src/pysonata/sonata/io/group.py @@ -43,6 +43,8 @@ def __init__(self, group_id, h5_group, parent): self._types_index_col = self._types_table.index_column_name self._group_columns = ColumnProperty.from_h5(h5_group) + # TODO: combine group_columns, group_column_names and group_columns_map, doesn't need to be 3 structures + self._group_column_map = {col.name: col for col in self._group_columns} self._group_column_names = set(col.name for col in self._group_columns) self._group_table = {prop: h5_group[prop.name] for prop in self._group_columns} self._ncolumns = len(self._group_columns) @@ -74,10 +76,33 @@ def has_dynamics_params(self): def columns(self): return self._group_columns + @property + def group_columns(self): + return self._group_columns + @property def all_columns(self): return self._all_columns + @property + def has_gids(self): + return self._parent.has_gids + + @property + def parent(self): + return self._parent + + def get_dataset(self, column_name): + return self._group_table[column_name] + + def column(self, column_name, group_only=False): + if column_name in self._group_column_map: + return self._group_column_map[column_name] + elif not group_only and column_name in self._types_table.columns: + return self._types_table.column(column_name) + else: + return KeyError + def check_format(self): # Check that all the properties have the same number of rows col_counts = [col.nrows for col in self._group_columns + self._dynamics_params_columns] @@ -266,6 +291,7 @@ def filter(self, **filter_props): if group_filter: # Filter by group property values + # TODO: Allow group properties to handle lists src_failed = True for k, v in group_prop_filter.items(): if node[k] != v: @@ -306,6 +332,27 @@ def build_indicies(self, force=False): def to_dataframe(self): raise NotImplementedError + def _get_parent_ds(self, parent_ds): + self.build_indicies() + ds_vals = np.zeros(self._indicies_count, dtype=parent_ds.dtype) + c_indx = 0 + for indx_range in self._parent_indicies: + indx_beg, indx_end = indx_range[0], indx_range[1] + n_indx = c_indx + (indx_end - indx_beg) + ds_vals[c_indx:n_indx] = parent_ds[indx_beg:indx_end] + c_indx = n_indx + + return ds_vals + + def src_node_ids(self): + return self._get_parent_ds(self.parent._source_node_id_ds) + + def trg_node_ids(self): + return self._get_parent_ds(self.parent._target_node_id_ds) + + def node_type_ids(self): + return self._get_parent_ds(self.parent._type_id_ds) + def get_values(self, property_name, all_rows=False): # TODO: Need to take into account if property_name is in the edge-types if property_name not in self.columns: diff --git a/src/pysonata/sonata/io/node.py b/src/pysonata/sonata/io/node.py index 419d9f0..4fa24ae 100644 --- a/src/pysonata/sonata/io/node.py +++ b/src/pysonata/sonata/io/node.py @@ -20,6 +20,8 @@ # WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. # + + class NodeSet(object): # TODO: Merge NodeSet and NodePopulation def __init__(self, node_indicies, population, **parameters): @@ -71,13 +73,14 @@ def __next__(self): class Node(object): # TODO: include population name/reference - # TODO: make a dictionary - def __init__(self, node_id, node_type_id, node_types_props, group_props, dynamics_params, gid=None): + # TODO: make a dictionary (or preferably a collections.MutableMap + def __init__(self, node_id, node_type_id, node_types_props, group_id, group_props, dynamics_params, gid=None): self._node_id = node_id self._gid = gid self._node_type_id = node_type_id - self._group_props = group_props self._node_type_props = node_types_props + self._group_id = group_id + self._group_props = group_props @property def node_id(self): @@ -87,10 +90,22 @@ def node_id(self): def gid(self): return self._gid + @property + def group_id(self): + return self._group_id + @property def node_type_id(self): return self._node_type_id + @property + def group_props(self): + return self._group_props + + @property + def node_type_properties(self): + return self._node_type_props + @property def dynamics_params(self): raise NotImplementedError @@ -100,8 +115,12 @@ def __getitem__(self, prop_key): return self._group_props[prop_key] elif prop_key in self._node_type_props: return self._node_type_props[prop_key] + elif prop_key == 'node_id': + return self.node_id + elif property == 'node_type_id': + return self.node_type_id else: - raise KeyError + raise KeyError('Unknown property {}'.format(prop_key)) def __contains__(self, prop_key): return prop_key in self._group_props or prop_key in self._node_type_props diff --git a/src/pysonata/sonata/io/population.py b/src/pysonata/sonata/io/population.py index 039433a..0edebd2 100644 --- a/src/pysonata/sonata/io/population.py +++ b/src/pysonata/sonata/io/population.py @@ -69,6 +69,10 @@ def groups(self): def types_table(self): return self._types_table + @property + def type_ids(self): + return np.array(self._type_id_ds) + @property def group_id_ds(self): return self._group_id_ds @@ -247,7 +251,7 @@ def get_row(self, row_indx): node_group_props = self.get_group(node_group_id)[node_group_index] node_gid = self._gid_lookup_fnc(row_indx) - return Node(node_id, node_type_id, node_type_props, node_group_props, None, gid=node_gid) + return Node(node_id, node_type_id, node_type_props, node_group_id, node_group_props, None, gid=node_gid) def get_rows(self, row_indicies): """Returns a set of all nodes based on list of row indicies. @@ -285,6 +289,11 @@ def get_gid(self, gid): row_indx = self._index_gid2row.iloc[gid]['row_id'] return self.get_row(row_indx) + def filter(self, **filter_props): + for grp in self.groups: + for node in grp.filter(**filter_props): + yield node + def _build_node_id_index(self, force=False): if self._node_id_index_built and not force: return @@ -310,6 +319,22 @@ def __next__(self): self.__itr_index += 1 return nxt_node + def __getitem__(self, item): + if isinstance(item, slice): + # TODO: Check + start = item.start if item.start is not None else 0 + stop = item.stop if item.stop is not None else self._nrows + row_indicies = range_itr(start, stop, item.step) + return NodeSet(row_indicies, self) + + elif isinstance(item, int): + return self.get_row(item) + + elif isinstance(item, list): + return NodeSet(item) + else: + print('Unable to get item using {}.'.format(type(item))) + class EdgePopulation(Population): class __IndexStruct(object): @@ -364,11 +389,11 @@ def target_population(self): @staticmethod def get_source_population(pop_group_h5): - return get_attribute_h5(pop_group_h5['source_node_id'], 'network', None) + return get_attribute_h5(pop_group_h5['source_node_id'], 'node_population', None) @staticmethod def get_target_population(pop_group_h5): - return get_attribute_h5(pop_group_h5['target_node_id'], 'network', None) + return get_attribute_h5(pop_group_h5['target_node_id'], 'node_population', None) @property def edge_types_table(self): @@ -392,7 +417,6 @@ def build_indicies(self): raise Exception('index {} in {} edges is missing column {}.'.format(index_name, self.name, 'node_id_to_range')) if 'range_to_edge_id' not in index_grp: - # TODO: make this more general, i.e 'id_to_range' thus we can index on gids, edge_types, etc raise Exception('index {} in {} edges is missing column {}.'.format(index_name, self.name, 'range_to_edge_id')) @@ -463,10 +487,10 @@ def get_row(self, index): edge_group_index = self._group_index_ds[index] edge_group_props = self.get_group(edge_group_id)[edge_group_index] return Edge(trg_node_id=trg_node, src_node_id=src_node, source_pop=self.source_population, - target_pop=self.target_population, group_props=edge_group_props, edge_types_props=edge_types_props) + target_pop=self.target_population, group_id = edge_group_id, + group_props=edge_group_props, edge_types_props=edge_types_props) def filter(self, **filter_props): - selected_edge_types = set(self.edge_types_table.edge_type_ids) types_filter = False # Do we need to filter results by edge_type_id if 'edge_type_id' in filter_props: @@ -557,8 +581,11 @@ def get_sources(self, source_node_ids): def _get_index(self, index_struct, lookup_id): # TODO: Use a EdgeSet instead - edges_table = index_struct.edge_table + if lookup_id >= len(index_struct.lookup_table): + # TODO: Store length in index + raise StopIteration + edges_table = index_struct.edge_table lookup_beg, lookup_end = index_struct.lookup_table[lookup_id] for i in range_itr(lookup_beg, lookup_end): edge_indx_beg, edge_indx_end = edges_table[i] diff --git a/src/pysonata/sonata/io/types_table.py b/src/pysonata/sonata/io/types_table.py index 531dbaf..375d332 100644 --- a/src/pysonata/sonata/io/types_table.py +++ b/src/pysonata/sonata/io/types_table.py @@ -21,6 +21,7 @@ # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. # import numpy as np +import pandas as pd import numbers import math @@ -46,6 +47,9 @@ def __init__(self, parent=None): self._cached_node_types = {} self._df_cache = None + self._itr_indx = 0 + self._itr_end = 0 + @property def index_column_name(self): raise NotImplementedError @@ -93,13 +97,18 @@ def find(self, column_key, column_val, silent=False): if not silent and column_key not in self.columns: raise KeyError + is_list = isinstance(column_val, list) selected_ids = [] # running list of valid type-ids column_dtype = self.column(column_key).dtype for df in self._column_map[column_key]: # if a csv column has all NONE values, pandas will load the values as float(NaN)'s. Thus for str/object # columns we need to check dtype otherwise we'll get an invalid comparisson. if df[column_key].dtype == column_dtype: - indicies = df[df[column_key] == column_val].index + if is_list: + indicies = df[df[column_key].isin(column_val)].index + else: + indicies = df[df[column_key] == column_val].index + if len(indicies) > 0: selected_ids.extend(list(indicies)) @@ -117,7 +126,24 @@ def to_dataframe(self, cache=False): # merge all dataframes together merged_table = self._dataframes[0].reset_index() # TODO: just merge on the indicies rather than reset for df in self._dataframes[1:]: - merged_table = merged_table.merge(df.reset_index(), how='outer') + try: + merged_table = merged_table.merge(df.reset_index(), how='outer') + except ValueError as ve: + # There is a potential issue if merging where one dtype is different from another (ex, if all + # model_template's are NONE pandas will load column as float64). First solution is to find columns + # that differ and upcast columns as object's (TODO: look for better solution) + right_df = df.reset_index() + for col in set(merged_table.columns) & set(right_df.columns): + # find all shared columns whose dtype differs + if merged_table[col].dtype != right_df[col].dtype: + # change column(s) dtype to object + merged_table[col] = merged_table[col] if merged_table[col].dtype == object \ + else merged_table[col].astype(object) + right_df[col] = right_df[col] if right_df[col].dtype == object \ + else right_df[col].astype(object) + + merged_table = merged_table.merge(right_df, how='outer') + merged_table.set_index(self.index_column_name, inplace=True) if cache: @@ -125,6 +151,22 @@ def to_dataframe(self, cache=False): return merged_table + def __iter__(self): + self._itr_indx = 0 + self._itr_end = len(self.type_ids) + return self + + def next(self): + return self.__next__() + + def __next__(self): + if self._itr_indx >= self._itr_end: + raise StopIteration + + ntid = self.type_ids[self._itr_indx] + self._itr_indx += 1 + return self[ntid] + def __getitem__(self, type_id): if isinstance(type_id, tuple): return [self[ntid] for ntid in type_id] @@ -148,6 +190,9 @@ def __getitem__(self, type_id): def __contains__(self, type_id): return type_id in self._index_typeid2df + def __repr__(self): + return repr(self.to_dataframe()) + class NodeTypesTable(TypesTable): def __init__(self, parent=None): diff --git a/src/pysonata/sonata/io/utils.py b/src/pysonata/sonata/io/utils.py index 5c3743a..953572d 100644 --- a/src/pysonata/sonata/io/utils.py +++ b/src/pysonata/sonata/io/utils.py @@ -25,6 +25,21 @@ import h5py import pandas as pd +import numpy as np + +MAGIC_ATTR = 'magic' +MAGIC_VAL = 0x0A7A +VERSION_ATTR = 'version' +VERSION_NA = 'NA' +VERSION_CURRENT = '0.1' + +try: + ver_split = VERSION_CURRENT.split('.') + VERSION_MAJOR = ver_split[0] + VERSION_MINOR = ver_split[1] +except (IndexError, AttributeError) as err: + VERSION_MAJOR = 0 + VERSION_MINOR = 1 def listify(files): @@ -35,7 +50,7 @@ def listify(files): return files -def load_h5(h5file, mode): +def load_h5(h5file, mode='r'): # TODO: Allow for h5py.Group also if isinstance(h5file, h5py.File): return h5file @@ -53,7 +68,7 @@ def load_csv(csvfile): def get_attribute_h5(h5obj, attribut_name, default=None): - val = h5obj.attrs.get('network', default) + val = h5obj.attrs.get(attribut_name, default) if using_py3 and isinstance(val, bytes): # There is an but with h5py returning unicode/str based attributes as bytes val = val.decode() @@ -61,6 +76,38 @@ def get_attribute_h5(h5obj, attribut_name, default=None): return val +def check_magic(hdf5_file): + """Check the magic attribute exists according to the sonata format""" + h5_file_obj = load_h5(hdf5_file) + if MAGIC_ATTR not in h5_file_obj.attrs: + raise Exception('File {} missing top-level \"{}\" attribute.'.format(h5_file_obj.filename, MAGIC_ATTR)) + elif np.uint32(get_attribute_h5(hdf5_file, MAGIC_ATTR)) != MAGIC_VAL: + raise Exception('File {} has unexpected magic value (expected {})'.format(h5_file_obj.filename, MAGIC_VAL)) + + return True + + +def get_version(hdf5_file): + h5_file_obj = load_h5(hdf5_file) + if VERSION_ATTR not in h5_file_obj.attrs: + return VERSION_NA + + else: + version_val = get_attribute_h5(h5_file_obj, VERSION_ATTR) + version_str = str(version_val[0]) + for ver_sub in version_val[1:]: + version_str += '.{}'.format(ver_sub) + return version_str + + +def add_hdf5_magic(hdf5_handle): + hdf5_handle['/'].attrs['magic'] = np.uint32(0x0A7A) + + +def add_hdf5_version(hdf5_handle): + hdf5_handle['/'].attrs['version'] = [np.uint32(VERSION_MAJOR), np.uint32(VERSION_MINOR)] + + if sys.version_info[0] == 3: using_py3 = True range_itr = range diff --git a/src/pysonata/sonata/tests/examples/gid_table.h5 b/src/pysonata/sonata/tests/examples/gid_table.h5 index c59142b..2469b9f 100644 Binary files a/src/pysonata/sonata/tests/examples/gid_table.h5 and b/src/pysonata/sonata/tests/examples/gid_table.h5 differ diff --git a/src/pysonata/sonata/tests/examples/lgn_nodes.h5 b/src/pysonata/sonata/tests/examples/lgn_nodes.h5 index 2541b46..b126dd6 100644 Binary files a/src/pysonata/sonata/tests/examples/lgn_nodes.h5 and b/src/pysonata/sonata/tests/examples/lgn_nodes.h5 differ diff --git a/src/pysonata/sonata/tests/examples/lgn_v1_edges.h5 b/src/pysonata/sonata/tests/examples/lgn_v1_edges.h5 index 97f5b6b..ef61d7f 100644 Binary files a/src/pysonata/sonata/tests/examples/lgn_v1_edges.h5 and b/src/pysonata/sonata/tests/examples/lgn_v1_edges.h5 differ diff --git a/src/pysonata/sonata/tests/examples/v1_nodes.h5 b/src/pysonata/sonata/tests/examples/v1_nodes.h5 index bc21043..5850a9f 100644 Binary files a/src/pysonata/sonata/tests/examples/v1_nodes.h5 and b/src/pysonata/sonata/tests/examples/v1_nodes.h5 differ diff --git a/src/pysonata/sonata/tests/examples/v1_v1_edges.h5 b/src/pysonata/sonata/tests/examples/v1_v1_edges.h5 index dfbcadf..318b3d0 100644 Binary files a/src/pysonata/sonata/tests/examples/v1_v1_edges.h5 and b/src/pysonata/sonata/tests/examples/v1_v1_edges.h5 differ diff --git a/src/pysonata/sonata/tests/test_file.py b/src/pysonata/sonata/tests/test_file.py index da0b2bd..69774ea 100644 --- a/src/pysonata/sonata/tests/test_file.py +++ b/src/pysonata/sonata/tests/test_file.py @@ -1,4 +1,5 @@ import pytest +import tempfile from sonata.io import File @@ -26,6 +27,30 @@ def test_load_files(): assert(net.has_edges) +def test_version(): + net = File(data_files=['examples/v1_nodes.h5', 'examples/v1_v1_edges.h5'], + data_type_files=['examples/v1_node_types.csv', 'examples/v1_v1_edge_types.csv']) + assert(net.version == '0.1') + + +def test_bad_magic(): + import h5py + tmp_file, tmp_file_name = tempfile.mkstemp(suffix='.hdf5') + # no magic + with h5py.File(tmp_file_name, 'r+') as h5: + h5.create_group('nodes') + + with pytest.raises(Exception): + File(data_files=tmp_file_name, data_type_files='examples/v1_node_types.csv') + + # bad magic + with h5py.File(tmp_file_name, 'r+') as h5: + h5.attrs['magic'] = 0x0A7B + + with pytest.raises(Exception): + File(data_files=tmp_file_name, data_type_files='examples/v1_node_types.csv') + + def test_no_files(): with pytest.raises(Exception): File(data_files=[], data_type_files=[]) diff --git a/src/pysonata/sonata/tests/test_nodes.py b/src/pysonata/sonata/tests/test_nodes.py index f250d85..385f1e1 100644 --- a/src/pysonata/sonata/tests/test_nodes.py +++ b/src/pysonata/sonata/tests/test_nodes.py @@ -171,5 +171,3 @@ def test_group_search(net): test_group_df(net()) test_group_properties(net()) test_group_search(net()) - - diff --git a/src/pysonata/sonata/utils/__init__.py b/src/pysonata/sonata/utils/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/pysonata/sonata/utils/add_magic.py b/src/pysonata/sonata/utils/add_magic.py new file mode 100644 index 0000000..453e2bb --- /dev/null +++ b/src/pysonata/sonata/utils/add_magic.py @@ -0,0 +1,29 @@ +import os +import sys +import h5py +import numpy as np +import glob + + +def add_magic(files): + # files = glob.glob('*.h5') #['v1_nodes.h5', 'v1_v1_edges.h5', 'lgn_nodes.h'] + for h5_file in files: + print('Updating {}.'.format(h5_file)) + with h5py.File(h5_file, 'r+') as h5: + h5['/'].attrs['magic'] = np.uint32(0x0A7A) + h5['/'].attrs['version'] = [np.uint32(0), np.uint32(1)] + + +if __name__ == '__main__': + if len(sys.argv) == 1: + add_magic(glob.glob('*.h5')) + else: + for pth in sys.argv[1:]: + abs_pth = os.path.abspath(pth) + if os.path.isdir(abs_pth): + lastwd = os.getcwd() + os.chdir(abs_pth) + add_magic(glob.glob("*.h5")) + os.chdir(lastwd) + if os.path.isfile(abs_pth): + add_magic(pth)