Completed normalization steps:
-
added submodule to nomad-lab-base called normalizer-prototypes in normalizers/prototypes
-
git submodule add git@gitlab.mpcdf.mpg.de:nomad-lab/normalizer-prototypes.git prototypes
-
edited and added .gitmodules
-
added
setup_paths.py
in ../prototypes/normalizer/normalizer-prototypes/setup_paths.py
SCALA PART
- added and adapted
PrototypesNormalizer.scala
in /nomad-lab-base/normalizers/prototypes/src/main/scala/eu/nomad_lab/normalizers
added cmd
cmd = Seq(DefaultPythonInterpreter.pythonExe(), "${envDir}/normalizers/prototypes/normalizer/normalizer-prototypes/classify4me_prototypes.py",
and defined function outputSysSection that writes out to json secSystem
-
added
PrototypesNormalizerSpec.scala
in /nomad-lab-base/normalizers/prototypes/src/test/scala/eu/nomad_lab/normalizers
and included: object PrototypesNormalizerSpec extends Specification
-
included normalizer-prototypes in
AllNormalizers.scala
-
modified
build.sbt
lazy val prototypesNormalizer = (project in file("normalizers/prototypes")).
+ dependsOn(core).
+ enablePlugins(BuildInfoPlugin).
+ settings(gitVersionSettings: _*).
+ settings(commonSettings: _*).
+ settings(
+ name := "prototypes",
+ (unmanagedResourceDirectories in Compile) += baseDirectory.value / "normalizer"
+ ).
+ settings(Revolver.settings: _*)
and added also in
lazy val normalize = (project in file("normalize")).
dependsOn(stats).
dependsOn(fhiAimsBasis).
dependsOn(springer).
+ dependsOn(prototypesNormalizer).
enablePlugins(BuildInfoPlugin).
settings(gitVersionSettings: _*).
settings(commonSettings: _*).
After running the normalization and dumping the chosen h5 for our metadata we get the following result:
HDF5 "/normalized/newProductionH5/NcC/NcC8TDWGWCtQLhWeB2a1N8y9Q7y4r.h5" {
DATASET "/RcC8TDWGWCtQLhWeB2a1N8y9Q7y4r/C-EE2qc_XLeNhj4VC1vjZGV-JXVtx/section_run/section_system/prototype_label-v" {
DATATYPE H5T_STRING {
STRSIZE H5T_VARIABLE;
STRPAD H5T_STR_NULLTERM;
CSET H5T_CSET_ASCII;
CTYPE H5T_C_S1;
}
DATASPACE SIMPLE { ( 1, 1 ) / ( H5S_UNLIMITED, 1 ) }
DATA {
(0,0): "229-W-cI2"
}
ATTRIBUTE "dtypeStr" {
DATATYPE H5T_STRING {
STRSIZE H5T_VARIABLE;
STRPAD H5T_STR_NULLTERM;
CSET H5T_CSET_ASCII;
CTYPE H5T_C_S1;
}
DATASPACE SIMPLE { ( 1 ) / ( 1 ) }
DATA {
(0): "C"
}
}
}
}
Full example:
GROUP "section_system" {
DATASET "atom_labels-index" {
DATATYPE H5T_STD_I64LE
DATASPACE SIMPLE { ( 3, 3 ) / ( H5S_UNLIMITED, 3 ) }
DATA {
(0,0): 0, 0, 2,
(1,0): 1, 1, 2,
(2,0): 2, 2, 2
}
ATTRIBUTE "dtypeStr" {
DATATYPE H5T_STRING {
STRSIZE H5T_VARIABLE;
STRPAD H5T_STR_NULLTERM;
CSET H5T_CSET_ASCII;
CTYPE H5T_C_S1;
}
DATASPACE SIMPLE { ( 1 ) / ( 1 ) }
DATA {
(0): "i64"
}
}
}
DATASET "atom_labels-v.C" {
DATATYPE H5T_STRING {
STRSIZE H5T_VARIABLE;
STRPAD H5T_STR_NULLTERM;
CSET H5T_CSET_ASCII;
CTYPE H5T_C_S1;
}
DATASPACE SIMPLE { ( 3, 2 ) / ( H5S_UNLIMITED, 2 ) }
DATA {
(0,0): "Sn", "Sn",
(1,0): "Sn", "Sn",
(2,0): "Sn", "Sn"
}
ATTRIBUTE "dtypeStr" {
DATATYPE H5T_STRING {
STRSIZE H5T_VARIABLE;
STRPAD H5T_STR_NULLTERM;
CSET H5T_CSET_ASCII;
CTYPE H5T_C_S1;
}
DATASPACE SIMPLE { ( 1 ) / ( 1 ) }
DATA {
(0): "C"
}
}
}
DATASET "atom_positions-index" {
DATATYPE H5T_STD_I64LE
DATASPACE SIMPLE { ( 3, 3 ) / ( H5S_UNLIMITED, 3 ) }
DATA {
(0,0): 0, 0, 2,
(1,0): 1, 1, 2,
(2,0): 2, 2, 2
}
ATTRIBUTE "dtypeStr" {
DATATYPE H5T_STRING {
STRSIZE H5T_VARIABLE;
STRPAD H5T_STR_NULLTERM;
CSET H5T_CSET_ASCII;
CTYPE H5T_C_S1;
}
DATASPACE SIMPLE { ( 1 ) / ( 1 ) }
DATA {
(0): "i64"
}
}
}
DATASET "atom_positions-v.C" {
DATATYPE H5T_IEEE_F64LE
DATASPACE SIMPLE { ( 3, 2, 3 ) / ( H5S_UNLIMITED, 2, 3 ) }
DATA {
(0,0,0): 0, 0, 0,
(0,1,0): 1.68681e-10, -9.73881e-11, 2.81227e-10,
(1,0,0): 0, 0, 0,
(1,1,0): 1.68681e-10, -9.73881e-11, 2.81227e-10,
(2,0,0): 0, 0, 0,
(2,1,0): 1.68681e-10, -9.73881e-11, 2.81227e-10
}
ATTRIBUTE "dtypeStr" {
DATATYPE H5T_STRING {
STRSIZE H5T_VARIABLE;
STRPAD H5T_STR_NULLTERM;
CSET H5T_CSET_ASCII;
CTYPE H5T_C_S1;
}
DATASPACE SIMPLE { ( 1 ) / ( 1 ) }
DATA {
(0): "f"
}
}
}
DATASET "atom_species-index" {
DATATYPE H5T_STD_I64LE
DATASPACE SIMPLE { ( 6, 1 ) / ( H5S_UNLIMITED, 1 ) }
DATA {
(0,0): 0,
(1,0): 0,
(2,0): 1,
(3,0): 1,
(4,0): 2,
(5,0): 2
}
ATTRIBUTE "dtypeStr" {
DATATYPE H5T_STRING {
STRSIZE H5T_VARIABLE;
STRPAD H5T_STR_NULLTERM;
CSET H5T_CSET_ASCII;
CTYPE H5T_C_S1;
}
DATASPACE SIMPLE { ( 1 ) / ( 1 ) }
DATA {
(0): "i64"
}
}
}
DATASET "atom_species-v" {
DATATYPE H5T_STD_I32LE
DATASPACE SIMPLE { ( 6, 1 ) / ( H5S_UNLIMITED, 1 ) }
DATA {
(0,0): 50,
(1,0): 50,
(2,0): 50,
(3,0): 50,
(4,0): 50,
(5,0): 50
}
ATTRIBUTE "dtypeStr" {
DATATYPE H5T_STRING {
STRSIZE H5T_VARIABLE;
STRPAD H5T_STR_NULLTERM;
CSET H5T_CSET_ASCII;
CTYPE H5T_C_S1;
}
DATASPACE SIMPLE { ( 1 ) / ( 1 ) }
DATA {
(0): "i"
}
}
}
DATASET "configuration_periodic_dimensions-index" {
DATATYPE H5T_STD_I64LE
DATASPACE SIMPLE { ( 3, 1 ) / ( H5S_UNLIMITED, 1 ) }
DATA {
(0,0): 0,
(1,0): 1,
(2,0): 2
}
ATTRIBUTE "dtypeStr" {
DATATYPE H5T_STRING {
STRSIZE H5T_VARIABLE;
STRPAD H5T_STR_NULLTERM;
CSET H5T_CSET_ASCII;
CTYPE H5T_C_S1;
}
DATASPACE SIMPLE { ( 1 ) / ( 1 ) }
DATA {
(0): "i64"
}
}
}
DATASET "configuration_periodic_dimensions-v" {
DATATYPE H5T_STD_I8LE
DATASPACE SIMPLE { ( 3, 3 ) / ( H5S_UNLIMITED, 3 ) }
DATA {
(0,0): 1, 1, 1,
(1,0): 1, 1, 1,
(2,0): 1, 1, 1
}
ATTRIBUTE "dtypeStr" {
DATATYPE H5T_STRING {
STRSIZE H5T_VARIABLE;
STRPAD H5T_STR_NULLTERM;
CSET H5T_CSET_ASCII;
CTYPE H5T_C_S1;
}
DATASPACE SIMPLE { ( 1 ) / ( 1 ) }
DATA {
(0): "b"
}
}
}
DATASET "configuration_raw_gid-index" {
DATATYPE H5T_STD_I64LE
DATASPACE SIMPLE { ( 3, 1 ) / ( H5S_UNLIMITED, 1 ) }
DATA {
(0,0): 0,
(1,0): 1,
(2,0): 2
}
ATTRIBUTE "dtypeStr" {
DATATYPE H5T_STRING {
STRSIZE H5T_VARIABLE;
STRPAD H5T_STR_NULLTERM;
CSET H5T_CSET_ASCII;
CTYPE H5T_C_S1;
}
DATASPACE SIMPLE { ( 1 ) / ( 1 ) }
DATA {
(0): "i64"
}
}
}
DATASET "configuration_raw_gid-v" {
DATATYPE H5T_STRING {
STRSIZE H5T_VARIABLE;
STRPAD H5T_STR_NULLTERM;
CSET H5T_CSET_ASCII;
CTYPE H5T_C_S1;
}
DATASPACE SIMPLE { ( 3, 1 ) / ( H5S_UNLIMITED, 1 ) }
DATA {
(0,0): "sOOqDEFMMzV_Wz8ZWWu3t2tc3E8lw",
(1,0): "sOOqDEFMMzV_Wz8ZWWu3t2tc3E8lw",
(2,0): "sOOqDEFMMzV_Wz8ZWWu3t2tc3E8lw"
}
ATTRIBUTE "dtypeStr" {
DATATYPE H5T_STRING {
STRSIZE H5T_VARIABLE;
STRPAD H5T_STR_NULLTERM;
CSET H5T_CSET_ASCII;
CTYPE H5T_C_S1;
}
DATASPACE SIMPLE { ( 1 ) / ( 1 ) }
DATA {
(0): "C"
}
}
}
DATASET "lattice_vectors-index" {
DATATYPE H5T_STD_I64LE
DATASPACE SIMPLE { ( 3, 1 ) / ( H5S_UNLIMITED, 1 ) }
DATA {
(0,0): 0,
(1,0): 1,
(2,0): 2
}
ATTRIBUTE "dtypeStr" {
DATATYPE H5T_STRING {
STRSIZE H5T_VARIABLE;
STRPAD H5T_STR_NULLTERM;
CSET H5T_CSET_ASCII;
CTYPE H5T_C_S1;
}
DATASPACE SIMPLE { ( 1 ) / ( 1 ) }
DATA {
(0): "i64"
}
}
}
DATASET "lattice_vectors-v" {
DATATYPE H5T_IEEE_F64LE
DATASPACE SIMPLE { ( 3, 3, 3 ) / ( H5S_UNLIMITED, 3, 3 ) }
DATA {
(0,0,0): 1.68681e-10, -2.92164e-10, 0,
(0,1,0): 1.68681e-10, 2.92164e-10, 0,
(0,2,0): 0, 0, 5.62454e-10,
(1,0,0): 1.68681e-10, -2.92164e-10, 0,
(1,1,0): 1.68681e-10, 2.92164e-10, 0,
(1,2,0): 0, 0, 5.62454e-10,
(2,0,0): 1.68681e-10, -2.92164e-10, 0,
(2,1,0): 1.68681e-10, 2.92164e-10, 0,
(2,2,0): 0, 0, 5.62454e-10
}
ATTRIBUTE "dtypeStr" {
DATATYPE H5T_STRING {
STRSIZE H5T_VARIABLE;
STRPAD H5T_STR_NULLTERM;
CSET H5T_CSET_ASCII;
CTYPE H5T_C_S1;
}
DATASPACE SIMPLE { ( 1 ) / ( 1 ) }
DATA {
(0): "f"
}
}
}
DATASET "prototype_label-index" {
DATATYPE H5T_STD_I64LE
DATASPACE SIMPLE { ( 2, 1 ) / ( H5S_UNLIMITED, 1 ) }
DATA {
(0,0): 0,
(1,0): 2
}
ATTRIBUTE "dtypeStr" {
DATATYPE H5T_STRING {
STRSIZE H5T_VARIABLE;
STRPAD H5T_STR_NULLTERM;
CSET H5T_CSET_ASCII;
CTYPE H5T_C_S1;
}
DATASPACE SIMPLE { ( 1 ) / ( 1 ) }
DATA {
(0): "i64"
}
}
}
DATASET "prototype_label-v" {
DATATYPE H5T_STRING {
STRSIZE H5T_VARIABLE;
STRPAD H5T_STR_NULLTERM;
CSET H5T_CSET_ASCII;
CTYPE H5T_C_S1;
}
DATASPACE SIMPLE { ( 2, 1 ) / ( H5S_UNLIMITED, 1 ) }
DATA {
(0,0): "194-Mg-hP2",
(1,0): "194-Mg-hP2"
}
ATTRIBUTE "dtypeStr" {
DATATYPE H5T_STRING {
STRSIZE H5T_VARIABLE;
STRPAD H5T_STR_NULLTERM;
CSET H5T_CSET_ASCII;
CTYPE H5T_C_S1;
}
DATASPACE SIMPLE { ( 1 ) / ( 1 ) }
DATA {
(0): "C"
}
}
}
DATASET "section_system-index" {
DATATYPE H5T_STD_I64LE
DATASPACE SIMPLE { ( 3, 1 ) / ( H5S_UNLIMITED, 1 ) }
DATA {
(0,0): 0,
(1,0): 0,
(2,0): 0
}
ATTRIBUTE "dtypeStr" {
DATATYPE H5T_STRING {
STRSIZE H5T_VARIABLE;
STRPAD H5T_STR_NULLTERM;
CSET H5T_CSET_ASCII;
CTYPE H5T_C_S1;
}
DATASPACE SIMPLE { ( 1 ) / ( 1 ) }
DATA {
(0): "i64"
}
}
}
DATASET "simulation_cell-index" {
DATATYPE H5T_STD_I64LE
DATASPACE SIMPLE { ( 3, 1 ) / ( H5S_UNLIMITED, 1 ) }
DATA {
(0,0): 0,
(1,0): 1,
(2,0): 2
}
ATTRIBUTE "dtypeStr" {
DATATYPE H5T_STRING {
STRSIZE H5T_VARIABLE;
STRPAD H5T_STR_NULLTERM;
CSET H5T_CSET_ASCII;
CTYPE H5T_C_S1;
}
DATASPACE SIMPLE { ( 1 ) / ( 1 ) }
DATA {
(0): "i64"
}
}
}
DATASET "simulation_cell-v" {
DATATYPE H5T_IEEE_F64LE
DATASPACE SIMPLE { ( 3, 3, 3 ) / ( H5S_UNLIMITED, 3, 3 ) }
DATA {
(0,0,0): 1.68681e-10, -2.92164e-10, 0,
(0,1,0): 1.68681e-10, 2.92164e-10, 0,
(0,2,0): 0, 0, 5.62454e-10,
(1,0,0): 1.68681e-10, -2.92164e-10, 0,
(1,1,0): 1.68681e-10, 2.92164e-10, 0,
(1,2,0): 0, 0, 5.62454e-10,
(2,0,0): 1.68681e-10, -2.92164e-10, 0,
(2,1,0): 1.68681e-10, 2.92164e-10, 0,
(2,2,0): 0, 0, 5.62454e-10
}
ATTRIBUTE "dtypeStr" {
DATATYPE H5T_STRING {
STRSIZE H5T_VARIABLE;
STRPAD H5T_STR_NULLTERM;
CSET H5T_CSET_ASCII;
CTYPE H5T_C_S1;
}
DATASPACE SIMPLE { ( 1 ) / ( 1 ) }
DATA {
(0): "f"
}
}
}
DATASET "system_composition-index" {
DATATYPE H5T_STD_I64LE
DATASPACE SIMPLE { ( 3, 1 ) / ( H5S_UNLIMITED, 1 ) }
DATA {
(0,0): 0,
(1,0): 1,
(2,0): 2
}
ATTRIBUTE "dtypeStr" {
DATATYPE H5T_STRING {
STRSIZE H5T_VARIABLE;
STRPAD H5T_STR_NULLTERM;
CSET H5T_CSET_ASCII;
CTYPE H5T_C_S1;
}
DATASPACE SIMPLE { ( 1 ) / ( 1 ) }
DATA {
(0): "i64"
}
}
}
DATASET "system_composition-v" {
DATATYPE H5T_STRING {
STRSIZE H5T_VARIABLE;
STRPAD H5T_STR_NULLTERM;
CSET H5T_CSET_ASCII;
CTYPE H5T_C_S1;
}
DATASPACE SIMPLE { ( 3, 1 ) / ( H5S_UNLIMITED, 1 ) }
DATA {
(0,0): "Sn2",
(1,0): "Sn2",
(2,0): "Sn2"
}
ATTRIBUTE "dtypeStr" {
DATATYPE H5T_STRING {
STRSIZE H5T_VARIABLE;
STRPAD H5T_STR_NULLTERM;
CSET H5T_CSET_ASCII;
CTYPE H5T_C_S1;
}
DATASPACE SIMPLE { ( 1 ) / ( 1 ) }
DATA {
(0): "C"
}
}
}
DATASET "system_configuration_consistent-index" {
DATATYPE H5T_STD_I64LE
DATASPACE SIMPLE { ( 3, 1 ) / ( H5S_UNLIMITED, 1 ) }
DATA {
(0,0): 0,
(1,0): 1,
(2,0): 2
}
ATTRIBUTE "dtypeStr" {
DATATYPE H5T_STRING {
STRSIZE H5T_VARIABLE;
STRPAD H5T_STR_NULLTERM;
CSET H5T_CSET_ASCII;
CTYPE H5T_C_S1;
}
DATASPACE SIMPLE { ( 1 ) / ( 1 ) }
DATA {
(0): "i64"
}
}
}
DATASET "system_configuration_consistent-v" {
DATATYPE H5T_STD_I8LE
DATASPACE SIMPLE { ( 3, 1 ) / ( H5S_UNLIMITED, 1 ) }
DATA {
(0,0): 1,
(1,0): 1,
(2,0): 1
}
ATTRIBUTE "dtypeStr" {
DATATYPE H5T_STRING {
STRSIZE H5T_VARIABLE;
STRPAD H5T_STR_NULLTERM;
CSET H5T_CSET_ASCII;
CTYPE H5T_C_S1;
}
DATASPACE SIMPLE { ( 1 ) / ( 1 ) }
DATA {
(0): "b"
}
}
}
DATASET "system_reweighted_composition-index" {
DATATYPE H5T_STD_I64LE
DATASPACE SIMPLE { ( 3, 1 ) / ( H5S_UNLIMITED, 1 ) }
DATA {
(0,0): 0,
(1,0): 1,
(2,0): 2
}
ATTRIBUTE "dtypeStr" {
DATATYPE H5T_STRING {
STRSIZE H5T_VARIABLE;
STRPAD H5T_STR_NULLTERM;
CSET H5T_CSET_ASCII;
CTYPE H5T_C_S1;
}
DATASPACE SIMPLE { ( 1 ) / ( 1 ) }
DATA {
(0): "i64"
}
}
}
DATASET "system_reweighted_composition-v" {
DATATYPE H5T_STRING {
STRSIZE H5T_VARIABLE;
STRPAD H5T_STR_NULLTERM;
CSET H5T_CSET_ASCII;
CTYPE H5T_C_S1;
}
DATASPACE SIMPLE { ( 3, 1 ) / ( H5S_UNLIMITED, 1 ) }
DATA {
(0,0): "Sn100",
(1,0): "Sn100",
(2,0): "Sn100"
}
ATTRIBUTE "dtypeStr" {
DATATYPE H5T_STRING {
STRSIZE H5T_VARIABLE;