nomad-lab / nomad-FAIR / Commits / 9cffe14b

Commit 9cffe14b, authored Apr 04, 2019 by Markus Scheidgen

    Added the EMS domain and a new parser skeleton.

parent 1105db4c
Changes: 20 files
nomad-meta-info @ 0d50b13f
-Subproject commit 8c917126279cbbecb7adb7e30fc7d641d9a31a78
+Subproject commit 0d50b13f66f45891bc441a82af73bb425167ee71

photoemission @ 51ad1335
-Subproject commit d4adc7e3266d420ffe12ed43cb1a6d87e8bd4df6
+Subproject commit 51ad1335acc2ab3cffac57117a7cfaa63e092103

skeleton @ 7d911fa1
-Subproject commit d8414907d150ee677d4b7ff89bbfcc7e39b159c8
+Subproject commit 7d911fa1a65885efb73381b77ee2dd975845c899

python_common @ 567a96be
-Subproject commit d8ee02926a9f37317c880ba3caa31f5140e7730e
+Subproject commit 567a96be45394556d94c61cb29e345ebb31a2f9f
nomad/client/local.py
@@ -140,13 +140,13 @@ class CalcProcReproduction:
         if not parser_backend.status[0] == 'ParseSuccess':
             self.logger.error('parsing was not successful', status=parser_backend.status)
 
-        parser_backend.openNonOverlappingSection('section_calculation_info')
+        parser_backend.openNonOverlappingSection('section_entry_info')
         parser_backend.addValue('upload_id', self.upload_id)
         parser_backend.addValue('calc_id', self.calc_id)
         parser_backend.addValue('calc_hash', "no hash")
-        parser_backend.addValue('main_file', self.mainfile)
+        parser_backend.addValue('mainfile', self.mainfile)
         parser_backend.addValue('parser_name', parser.__class__.__name__)
-        parser_backend.closeNonOverlappingSection('section_calculation_info')
+        parser_backend.closeNonOverlappingSection('section_entry_info')
         self.logger.info('ran parser')
         return parser_backend
nomad/datamodel/__init__.py
@@ -48,7 +48,9 @@ about ids, user metadata, etc. To define domain specific quantities the classes
 import sys
 
 from nomad.datamodel.base import UploadWithMetadata, CalcWithMetadata, Domain
+from nomad.datamodel import ems, dft
 from nomad.datamodel.dft import DFTCalcWithMetadata
+from nomad.datamodel.ems import EMSEntryWithMetadata
 
 # Override the CalcWithMetadata with the domain specific decendant
 setattr(sys.modules['nomad.datamodel'], 'CalcWithMetadata', Domain.instance.domain_entry_class)
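A short aside (not part of the commit): the setattr trick works because an import statement reads attributes off the module object in sys.modules, so whatever was assigned there last is what importers see. A minimal self-contained sketch of the mechanism:

    import sys
    import types

    # Create a throwaway module and register it, as if it had been imported.
    demo = types.ModuleType('demo')
    sys.modules['demo'] = demo

    # Re-bind an attribute on the module object, like the commit does for
    # CalcWithMetadata on nomad.datamodel (dict stands in for the domain class).
    setattr(sys.modules['demo'], 'CalcWithMetadata', dict)

    from demo import CalcWithMetadata
    assert CalcWithMetadata is dict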
nomad/datamodel/base.py
@@ -212,38 +212,40 @@ class Domain:
     instances of :class:`DomainQuantity`.
     """
 
     instance: 'Domain' = None
+    instances: Dict[str, 'Domain'] = {}
 
     def __init__(
             self, name: str, domain_entry_class: Type[CalcWithMetadata],
-            quantities: Dict[str, DomainQuantity]) -> None:
-
-        assert Domain.instance is None, 'you can only define one domain.'
-        Domain.instance = self
+            quantities: Dict[str, DomainQuantity],
+            root_sections=['section_run', 'section_entry_info']) -> None:
+
+        if name == config.domain:
+            assert Domain.instance is None, 'you can only define one domain.'
+            Domain.instance = self
+        Domain.instances[name] = self
 
         self.name = name
         self.domain_entry_class = domain_entry_class
         self.quantities: List[DomainQuantity] = []
+        self.root_sections = root_sections
 
         reference_domain_calc = domain_entry_class()
         reference_general_calc = CalcWithMetadata()
 
-        for name, value in reference_domain_calc.__dict__.items():
-            if not hasattr(reference_general_calc, name):
-                quantity = quantities.get(name, None)
+        for quantity_name, value in reference_domain_calc.__dict__.items():
+            if not hasattr(reference_general_calc, quantity_name):
+                quantity = quantities.get(quantity_name, None)
                 if quantity is None:
                     quantity = DomainQuantity()
-                    quantities[name] = quantity
-                quantity.name = name
+                    quantities[quantity_name] = quantity
+                quantity.name = quantity_name
                 quantity.multi = isinstance(value, list)
                 self.quantities.append(quantity)
 
-        for name in quantities.keys():
-            assert hasattr(reference_domain_calc, name) and not hasattr(reference_general_calc, name), \
+        for quantity_name in quantities.keys():
+            assert hasattr(reference_domain_calc, quantity_name) and not hasattr(reference_general_calc, quantity_name), \
                 'quantity does not exist or overrides general non domain quantity'
 
-        assert any(quantity.order_default for quantity in Domain.instance.quantities), \
+        assert any(quantity.order_default for quantity in Domain.instances[name].quantities), \
             'you need to define a order default quantity'
 
     @property
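The registry change is worth a note: each domain module makes one module-level Domain(...) call at import time, and only the domain named by config.domain is promoted to the active Domain.instance; all of them are recorded in Domain.instances. A hypothetical interactive check, assuming this commit's nomad package is importable and config.domain is left at its DFT default:

    from nomad.datamodel.base import Domain

    import nomad.datamodel  # triggers the module-level Domain(...) calls in dft.py and ems.py

    # Both domains are registered ...
    assert set(Domain.instances.keys()) == {'DFT', 'EMS'}

    # ... but only the configured one becomes the active instance.
    assert Domain.instance is Domain.instances['DFT']

    # Each registered domain remembers its archive root sections.
    print(Domain.instances['EMS'].root_sections)
    # ['section_experiment', 'section_entry_info']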
@@ -278,3 +280,30 @@ class Domain:
     def aggregations_names(self) -> Iterable[str]:
         """ Just the names of all metrics. """
         return list(self.aggregations.keys())
+
+
+def get_optional_backend_value(backend, key, section, unavailable_value=None, logger=None):
+    # Section is section_system, section_symmetry, etc...
+    val = None  # Initialize to None, so we can compare section values.
+    # Loop over the sections with the name section in the backend.
+    for section_index in backend.get_sections(section):
+        try:
+            new_val = backend.get_value(key, section_index)
+        except KeyError:
+            new_val = None
+
+        # Compare values from iterations.
+        if val is not None and new_val is not None:
+            if val.__repr__() != new_val.__repr__() and logger:
+                logger.warning(
+                    'The values for %s differ between different %s: %s vs %s' %
+                    (key, section, str(val), str(new_val)))
+        val = new_val if new_val is not None else val
+
+    if val is None and logger:
+        logger.warning('The values for %s where not available in any %s' % (key, section))
+        return unavailable_value if unavailable_value is not None else config.services.unavailable_value
+    else:
+        return val
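To see what the consolidation logic does, here is a hypothetical, self-contained sketch with a stand-in backend (FakeBackend and its two methods are illustrations, not nomad APIs); the helper walks all occurrences of a repeated section, warns on conflicting values, and keeps the last value it saw:

    from nomad.datamodel.base import get_optional_backend_value  # assuming this commit's nomad package

    class FakeBackend:
        # Minimal stand-in exposing just the two methods the helper calls.
        def __init__(self, sections):
            self.sections = sections  # one dict per occurrence of the section

        def get_sections(self, section):
            return range(len(self.sections))

        def get_value(self, key, section_index):
            return self.sections[section_index][key]  # raises KeyError if absent

    backend = FakeBackend([{'crystal_system': 'cubic'}, {}])
    # The second occurrence lacks the key, so the first value survives.
    assert get_optional_backend_value(backend, 'crystal_system', 'section_symmetry') == 'cubic'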
nomad/datamodel/dft.py
@@ -22,7 +22,7 @@ from elasticsearch_dsl import Integer
 from nomad import utils, config
-from .base import CalcWithMetadata, DomainQuantity, Domain
+from .base import CalcWithMetadata, DomainQuantity, Domain, get_optional_backend_value
 
 xc_treatments = {
@@ -92,58 +92,30 @@ class DFTCalcWithMetadata(CalcWithMetadata):
         logger = utils.get_logger(__name__).bind(
             upload_id=self.upload_id, calc_id=self.calc_id, mainfile=self.mainfile)
 
-        def get_optional_value(key, section, unavailable_value=None):
-            # Section is section_system, section_symmetry, etc...
-            val = None  # Initialize to None, so we can compare section values.
-            # Loop over the sections with the name section in the backend.
-            for section_index in backend.get_sections(section):
-                try:
-                    new_val = backend.get_value(key, section_index)
-                except KeyError:
-                    new_val = None
-
-                # Compare values from iterations.
-                if val is not None and new_val is not None:
-                    if val.__repr__() != new_val.__repr__():
-                        logger.warning(
-                            'The values for %s differ between different %s: %s vs %s' %
-                            (key, section, str(val), str(new_val)))
-                val = new_val if new_val is not None else val
-
-            if val is None:
-                logger.warning('The values for %s where not available in any %s' % (key, section))
-                return unavailable_value if unavailable_value is not None else config.services.unavailable_value
-            else:
-                return val
-
         if self.calc_id is None:
             self.calc_id = backend.get_value('calc_id')
         if self.upload_id is None:
             self.upload_id = backend.get_value('upload_id')
         if self.mainfile is None:
             self.mainfile = backend.get_value('main_file')
 
         self.code_name = backend.get_value('program_name', 0)
         self.code_version = simplify_version(backend.get_value('program_version', 0))
-        self.atoms = get_optional_value('atom_labels', 'section_system')
+        self.atoms = get_optional_backend_value(backend, 'atom_labels', 'section_system', logger=logger)
         if hasattr(self.atoms, 'tolist'):
             self.atoms = self.atoms.tolist()
         self.n_atoms = len(self.atoms)
         self.atoms = list(set(self.atoms))
         self.atoms.sort()
 
-        self.crystal_system = get_optional_value('crystal_system', 'section_symmetry')
-        self.spacegroup = get_optional_value('space_group_number', 'section_symmetry', 0)
-        self.spacegroup_symbol = get_optional_value('international_short_symbol', 'section_symmetry', 0)
+        self.crystal_system = get_optional_backend_value(backend, 'crystal_system', 'section_symmetry', logger=logger)
+        self.spacegroup = get_optional_backend_value(backend, 'space_group_number', 'section_symmetry', 0, logger=logger)
+        self.spacegroup_symbol = get_optional_backend_value(backend, 'international_short_symbol', 'section_symmetry', 0, logger=logger)
         self.basis_set = map_basis_set_to_basis_set_label(
-            get_optional_value('program_basis_set_type', 'section_run'))
-        self.system = get_optional_value('system_type', 'section_system')
-        self.formula = get_optional_value('chemical_composition_bulk_reduced', 'section_system')
+            get_optional_backend_value(backend, 'program_basis_set_type', 'section_run', logger=logger))
+        self.system = get_optional_backend_value(backend, 'system_type', 'section_system', logger=logger)
+        self.formula = get_optional_backend_value(backend, 'chemical_composition_bulk_reduced', 'section_system', logger=logger)
         self.xc_functional = map_functional_name_to_xc_treatment(
-            get_optional_value('XC_functional_name', 'section_method'))
+            get_optional_backend_value(backend, 'XC_functional_name', 'section_method', logger=logger))
 
         self.group_hash = utils.hash(
             self.formula,
@@ -202,6 +174,10 @@ Domain('DFT', DFTCalcWithMetadata, quantities=dict(
         'Hashes that describe unique geometries simulated by this code run.',
         metric=('geometries', 'cardinality')),
+    quantities=DomainQuantity(
+        'All quantities that are used by this calculation',
+        metric=('quantities', 'value_count')),
     n_total_energies=DomainQuantity(
         'Number of total energy calculations', metric=('total_energies', 'sum'),
nomad/datamodel/ems.py (new file, mode 100644)

# Copyright 2018 Markus Scheidgen
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""
Experimental material science specific metadata
"""

from typing import List

import ase.data

from nomad import utils
from .base import CalcWithMetadata, DomainQuantity, Domain, get_optional_backend_value


class EMSEntryWithMetadata(CalcWithMetadata):

    def __init__(self, **kwargs):
        # sample quantities
        self.formula: str = None
        self.atoms: List[str] = []
        self.n_atoms: int = 0
        self.chemical: str = None

        # general metadata
        self.location: str = None
        self.experiment_time: str = None
        self.method: str = None

        self.quantities = []
        self.group_hash: str = None

        super().__init__(**kwargs)

    def apply_domain_metadata(self, backend):
        logger = utils.get_logger(__name__).bind(
            upload_id=self.upload_id, calc_id=self.calc_id, mainfile=self.mainfile)

        self.formula = get_optional_backend_value(backend, 'sample_formula', 'section_sample', logger=logger)

        self.atoms = get_optional_backend_value(backend, 'sample_atoms', 'section_sample', logger=logger)
        if hasattr(self.atoms, 'tolist'):
            self.atoms = self.atoms.tolist()
        self.n_atoms = len(self.atoms)
        self.atoms = list(set(self.atoms))
        self.atoms.sort()

        self.chemical = get_optional_backend_value(backend, 'sample_formula', 'section_sample', logger=logger)

        self.location = get_optional_backend_value(backend, 'experiment_location', 'section_experiment', logger=logger)
        self.experiment_time = get_optional_backend_value(backend, 'experiment_time', 'section_experiment', logger=logger)
        self.method = get_optional_backend_value(backend, 'experiment_method', 'section_experiment', logger=logger)

        self.group_hash = utils.hash(
            self.formula,
            self.method,
            self.location,
            self.with_embargo,
            self.comment,
            self.references,
            self.uploader,
            self.coauthors)

        quantities = set()
        for meta_info, _, _ in backend._delegate.results.traverse(root_section='section_experiment'):
            quantities.add(meta_info)
        self.quantities = list(quantities)


Domain('EMS', EMSEntryWithMetadata, root_sections=['section_experiment', 'section_entry_info'], quantities=dict(
    formula=DomainQuantity(
        'The chemical (hill) formula of the simulated system.',
        order_default=True),
    atoms=DomainQuantity(
        'The atom labels of all atoms in the simulated system.',
        aggregations=len(ase.data.chemical_symbols)),
    method=DomainQuantity(
        'The experimental method used.', aggregations=20),
    location=DomainQuantity(
        'The used basis set functions.', aggregations=10),
    quantities=DomainQuantity(
        'All quantities that are used by this calculation',
        metric=('quantities', 'value_count'))))
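A hypothetical usage sketch (assuming this commit's nomad package; the field values are made up): EMSEntryWithMetadata behaves like any other CalcWithMetadata, it just carries sample and experiment fields instead of DFT ones.

    from nomad.datamodel.ems import EMSEntryWithMetadata

    entry = EMSEntryWithMetadata(upload_id='some_upload', calc_id='some_calc')

    # Domain fields start with the defaults declared in __init__ ...
    assert entry.n_atoms == 0 and entry.atoms == []

    # ... and are normally filled by apply_domain_metadata(backend) from the
    # parsed section_sample / section_experiment values.
    entry.method = 'electron microscopy'   # made-up values
    entry.location = 'some lab'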
nomad/normalizing/normalizer.py
@@ -31,10 +31,13 @@ class Normalizer(metaclass=ABCMeta):
     for read and write. Normalizer instances are reused.
 
     Arguments:
-        backend: the backend used to read and write data from and to
+        backend: The backend used to read and write data from and to.
     """
 
+    domain = 'DFT'
+    """ The domain this normalizer should be used in. Default for all normalizer is 'DFT'. """
+
     def __init__(self, backend: AbstractParserBackend) -> None:
         self._backend = backend
         self.logger = get_logger(__name__)
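Since Normalizer now carries a domain tag, a domain-specific normalizer only needs to override the class attribute; Calc.normalizing (see nomad/processing/data.py below) skips normalizers whose domain differs from config.domain. A hypothetical sketch, assuming this commit's nomad package (the normalize signature is an assumption):

    from nomad.normalizing.normalizer import Normalizer

    class ExampleEMSNormalizer(Normalizer):  # hypothetical, not in the commit
        domain = 'EMS'

        def normalize(self, logger=None) -> None:
            # would read from and write to self._backend here
            pass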
nomad/parsing/__init__.py
@@ -99,9 +99,10 @@ def match_parser(mainfile: str, upload_files: files.StagingUploadFiles) -> 'Pars
     mime_type = magic.from_buffer(buffer, mime=True)
     for parser in parsers:
-        if parser.is_mainfile(mainfile_path, mime_type, buffer, compression):
-            # TODO: deal with multiple possible parser specs
-            return parser
+        if parser.domain == config.domain:
+            if parser.is_mainfile(mainfile_path, mime_type, buffer, compression):
+                # TODO: deal with multiple possible parser specs
+                return parser
 
     return None

@@ -358,6 +359,12 @@ parsers = [
         mainfile_contents_re=(
             r'\s*(P?<progr>[a-zA-z0-9_]+)\s*(?:\([^()]+\))\s*:\s*TURBOMOLE\s*(P?<version>.*)'
             r'\s*Copyright \(C\) [0-9]+ TURBOMOLE GmbH, Karlsruhe')
     ),
+    LegacyParser(
+        name='parsers/skeleton', code_name='skeleton', domain='EMS',
+        parser_class_name='skeletonparser.SkeletonParserInterface',
+        mainfile_mime_re=r'(application/json)|(text/.*)',
+        mainfile_contents_re=(r'skeleton experimental metadata format')
+    )
 ]
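To make the new registration concrete, here is a hypothetical EMS mainfile that the skeleton parser would pick up; the file content is invented, only the marker string and the mime pattern come from the parser spec above:

    import re

    # From the LegacyParser spec:
    mainfile_mime_re = r'(application/json)|(text/.*)'
    mainfile_contents_re = r'skeleton experimental metadata format'

    # A made-up candidate file: JSON with the marker in its first bytes.
    buffer = b'{"comment": "skeleton experimental metadata format", "experiment_method": "TEM"}'

    assert re.match(mainfile_mime_re, 'application/json')
    assert re.search(mainfile_contents_re.encode(), buffer)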
nomad/parsing/parser.py
@@ -34,6 +34,9 @@ class Parser(metaclass=ABCMeta):
     and extracted files. Further, allows to run the parser on those 'main files'.
     """
 
+    def __init__(self):
+        self.domain = 'DFT'
+
     @abstractmethod
     def is_mainfile(self, filename: str, mime: str, buffer: bytes, compression: str = None) -> bool:
         """

@@ -76,6 +79,7 @@ class LegacyParser(Parser):
         mainfile_contents_re: A regexp that is used to match the first 1024 bytes of a
             potential mainfile.
         mainfile_name_re: A regexp that is used to match the paths of potential mainfiles
+        domain: The domain that this parser should be used for. Default is 'DFT'.
         supported_compressions: A list of [gz, bz2], if the parser supports compressed files
     """

     def __init__(

@@ -83,10 +87,14 @@ class LegacyParser(Parser):
             mainfile_contents_re: str = None,
             mainfile_mime_re: str = r'text/.*',
             mainfile_name_re: str = r'.*',
+            domain='DFT',
             supported_compressions: List[str] = []) -> None:
 
+        super().__init__()
+
         self.name = name
         self.parser_class_name = parser_class_name
+        self.domain = domain
         self._mainfile_mime_re = re.compile(mainfile_mime_re)
         self._mainfile_name_re = re.compile(mainfile_name_re)
         # Assign private variable this way to avoid static check issue.
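A quick hypothetical check of the new default (assuming this commit's nomad package; constructor arguments abbreviated to the ones shown above):

    from nomad.parsing.parser import LegacyParser

    parser = LegacyParser(
        name='parsers/example', code_name='example',            # made-up names
        parser_class_name='exampleparser.ExampleParserInterface')

    assert parser.domain == 'DFT'   # parsers count as DFT parsers unless tagged otherwise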
nomad/processing/data.py
@@ -38,7 +38,7 @@ from nomad.files import PathObject, UploadFiles, ExtractError, ArchiveBasedStagi
 from nomad.processing.base import Proc, process, task, PENDING, SUCCESS, FAILURE
 from nomad.parsing import parser_dict, match_parser, LocalBackend
 from nomad.normalizing import normalizers
-from nomad.datamodel import UploadWithMetadata, CalcWithMetadata
+from nomad.datamodel import UploadWithMetadata, CalcWithMetadata, Domain
 
 
 class Calc(Proc):

@@ -213,6 +213,7 @@ class Calc(Proc):
         context = dict(parser=self.parser, step=self.parser)
         logger = self.get_logger(**context)
         parser = parser_dict[self.parser]
+        self.metadata['parser_name'] = self.parser
 
         with utils.timer(logger, 'parser executed', input_size=self.mainfile_file.size):
             try:

@@ -226,29 +227,29 @@ class Calc(Proc):
         # add the non code specific calc metadata to the backend
         # all other quantities have been determined by parsers/normalizers
-        self._parser_backend.openNonOverlappingSection('section_calculation_info')
+        self._parser_backend.openNonOverlappingSection('section_entry_info')
         self._parser_backend.addValue('upload_id', self.upload_id)
         self._parser_backend.addValue('calc_id', self.calc_id)
         self._parser_backend.addValue('calc_hash', self.metadata['calc_hash'])
-        self._parser_backend.addValue('main_file', self.mainfile)
+        self._parser_backend.addValue('mainfile', self.mainfile)
         self._parser_backend.addValue('parser_name', self.parser)
+        filepaths = self.metadata['files']
+        self._parser_backend.addValue('number_of_files', len(filepaths))
+        self._parser_backend.addValue('filepaths', filepaths)
+        uploader = self.upload.uploader
+        self._parser_backend.addValue(
+            'entry_uploader_name', '%s, %s' % (uploader.first_name, uploader.last_name))
+        self._parser_backend.addValue('entry_uploader_id', str(uploader.user_id))
+        self._parser_backend.addValue('entry_upload_time', int(self.upload.upload_time.timestamp()))
+        self._parser_backend.closeNonOverlappingSection('section_entry_info')
+        self.add_processor_info(self.parser)
 
         if self._parser_backend.status[0] != 'ParseSuccess':
             logger.error(self._parser_backend.status[1])
             error = self._parser_backend.status[1]
             self._parser_backend.addValue('parse_status', 'ParseFailure')
             self.fail(error, level=logging.INFO, **context)
         else:
             self._parser_backend.addValue('parse_status', 'ParseSuccess')
-        self._parser_backend.closeNonOverlappingSection('section_calculation_info')
-        self._parser_backend.openNonOverlappingSection('section_repository_info')
-        self._parser_backend.addValue('repository_archive_gid', '%s/%s' % (self.upload_id, self.calc_id))
-        self._parser_backend.addValue('repository_filepaths', self.metadata['files'])
-        self._parser_backend.closeNonOverlappingSection('section_repository_info')
-        self.add_processor_info(self.parser)

@@ -257,7 +258,7 @@ class Calc(Proc):
             self.add_processor_info(processor_name)
 
     def add_processor_info(self, processor_name: str) -> None:
-        self._parser_backend.openContext('/section_calculation_info/0')
+        self._parser_backend.openContext('/section_entry_info/0')
         self._parser_backend.openNonOverlappingSection('section_archive_processing_info')
         self._parser_backend.addValue('archive_processor_name', processor_name)

@@ -265,7 +266,7 @@ class Calc(Proc):
         warnings = getattr(self._parser_backend, '_warnings', [])
         if len(warnings) > 0:
             self._parser_backend.addValue('archive_processor_status', 'WithWarnings')
-            self._parser_backend.addValue('archive_processor_warning_number', len(warnings))
+            self._parser_backend.addValue('number_of_archive_processor_warnings', len(warnings))
             self._parser_backend.addArrayValues('archive_processor_warnings', [str(warning) for warning in warnings])
         else:
             self._parser_backend.addValue('archive_processor_status', 'Success')

@@ -274,11 +275,14 @@ class Calc(Proc):
             self._parser_backend.addValue('archive_processor_error', str(errors))
 
         self._parser_backend.closeNonOverlappingSection('section_archive_processing_info')
-        self._parser_backend.closeContext('/section_calculation_info/0')
+        self._parser_backend.closeContext('/section_entry_info/0')
 
     @task
     def normalizing(self):
         for normalizer in normalizers:
+            if normalizer.domain != config.domain:
+                continue
+
             normalizer_name = normalizer.__name__
             context = dict(normalizer=normalizer_name, step=normalizer_name)
             logger = self.get_logger(**context)

@@ -310,7 +314,6 @@ class Calc(Proc):
         calc_with_metadata = CalcWithMetadata(**self.metadata)
         calc_with_metadata.apply_domain_metadata(self._parser_backend)
-        calc_with_metadata.parser_name = self.parser
         calc_with_metadata.processed = True
 
         # persist the calc metadata

@@ -326,7 +329,7 @@ class Calc(Proc):
                 logger, 'archived', step='archive', input_size=self.mainfile_file.size) as log_data:
             with self.upload_files.archive_file(self.calc_id, 'wt') as out:
-                self._parser_backend.write_json(out, pretty=True)
+                self._parser_backend.write_json(out, pretty=True, root_sections=Domain.instance.root_sections)
 
             log_data.update(archive_size=self.upload_files.archive_file_object(self.calc_id).size)
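The net effect on the archive files: write_json now serializes only the active domain's root sections. A hypothetical illustration of the resulting JSON shape for an EMS entry (keys invented for brevity):

    import json

    # With Domain.instances['EMS'].root_sections == ['section_experiment', 'section_entry_info'],
    # an EMS archive would carry these top-level sections instead of the DFT 'section_run'.
    archive = {
        'section_experiment': [{'experiment_method': 'TEM'}],
        'section_entry_info': [{'upload_id': 'some_upload', 'calc_id': 'some_calc'}],
    }
    print(json.dumps(archive, indent=2))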
ops/scripts/misc.http
@@ -29,7 +29,7 @@ Content-Type: application/json
 {
     "query": {
         "match": {
-            "published": true
+            "published": false
         }
     }
 }
tests/data/proc/example_ems.zip (new file, mode 100644, binary)
File added
tests/processing/test_data.py
@@ -19,7 +19,7 @@ import os.path
 import json
 import re
 
-from nomad import utils, infrastructure, config
+from nomad import utils, infrastructure, config, datamodel
 from nomad.files import UploadFiles, StagingUploadFiles, PublicUploadFiles
 from nomad.processing import Upload, Calc
 from nomad.processing.base import task as task_decorator, FAILURE, SUCCESS

@@ -82,7 +82,7 @@ def assert_processing(upload: Upload):
     with upload_files.archive_file(calc.calc_id) as archive_json:
         archive = json.load(archive_json)
     assert 'section_run' in archive
-    assert 'section_calculation_info' in archive
+    assert 'section_entry_info' in archive
 
     with upload_files.archive_log_file(calc.calc_id) as f:
         assert 'a test' in f.read()

@@ -233,3 +233,17 @@ def test_malicious_parser_task_failure(proc_infra, failure, test_user):
     assert not calc.tasks_running
     assert calc.tasks_status == FAILURE
     assert len(calc.errors) == 1
+
+
+def test_ems_data(proc_infra, test_user, monkeypatch):
+    monkeypatch.setattr('nomad.config.domain', 'EMS')
+    monkeypatch.setattr('nomad.datamodel.Domain.instance', datamodel.Domain.instances['EMS'])
+    monkeypatch.setattr('nomad.datamodel.CalcWithMetadata', datamodel.Domain.instance.domain_entry_class)
+
+    upload = run_processing(('test_ems_upload', 'tests/data/proc/example_ems.zip'), test_user)
+
+    additional_keys = ['method', 'location', 'experiment_date']
+    assert upload.total_calcs == 1
+
+    assert_upload_files(upload, StagingUploadFiles, additional_keys, published=False)
+    assert_search_upload(upload, additional_keys, published=False)
tests/test_api.py
@@ -554,6 +554,7 @@ class TestRepo():
         clear_elastic(elastic_infra)
 
         calc_with_metadata = CalcWithMetadata(upload_id=0, calc_id=0)
+        calc_with_metadata.files = ['test/mainfile.txt']
         calc_with_metadata.apply_domain_metadata(normalized)
 
         calc_with_metadata.update(
             calc_id='1', uploader=test_user.to_popo(), published=True, with_embargo=False)
tests/test_datamodel.py
@@ -12,6 +12,10 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+"""
+A generator for random test calculations.
+"""
+
 import names
 import random
 from essential_generators import DocumentGenerator
tests/test_parsing.py
@@ -119,19 +119,19 @@ class TestLocalBackend(object):
         backend.addValue('program_name', 't0')
         backend.closeSection('section_run', 0)
 
-        g_index = backend.openSection('section_calculation_info')
+        g_index = backend.openSection('section_entry_info')
         assert g_index == 0
         backend.addValue('parser_name', 'p0')
-        backend.closeSection('section_calculation_info', 0)
+        backend.closeSection('section_entry_info', 0)
 
         assert backend.get_sections('section_run') == [0]
-        assert backend.get_sections('section_calculation_info') == [0]
+        assert backend.get_sections('section_entry_info') == [0]
 
         output = StringIO()
         backend.write_json(output)
         archive = json.loads(output.getvalue())
         assert 'section_run' in archive
-        assert 'section_calculation_info' in archive
+        assert 'section_entry_info' in archive