Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
nomad-lab
nomad-FAIR
Commits
fc32f513
Commit
fc32f513
authored
Jan 08, 2021
by
Markus Scheidgen
Browse files
Added index transformation to fix optimade entries.
#450
#461
parent
c52c39c5
Changes
4
Hide whitespace changes
Inline
Side-by-side
nomad/cli/admin/admin.py
View file @
fc32f513
...
...
@@ -192,44 +192,6 @@ def lift_embargo(dry, parallel):
__run_processing
(
uploads_to_repack
,
parallel
,
lambda
upload
:
upload
.
re_pack
(),
're-packing'
)
@admin.command(help='(Re-)index all calcs.')
@click.option('--threads', type=int, default=1, help='Number of threads to use.')
@click.option('--dry', is_flag=True, help='Do not index, just compute entries.')
def index(threads, dry):
    '''
    Computes an elastic search index entry for every calc stored in mongo and,
    unless ``dry`` is set, sends these entries to elastic search.

    Arguments:
        threads: Number of threads for bulk indexing. Values greater than 1 use
            ``elasticsearch.helpers.parallel_bulk``, otherwise a plain
            ``elasticsearch.helpers.bulk`` is used.
        dry: If set, the index entries are only computed and then discarded.
    '''
    infrastructure.setup_mongo()
    infrastructure.setup_elastic()

    all_calcs = proc.Calc.objects().count()
    print('indexing %d ...' % all_calcs)

    def elastic_updates():
        # Lazily yields one bulk 'index' action per calc, so that the bulk helpers
        # can stream the entries instead of holding all of them in memory.
        # The progress template used to read "... or ..."; the two integer slots
        # are presumably (processed, total) -- confirm against utils.ETA.
        with utils.ETA(all_calcs, ' index %10d of %10d calcs, ETA %s') as eta:
            for calc in proc.Calc.objects():
                eta.add()
                entry_metadata = datamodel.EntryMetadata.m_from_dict(calc.metadata)
                entry = entry_metadata.a_elastic.create_index_entry().to_dict(include_meta=True)
                entry['_op_type'] = 'index'
                yield entry

    if dry:
        # Exhaust the generator to compute all entries without sending anything.
        for _ in elastic_updates():
            pass
    else:
        if threads > 1:
            print(' use %d threads' % threads)
            # parallel_bulk is lazy, it only does work while its results are consumed.
            for _ in elasticsearch.helpers.parallel_bulk(
                    infrastructure.elastic_client, elastic_updates(), chunk_size=500,
                    thread_count=threads):
                pass
        else:
            elasticsearch.helpers.bulk(
                infrastructure.elastic_client, elastic_updates())
        # NOTE(review): the flattened source does not show whether the refresh
        # belongs to the non-dry branch only; it is kept here because a dry run
        # does not change the index -- confirm.
        search.refresh()

    print('')
    print('indexing completed')
@
admin
.
command
()
@
click
.
option
(
'--threads'
,
type
=
int
,
default
=
1
,
help
=
'Number of threads to use.'
)
@
click
.
option
(
'--code'
,
multiple
=
True
,
type
=
str
,
help
=
'Index only calculcations of given codes.'
)
...
...
nomad/cli/admin/uploads.py
View file @
fc32f513
...
...
@@ -247,19 +247,32 @@ def reset(ctx, uploads, with_calcs):
@
uploads
.
command
(
help
=
'(Re-)index all calcs of the given uploads.'
)
@
click
.
argument
(
'UPLOADS'
,
nargs
=-
1
)
@
click
.
option
(
'--parallel'
,
default
=
1
,
type
=
int
,
help
=
'Use the given amount of parallel processes. Default is 1.'
)
@
click
.
option
(
'--transformer'
,
help
=
'Qualified name to a Python function that should be applied to each EntryMetadata.'
)
@
click
.
pass_context
def
index
(
ctx
,
uploads
,
parallel
):
def
index
(
ctx
,
uploads
,
parallel
,
transformer
):
transformer_func
=
None
if
transformer
is
not
None
:
import
importlib
module_name
,
func_name
=
transformer
.
rsplit
(
'.'
,
1
)
module
=
importlib
.
import_module
(
module_name
)
transformer_func
=
getattr
(
module
,
func_name
)
_
,
uploads
=
query_uploads
(
ctx
,
uploads
)
def transform(calcs):
    '''
    Applies ``transformer_func`` to each of the given calcs. The first failing
    calc is reported and ends the transformation of all remaining calcs.
    '''
    import traceback

    for entry in calcs:
        try:
            # NOTE(review): the return value is not used by this loop, so the
            # transformer has to modify the given entry in place to have an effect.
            transformer_func(entry)
        except Exception as error:
            traceback.print_exc()
            print(f' ERROR failed to transform calc (stop transforming for upload): {str(error)}')
            break
def
index_upload
(
upload
,
logger
):
with
upload
.
entries_metadata
()
as
calcs
:
# This is just a temporary fix to update the group hash without re-processing
try
:
for
calc
in
calcs
:
if
calc
.
dft
is
not
None
:
calc
.
dft
.
update_group_hash
()
except
Exception
:
pass
if
transformer
is
not
None
:
transform
(
calcs
)
failed
=
search
.
index_all
(
calcs
)
if
failed
>
0
:
print
(
' WARNING failed to index %d entries'
%
failed
)
...
...
nomad/normalizing/optimade.py
View file @
fc32f513
...
...
@@ -33,6 +33,30 @@ from nomad.datamodel.metainfo.public import section_system
species_re
=
re
.
compile
(
r
'^([A-Z][a-z]?)(\d*)$'
)
def transform_to_v1(entry: EntryMetadata) -> EntryMetadata:
    '''
    Transformation function to use during re-indexing of entries with outdated optimade
    format. Fixes formulas and periodic dimensions, and removes the optimade section
    of entries with X in their reduced formula.

    Arguments:
        entry: The entry metadata to update. It is modified in place.

    Returns: The given ``entry``. Entries without dft or optimade data are
        returned unchanged.
    '''
    optimade = entry.dft.optimade if entry.dft is not None else None
    if optimade is None:
        return entry

    reduced_formula = optimade.chemical_formula_reduced

    # A missing formula must not fail the membership test with a TypeError;
    # optimade_chemical_formula_reduced accepts None as well.
    if reduced_formula is not None and 'X' in reduced_formula:
        entry.dft.m_remove_sub_section(DFTMetadata.optimade, -1)
        return entry

    optimade.chemical_formula_reduced = optimade_chemical_formula_reduced(reduced_formula)
    # The anonymous formula is derived from the already fixed reduced formula.
    optimade.chemical_formula_anonymous = optimade_chemical_formula_anonymous(
        optimade.chemical_formula_reduced)
    optimade.chemical_formula_hill = optimade_chemical_formula_hill(
        optimade.chemical_formula_hill)
    optimade.chemical_formula_descriptive = optimade.chemical_formula_hill

    # An int n is expanded to a list of three flags: n ones followed by zeros.
    dimension_types = optimade.dimension_types
    if isinstance(dimension_types, int):
        optimade.dimension_types = [1] * dimension_types + [0] * (3 - dimension_types)

    return entry
def
optimade_chemical_formula_reduced
(
formula
:
str
):
if
formula
is
None
:
return
formula
...
...
tests/test_cli.py
View file @
fc32f513
...
...
@@ -117,21 +117,6 @@ class TestAdmin:
with
files
.
UploadFiles
.
get
(
upload_id
=
upload_id
).
read_archive
(
calc_id
=
calc
.
calc_id
)
as
archive
:
assert
calc
.
calc_id
in
archive
def test_index(self, published):
    ''' Changed calc metadata is only found by search after running ``admin index``. '''
    def count_specific():
        request = search.SearchRequest().search_parameter('comment', 'specific')
        return request.execute()['total']

    entry = Calc.objects(upload_id=published.upload_id).first()
    entry.metadata['comment'] = 'specific'
    entry.save()

    # The change was only saved to mongo, the search index does not know it yet.
    assert count_specific() == 0

    runner = click.testing.CliRunner()
    result = runner.invoke(
        cli, ['admin', 'index', '--threads', '2'], catch_exceptions=False)
    assert result.exit_code == 0
    assert 'index' in result.stdout

    assert count_specific() == 1
def
test_delete_entry
(
self
,
published
):
upload_id
=
published
.
upload_id
calc
=
Calc
.
objects
(
upload_id
=
upload_id
).
first
()
...
...
@@ -145,6 +130,11 @@ class TestAdmin:
assert
Calc
.
objects
(
calc_id
=
calc
.
calc_id
).
first
()
is
None
def transform_for_index_test(calc):
    ''' Transformer used by the index tests: sets the comment and returns the same calc. '''
    setattr(calc, 'comment', 'specific')
    return calc
@
pytest
.
mark
.
usefixtures
(
'reset_config'
,
'no_warn'
)
class
TestAdminUploads
:
...
...
@@ -236,6 +226,21 @@ class TestAdminUploads:
assert
search
.
SearchRequest
().
search_parameters
(
comment
=
'specific'
).
execute
()[
'total'
]
==
1
def test_index_with_transform(self, published):
    ''' ``admin uploads index --transformer`` applies the transformer before indexing. '''
    upload_id = published.upload_id

    def count_specific():
        return search.SearchRequest().search_parameters(comment='specific').execute()['total']

    assert count_specific() == 0

    cli_args = [
        'admin', 'uploads', 'index',
        '--transformer', 'tests.test_cli.transform_for_index_test',
        upload_id]
    result = click.testing.CliRunner().invoke(cli, cli_args, catch_exceptions=False)
    assert result.exit_code == 0
    assert 'index' in result.stdout

    # The transformer set the comment on the entry, so it is searchable now.
    assert count_specific() == 1
def
test_re_process
(
self
,
published
,
monkeypatch
):
monkeypatch
.
setattr
(
'nomad.config.meta.version'
,
'test_version'
)
upload_id
=
published
.
upload_id
...
...
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment