Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
nomad-lab
nomad-FAIR
Commits
64a67b2e
Commit
64a67b2e
authored
Feb 21, 2020
by
Alvin Noe Ladines
Browse files
Fixed archive_query after refactoring
parent
21616509
Pipeline
#69804
passed with stages
in 20 minutes and 55 seconds
Changes
3
Pipelines
1
Hide whitespace changes
Inline
Side-by-side
nomad/app/api/common.py
View file @
64a67b2e
...
...
@@ -126,7 +126,6 @@ def add_search_parameters(request_parser):
action
=
quantity
.
argparse_action
if
quantity
.
multi
else
None
)
def
apply_search_parameters
(
search_request
:
search
.
SearchRequest
,
args
:
Dict
[
str
,
Any
]):
"""
Help that adds query relevant request args to the given SearchRequest.
...
...
nomad/archive_query.py
View file @
64a67b2e
...
...
@@ -20,7 +20,7 @@ json data or as a filename of an existing msgpack database. The metainfo
can then queried by providing a schema.
.. code-block: python
am = ArchiveMetainfo(
"db.msg"
)
am = ArchiveMetainfo(
archive_data
)
for calc in am.calcs:
c.section_run.section_single_configuration_calculation[0]({'energy_total':None})
...
...
@@ -35,16 +35,13 @@ and a query schema similar to the archive json format can be provided to filter
"""
import
numpy
as
np
from
io
import
BytesIO
import
json
import
requests
import
os.path
from
urllib.parse
import
urlparse
from
typing
import
Dict
,
List
,
Any
,
Union
from
typing
import
Dict
,
List
,
Any
from
nomad.metainfo
import
MSection
,
Quantity
,
SubSection
from
nomad.metainfo.metainfo
import
MObjectMeta
# from nomad.archive import ArchiveFileDB
from
nomad
import
config
as
nomad_config
from
nomad.cli.client.client
import
KeycloakAuthenticator
...
...
@@ -53,54 +50,25 @@ class ArchiveMetainfo:
"""
Converts archive data in json format to the new nomad metainfo model
Arguments:
archive_data: the archive data in json format or msgdb filename
archive_schema: dict with the desired quantities as keys and None as placeholder
for the values which are queried from the data
archive_data: the archive data in json format
"""
def
__init__
(
self
,
archive_data
,
archive_schema
=
None
):
def
__init__
(
self
,
archive_data
:
List
[
Dict
[
str
,
Any
]]
):
self
.
_archive_data
=
archive_data
self
.
_archive_schema
=
archive_schema
self
.
metainfo
=
None
self
.
_metacls
=
None
self
.
_calcs
=
{}
self
.
_calc_ids
=
[]
self
.
_archive_db
=
None
self
.
_calcs
:
Dict
[
str
,
MSection
]
=
{}
self
.
_calc_ids
:
List
[
str
]
=
[]
self
.
_base_metacls
=
None
self
.
_base_metainfo
=
None
self
.
_base_data
=
None
self
.
_prefix
=
'calc'
self
.
_load_archive_db
()
self
.
_init_calcs
()
def
_load_archive_db
(
self
):
if
isinstance
(
self
.
_archive_data
,
str
):
self
.
_archive_db
=
ArchiveFileDB
(
self
.
_archive_data
)
else
:
db
=
ArchiveFileDB
(
BytesIO
(),
mode
=
'wb'
)
if
isinstance
(
self
.
_archive_data
,
dict
):
for
calc_id
,
run
in
self
.
_archive_data
.
items
():
db
.
add_data
({
calc_id
:
run
})
elif
isinstance
(
self
.
_archive_data
,
list
):
for
entry
in
self
.
_archive_data
:
if
not
entry
:
continue
db
.
add_data
(
entry
)
db
.
create_db
()
self
.
_archive_db
=
db
@
property
def
archive_schema
(
self
):
return
json
.
loads
(
json
.
dumps
(
self
.
_archive_schema
))
def
_init_calcs
(
self
):
for
i
in
range
(
len
(
self
.
calc_ids
)):
calc_id
=
self
.
calc_ids
[
i
]
if
self
.
_archive_schema
is
None
:
self
.
_calcs
[
calc_id
]
=
self
.
base_metainfo
else
:
data
=
self
.
_archive_db
.
query
({
calc_id
:
self
.
archive_schema
})[
calc_id
]
self
.
_calcs
[
calc_id
]
=
self
.
base_metacls
.
m_from_dict
(
data
)
self
.
_calcs
[
calc_id
].
archive_db
=
self
.
_archive_db
for
calc
in
self
.
_archive_data
:
calc_id
=
list
(
calc
.
keys
())[
0
]
data
=
calc
[
calc_id
]
self
.
_calc_ids
.
append
(
calc_id
)
self
.
_calcs
[
calc_id
]
=
self
.
_build_meta_cls
(
data
,
calc_id
).
m_from_dict
(
data
)
def
__getitem__
(
self
,
key
):
if
isinstance
(
key
,
str
):
...
...
@@ -135,52 +103,8 @@ class ArchiveMetainfo:
raise
StopIteration
calc
=
list
(
self
.
_calcs
.
values
())[
self
.
_n
]
calc
.
calc_id
=
list
(
self
.
_calcs
.
keys
())[
self
.
_n
]
calc
.
archive_db
=
self
.
_archive_db
return
calc
@
staticmethod
def
to_nested_dict
(
path_str
:
Union
[
str
,
List
])
->
Dict
[
str
,
Any
]:
if
isinstance
(
path_str
,
str
):
path_str
=
path_str
.
split
(
'/'
)
if
len
(
path_str
)
==
1
:
return
{
path_str
[
0
]:
'*'
}
else
:
pdict
=
{}
pdict
[
path_str
[
0
]]
=
ArchiveMetainfo
.
to_nested_dict
(
path_str
[
1
:])
return
pdict
@
staticmethod
def
append_data
(
entry
:
Dict
[
str
,
Any
],
val
:
Any
)
->
Dict
[
str
,
Any
]:
for
k
,
v
in
entry
.
items
():
if
not
isinstance
(
v
,
dict
):
entry
[
k
]
=
val
else
:
entry
[
k
]
=
ArchiveMetainfo
.
append_data
(
v
,
val
)
return
entry
@
staticmethod
def
get_path_from_section
(
content
):
path
=
content
.
m_path
()
path
=
path
.
split
(
'/'
)
s
=
''
for
p
in
path
:
try
:
p
=
int
(
p
)
s
+=
'[%s]'
%
p
except
ValueError
:
s
+=
'/%s'
%
p
return
s
[
1
:]
@
staticmethod
def
get_data_from_db
(
content
,
qschema
):
db
=
content
.
m_root
().
archive_db
calc_id
=
content
.
m_root
().
calc_id
root
=
calc_id
+
ArchiveMetainfo
.
get_path_from_section
(
content
)
qs
=
ArchiveMetainfo
.
append_data
(
ArchiveMetainfo
.
to_nested_dict
(
root
),
qschema
)
data
=
db
.
query
(
qs
)
return
data
@
property
def
calcs
(
self
):
"""
...
...
@@ -191,39 +115,13 @@ class ArchiveMetainfo:
self
.
_init_calcs
()
for
calc_id
,
calc
in
self
.
_calcs
.
items
():
calc
.
calc_id
=
calc_id
calc
.
archive_db
=
self
.
_archive_db
yield
calc
@
property
def
calc_ids
(
self
):
if
not
self
.
_calc_ids
:
self
.
_calc_ids
=
[
s
.
strip
()
for
s
in
self
.
_archive_db
.
ids
]
return
self
.
_calc_ids
def
_nullify_metainfo
(
self
,
metainfo
):
if
hasattr
(
metainfo
,
'm_contents'
):
for
content
in
metainfo
.
m_contents
():
self
.
_nullify_metainfo
(
content
)
return
metainfo
def
_nullify_data
(
self
,
data
):
if
not
data
:
return
elif
isinstance
(
data
,
dict
):
for
key
,
val
in
data
.
items
():
data
[
key
]
=
self
.
_nullify_data
(
val
)
elif
isinstance
(
data
,
list
)
and
isinstance
(
data
[
0
],
dict
):
for
i
in
range
(
len
(
data
)):
data
[
i
]
=
self
.
_nullify_data
(
data
[
i
])
else
:
data
=
None
return
data
@
property
def
base_data
(
self
):
if
self
.
_base_data
is
None
:
calc_id
=
self
.
calc_ids
[
0
]
self
.
_base_data
=
self
.
_archive_d
b
.
query
({
calc_id
:
self
.
archive_schema
})
[
calc_id
]
calc_id
=
self
.
_
calc_ids
[
0
]
self
.
_base_data
=
self
.
_archive_d
ata
[
calc_id
]
return
self
.
_base_data
@
property
...
...
@@ -236,23 +134,12 @@ class ArchiveMetainfo:
self
.
_base_metacls
=
self
.
_build_meta_cls
(
self
.
base_data
,
name
)
return
self
.
_base_metacls
@
property
def
base_metainfo
(
self
):
"""
The base metainfo to enable auto completion for each calc
"""
if
self
.
_base_metainfo
is
None
:
metacls
=
self
.
base_metacls
base_data
=
self
.
_nullify_data
(
self
.
base_data
)
self
.
_base_metainfo
=
metacls
.
m_from_dict
(
base_data
)
return
self
.
_base_metainfo
def
get_dtype
(
self
,
data
):
def
_get_dtype
(
self
,
data
):
if
isinstance
(
data
,
np
.
ndarray
):
if
len
(
data
)
==
0
:
dtype
=
int
else
:
dtype
=
self
.
get_dtype
(
data
[
0
])
dtype
=
self
.
_
get_dtype
(
data
[
0
])
else
:
dtype
=
type
(
data
)
return
dtype
...
...
@@ -265,7 +152,7 @@ class ArchiveMetainfo:
else
:
if
isinstance
(
content
,
list
):
content
=
np
.
array
(
content
)
dtype
=
self
.
get_dtype
(
content
)
dtype
=
self
.
_
get_dtype
(
content
)
if
isinstance
(
content
,
np
.
ndarray
):
dtype
=
np
.
dtype
(
dtype
)
shape
=
np
.
shape
(
content
)
...
...
@@ -274,17 +161,10 @@ class ArchiveMetainfo:
return
Quantity
(
type
=
dtype
)
def
_create_section
(
self
,
name
,
contents
):
contents
[
'get'
]
=
ArchiveMetainfo
.
get_data_from_db
section
=
type
(
name
.
title
(),
(
MSection
,),
contents
)
section
.
__call__
=
ArchiveMetainfo
.
get_data_from_db
return
section
def
_build_meta_cls
(
self
,
data
=
None
,
name
=
None
,
return_section
=
True
):
if
name
is
None
:
data
=
self
.
_archive_data
name
=
self
.
_prefix
if
data
is
None
:
return
def
_build_meta_cls
(
self
,
data
,
name
,
return_section
=
True
):
if
isinstance
(
data
,
dict
):
contents
=
{}
for
key
,
val
in
data
.
items
():
...
...
@@ -314,16 +194,9 @@ class ArchiveMetainfo:
else
:
return
self
.
_to_meta_obj
(
data
)
def
to_metainfo
(
self
,
data
=
None
):
if
data
is
None
:
data
=
self
.
_archive_data
self
.
metainfo
=
self
.
base_metacls
.
m_from_dict
(
data
)
class
ArchiveQuery
:
def
__init__
(
self
,
*
args
,
**
kwargs
):
self
.
_archive_path
=
'archive'
self
.
_query_path
=
'query'
self
.
archive_data
=
[]
self
.
_scroll_id
=
None
self
.
_page
=
None
...
...
@@ -332,9 +205,6 @@ class ArchiveQuery:
self
.
_query_params
=
args
[
0
]
if
kwargs
:
self
.
_query_params
.
update
(
kwargs
)
self
.
_archive_schema
=
self
.
_query_params
.
pop
(
'archive_data'
,
None
)
if
not
isinstance
(
self
.
_archive_schema
,
list
):
self
.
_archive_schema
=
[
self
.
_archive_schema
]
self
.
_max_n_pages
=
self
.
_query_params
.
pop
(
'max_n_pages'
,
100000
)
self
.
_authentication
=
self
.
_query_params
.
pop
(
'authentication'
,
None
)
self
.
_url
=
self
.
_query_params
.
pop
(
'url'
,
None
)
...
...
@@ -384,17 +254,14 @@ class ArchiveQuery:
return
self
.
_authentication
def
_api_query
(
self
):
url
=
os
.
path
.
join
(
nomad_config
.
client
.
url
,
self
.
_archive_path
,
self
.
_query_path
)
data
=
self
.
_query_params
data
[
'results'
]
=
self
.
_archive_schema
url
=
os
.
path
.
join
(
nomad_config
.
client
.
url
,
'archive'
,
'query'
)
if
self
.
_page
is
not
None
:
# increment the page number
self
.
_set_value
(
'page'
,
self
.
_page
+
1
,
data
)
if
self
.
_scroll_id
is
not
None
:
self
.
_set_value
(
'scroll_id'
,
self
.
_scroll_id
,
data
)
self
.
_query_params
[
'scroll'
][
'scroll_id'
]
=
self
.
_scroll_id
elif
self
.
_page
is
not
None
:
self
.
_query_params
[
'pagination'
][
'page'
]
=
self
.
_page
+
1
response
=
requests
.
post
(
url
,
headers
=
self
.
_get_authentication
(),
json
=
data
)
response
=
requests
.
post
(
url
,
headers
=
self
.
_get_authentication
(),
json
=
self
.
_query_params
)
if
response
.
status_code
!=
200
:
raise
response
.
raise_for_status
()
...
...
@@ -403,12 +270,8 @@ class ArchiveQuery:
data
=
data
()
results
=
data
.
get
(
'results'
,
[])
scroll
=
data
.
get
(
'Scroll'
,
None
)
if
scroll
:
self
.
_scroll_id
=
scroll
.
get
(
'scroll_id'
,
None
)
pagination
=
data
.
get
(
'Pagination'
,
None
)
if
pagination
:
self
.
_page
=
pagination
.
get
(
'page'
,
None
)
self
.
_scroll_id
=
data
.
get
(
'scroll'
,
{}).
get
(
'scroll_id'
,
None
)
self
.
_page
=
data
.
get
(
'pagination'
,
{}).
get
(
'page'
,
None
)
return
results
...
...
@@ -426,8 +289,10 @@ class ArchiveQuery:
def
query
(
self
):
self
.
_get_archive_data
()
if
self
.
archive_data
:
self
.
metainfo
=
ArchiveMetainfo
(
archive_data
=
self
.
archive_data
,
archive_schema
=
'*'
)
self
.
metainfo
=
ArchiveMetainfo
(
archive_data
=
self
.
archive_data
)
# TODO
# def query()...
\ No newline at end of file
def
query
(
*
args
,
**
kwargs
):
archive_query_obj
=
ArchiveQuery
(
*
args
,
**
kwargs
)
archive_query_obj
.
query
()
return
archive_query_obj
.
metainfo
tests/test_archive_
libra
ry.py
→
tests/test_archive_
que
ry.py
View file @
64a67b2e
import
pytest
import
os
from
nomad.archive
import
ArchiveFileDB
from
nomad.archive_query
import
ArchiveQuery
,
ArchiveMetainfo
from
tests.app.test_app
import
BlueprintClient
@
pytest
.
fixture
(
scope
=
'function'
)
def
example_msgdb
():
def
create_msgdb
(
payload
):
filename
=
'archive_test.msg'
msgdbo
=
ArchiveFileDB
(
filename
,
mode
=
'w'
,
entry_toc_depth
=
1
)
msgdbo
.
add_data
(
payload
)
msgdbo
.
close
()
msgdbo
=
ArchiveFileDB
(
filename
,
mode
=
'r'
)
return
msgdbo
filename
=
'archive_test.msg'
yield
create_msgdb
os
.
remove
(
filename
)
class
TestArchiveMetainfo
:
@
pytest
.
fixture
(
scope
=
'function'
)
def
data
(
self
):
...
...
@@ -30,13 +13,8 @@ class TestArchiveMetainfo:
def
assert_metainfo
(
self
,
metainfo
):
for
calc
in
metainfo
.
calcs
:
assert
calc
.
secA
({
'propA'
:
'*'
})
is
not
None
assert
calc
({
'secA'
:
{
'propA'
:
'*'
,
'propB'
:
'*'
}})
is
not
None
def
test_query_from_file
(
self
,
data
,
example_msgdb
):
_
=
example_msgdb
(
data
)
metainfo
=
ArchiveMetainfo
(
archive_data
=
'archive_test.msg'
,
archive_schema
=
{
'secA'
:
'*'
})
self
.
assert_metainfo
(
metainfo
)
assert
isinstance
(
calc
.
secA
.
propA
,
float
)
assert
calc
.
secA
.
m_to_dict
()
is
not
None
def
test_query_from_data
(
self
,
data
):
metainfo
=
ArchiveMetainfo
(
archive_data
=
data
)
...
...
@@ -51,9 +29,9 @@ class TestArchiveQuery:
def
test_query_from_json
(
self
,
api
,
published_wo_user_metadata
,
test_user_auth
,
monkeypatch
):
monkeypatch
.
setattr
(
'nomad.archive_query.requests'
,
api
)
q_params
=
{
'
P
agination'
:
{
'order'
:
1
,
'per_page'
:
5
}}
q_params
=
{
'
p
agination'
:
{
'order'
:
1
,
'per_page'
:
5
}}
q_schema
=
{
'section_entry_info'
:
'*'
}
q
=
ArchiveQuery
(
q_params
,
archive_dat
a
=
q_schema
,
authentication
=
test_user_auth
)
q
=
ArchiveQuery
(
q_params
,
query_schem
a
=
q_schema
,
authentication
=
test_user_auth
)
q
.
query
()
for
calc
in
q
.
metainfo
:
assert
calc
.
section_entry_info
.
calc_id
is
not
None
...
...
@@ -61,7 +39,9 @@ class TestArchiveQuery:
def
test_query_from_kwargs
(
self
,
api
,
published_wo_user_metadata
,
other_test_user_auth
,
monkeypatch
):
monkeypatch
.
setattr
(
'nomad.archive_query.requests'
,
api
)
q_schema
=
{
'section_entry_info'
:
'*'
}
q
=
ArchiveQuery
(
order
=
1
,
per_page
=
5
,
scroll
=
True
,
archive_data
=
q_schema
,
authentication
=
other_test_user_auth
)
q
=
ArchiveQuery
(
scroll
=
dict
(
scroll
=
True
),
pagination
=
dict
(
per_page
=
5
),
query_schema
=
q_schema
,
authentication
=
other_test_user_auth
)
q
.
query
()
for
calc
in
q
.
metainfo
:
assert
calc
.
section_entry_info
.
calc_id
is
not
None
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment