Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
nomad-lab
nomad-FAIR
Commits
0fb3e3e3
Commit
0fb3e3e3
authored
Jan 08, 2021
by
Markus Scheidgen
Browse files
Refactored dcat arg parsing. Hydra is now sibling of catalog.
parent
e67118fd
Pipeline
#90843
passed with stages
in 29 minutes and 53 seconds
Changes
5
Pipelines
1
Hide whitespace changes
Inline
Side-by-side
nomad/app/dcat/api.py
View file @
0fb3e3e3
# Copyright 2018 Markus Scheidgen
#
# Copyright The NOMAD Authors.
#
# This file is part of NOMAD. See https://nomad-lab.eu for further info.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an"AS IS" BASIS,
# distributed under the License is distributed on an
"AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
from
flask
import
Blueprint
from
flask_restplus
import
Api
from
flask
import
Blueprint
,
Response
from
flask_restplus
import
Api
,
reqparse
import
urllib.parse
from
rdflib
import
Graph
from
nomad
import
config
# Flask blueprint under which the DCAT API is registered.
blueprint = Blueprint('dcat', __name__)

# Base url of the DCAT API as provided by the NOMAD configuration. The earlier
# hand-assembled 'https://%s/%s/dcat' value was immediately overwritten by this
# assignment and has been dropped.
base_url = config.api_url(api='dcat')
def url(*args, **kwargs):
    '''
    Returns the full dcat api url for the given path (args) and query (kwargs) parameters.

    Arguments:
        *args: Path segments (strings); they are joined with ``/`` and appended
            to ``base_url``.
        **kwargs: Query parameters; if any are given they are url-encoded and
            appended after a ``?``.

    Returns: The assembled url as a string.
    '''
    # Strip the slashes at the seam so that base url and path are always
    # separated by exactly one '/'.
    path = '/'.join(args).lstrip('/')
    full_url = f'{base_url.rstrip("/")}/{path}'

    if kwargs:
        return f'{full_url}?{urllib.parse.urlencode(kwargs)}'

    return full_url
...
...
@@ -48,3 +51,24 @@ api = Api(
def
errorhandler
(
error
):
'''When an internal server error is caused by an unexpected exception.'''
return
str
(
error
)
# Request parser for the DCAT endpoints (the catalog and datasets modules import
# it from here). It declares a single optional ``format`` argument whose value
# must be one of the listed serialization names.
arg_parser = reqparse.RequestParser()
arg_parser.add_argument(
    'format',
    type=str,
    choices=['xml', 'n3', 'turtle', 'nt', 'pretty-xml', 'trig'],
)
def rdf_respose(g: Graph) -> Response:
    '''
    Serializes the given RDF graph into a flask response.

    The serialization format is taken from the ``format`` request argument as
    parsed by ``arg_parser``; ``pretty-xml`` is used if the argument is absent.

    Arguments:
        g: The graph to serialize.

    Returns: A response with status 200, the serialized graph as body, and a
        ``Content-Type`` header of ``application/xml`` for the xml formats or
        ``text/<format>`` for all other formats.
    '''
    args = arg_parser.parse_args()
    format_ = args.get('format')
    if format_ is None:
        format_ = 'pretty-xml'

    # This has to test the requested format name ``format_``. Testing the
    # ``format`` builtin (as before) is never true and mislabels xml output
    # as 'text/xml' or 'text/pretty-xml'.
    if format_ in ('xml', 'pretty-xml'):
        content_type = 'application/xml'
    else:
        content_type = 'text/%s' % format_

    return Response(
        g.serialize(format=format_).decode('utf-8'), 200,
        {'Content-Type': content_type})
nomad/app/dcat/catalog.py
View file @
0fb3e3e3
# Copyright 2018 Markus Scheidgen
#
# Copyright The NOMAD Authors.
#
# This file is part of NOMAD. See https://nomad-lab.eu for further info.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an"AS IS" BASIS,
# distributed under the License is distributed on an
"AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
from
flask_restplus
import
Resource
,
reqparse
,
fields
from
flask
import
Response
from
flask_restplus
import
Resource
,
fields
from
elasticsearch_dsl
import
Q
from
nomad
import
search
from
.api
import
api
from
.api
import
api
,
arg_parser
,
rdf_respose
from
.mapping
import
Mapping
ns
=
api
.
namespace
(
'catalog'
,
description
=
'The API for DCAT catalog.'
)
iso8601
=
fields
.
DateTime
(
dt_format
=
'iso8601'
)
arg_parser
=
reqparse
.
RequestParser
()
arg_parser
.
add_argument
(
'format'
,
type
=
str
,
choices
=
[
'xml'
,
'n3'
,
'turtle'
,
'nt'
,
'pretty-xml'
,
'trig'
])
arg_parser
=
arg_parser
.
copy
()
arg_parser
.
add_argument
(
'after'
,
type
=
str
)
arg_parser
.
add_argument
(
'modified_since'
,
type
=
lambda
x
:
iso8601
.
parse
(
x
),
...
...
@@ -51,13 +47,10 @@ class Catalog(Resource):
''' Returns a page of DCAT datasets. '''
args
=
arg_parser
.
parse_args
()
format_
=
args
.
get
(
'format'
)
if
format_
is
None
:
format_
=
'xml'
modified_since
=
args
.
get
(
'modified_since'
,
None
)
modified_since
=
iso8601
.
parse
(
modified_since
)
if
modified_since
is
not
None
else
None
after
=
args
.
get
(
'after'
,
None
)
after
=
args
.
get
(
'after'
,
''
)
if
after
is
None
:
after
=
''
search_request
=
search
.
SearchRequest
().
owner
(
'public'
)
if
modified_since
is
not
None
:
...
...
@@ -67,14 +60,10 @@ class Catalog(Resource):
search_request
.
q
&=
modified_clause
es_search
=
search_request
.
_search
.
query
(
search_request
.
q
)
if
after
is
not
None
:
if
after
is
not
''
:
es_search
=
es_search
.
extra
(
search_after
=
[
after
],
sort
=
'calc_id'
)
es_response
=
es_search
.
execute
()
mapping
=
Mapping
()
mapping
.
map_catalog
(
es_response
.
hits
)
content_type
=
'application/xml'
if
format_
==
'xml'
else
'text/%s'
%
format_
return
Response
(
mapping
.
g
.
serialize
(
format
=
format_
).
decode
(
'utf-8'
),
200
,
{
'Content-Type'
:
content_type
})
mapping
.
map_catalog
(
es_response
.
hits
,
after
,
modified_since
)
return
rdf_respose
(
mapping
.
g
)
nomad/app/dcat/datasets.py
View file @
0fb3e3e3
# Copyright 2018 Markus Scheidgen
#
# Copyright The NOMAD Authors.
#
# This file is part of NOMAD. See https://nomad-lab.eu for further info.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an"AS IS" BASIS,
# distributed under the License is distributed on an
"AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from
flask_restplus
import
Resource
,
abort
,
reqparse
from
flask
import
Response
#
from
flask_restplus
import
Resource
,
abort
from
elasticsearch.exceptions
import
NotFoundError
from
nomad
import
search
from
.api
import
api
from
.api
import
api
,
arg_parser
,
rdf_respose
from
.mapping
import
Mapping
ns
=
api
.
namespace
(
'datasets'
,
description
=
'The API for DCAT datasets.'
)
arg_parser
=
reqparse
.
RequestParser
()
arg_parser
.
add_argument
(
'format'
,
type
=
str
,
choices
=
[
'xml'
,
'n3'
,
'turtle'
,
'nt'
,
'pretty-xml'
,
'trig'
])
@
ns
.
route
(
'/<string:entry_id>'
)
class
Dataset
(
Resource
):
@
api
.
doc
(
'get_dcat_dataset'
)
...
...
@@ -44,11 +36,6 @@ class Dataset(Resource):
@
api
.
response
(
200
,
'Data send'
,
headers
=
{
'Content-Type'
:
'application/xml'
})
def
get
(
self
,
entry_id
):
''' Returns a DCAT dataset for a given NOMAD entry id. '''
format_
=
arg_parser
.
parse_args
().
get
(
'format'
)
if
format_
is
None
:
format_
=
'xml'
try
:
entry
=
search
.
entry_document
.
get
(
entry_id
)
except
NotFoundError
:
...
...
@@ -59,7 +46,4 @@ class Dataset(Resource):
mapping
=
Mapping
()
mapping
.
map_entry
(
entry
)
content_type
=
'application/xml'
if
format_
==
'xml'
else
'text/%s'
%
format_
return
Response
(
mapping
.
g
.
serialize
(
format
=
format_
).
decode
(
'utf-8'
),
200
,
{
'Content-Type'
:
content_type
})
return
rdf_respose
(
mapping
.
g
)
nomad/app/dcat/mapping.py
View file @
0fb3e3e3
# Copyright 2020 Markus Scheidgen
#
# Copyright The NOMAD Authors.
#
# This file is part of NOMAD. See https://nomad-lab.eu for further info.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an"AS IS" BASIS,
# distributed under the License is distributed on an
"AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
from
rdflib
import
Graph
,
Literal
,
RDF
,
URIRef
,
BNode
from
rdflib.namespace
import
Namespace
,
DCAT
,
DCTERMS
as
DCT
,
FOAF
from
rdflib.namespace
import
Namespace
,
DCAT
,
DCTERMS
as
DCT
,
FOAF
,
RDF
from
nomad
import
config
from
nomad.datamodel
import
User
...
...
@@ -22,7 +26,6 @@ from nomad.datamodel import EntryMetadata, User
from
.api
import
url
VCARD
=
Namespace
(
'http://www.w3.org/2006/vcard/ns#'
)
HYDRA
=
Namespace
(
'http://www.w3.org/ns/hydra/core#'
)
...
...
@@ -37,6 +40,7 @@ def get_optional_entry_prop(entry, name):
class
Mapping
():
def
__init__
(
self
):
self
.
g
=
Graph
()
self
.
g
.
namespace_manager
.
bind
(
'rdf'
,
RDF
)
self
.
g
.
namespace_manager
.
bind
(
'dcat'
,
DCAT
)
self
.
g
.
namespace_manager
.
bind
(
'dct'
,
DCT
)
self
.
g
.
namespace_manager
.
bind
(
'vcard'
,
VCARD
)
...
...
@@ -45,25 +49,32 @@ class Mapping():
self
.
persons
=
{}
def
map_catalog
(
self
,
entries
):
catalog
=
URIRef
(
url
(
'catalog'
))
def
map_catalog
(
self
,
entries
,
after
:
str
,
modified_since
):
def
uri_ref
(
after
):
kwargs
=
dict
()
if
after
is
not
None
:
kwargs
[
'after'
]
=
after
if
modified_since
is
not
None
:
kwargs
[
'modified_since'
]
=
modified_since
.
strftime
(
'%Y-%m-%d'
)
return
URIRef
(
url
(
'catalog'
,
**
kwargs
))
after
=
after
.
strip
()
catalog
=
uri_ref
(
after
=
None
)
self
.
g
.
add
((
catalog
,
RDF
.
type
,
DCAT
.
Catalog
))
last_entry
=
None
for
entry
in
entries
:
self
.
g
.
add
((
catalog
,
DCT
.
dataset
,
self
.
map_entry
(
entry
,
slim
=
True
)))
last_entry
=
entry
hydra_collection
=
BNode
(
)
hydra_collection
=
uri_ref
(
after
)
self
.
g
.
add
((
hydra_collection
,
RDF
.
type
,
HYDRA
.
Collection
))
self
.
g
.
add
((
hydra_collection
,
HYDRA
.
totalItems
,
Literal
(
entries
.
total
)))
self
.
g
.
add
((
hydra_collection
,
HYDRA
.
first
,
URIRef
(
'%s/catalog'
%
config
.
api_url
(
api
=
'dcat'
),
last_entry
.
calc_id
)))
self
.
g
.
add
((
hydra_collection
,
HYDRA
.
first
,
uri_ref
(
''
)))
if
last_entry
is
not
None
:
next_url
=
'%s/catalog?after=%s'
%
(
config
.
api_url
(
api
=
'dcat'
),
last_entry
.
calc_id
)
self
.
g
.
add
((
hydra_collection
,
HYDRA
.
next
,
URIRef
(
next_url
)))
self
.
g
.
add
((
hydra_collection
,
HYDRA
.
next
,
uri_ref
(
last_entry
.
calc_id
)))
self
.
g
.
add
((
catalog
,
HYDRA
.
collection
,
hydra_
collection
))
self
.
g
.
add
((
hydra_collection
,
RDF
.
type
,
HYDRA
.
collection
))
def
map_entry
(
self
,
entry
:
EntryMetadata
,
slim
=
False
):
dataset
=
URIRef
(
url
(
'datasets'
,
entry
.
calc_id
))
...
...
tests/app/test_dcat.py
View file @
0fb3e3e3
# Copyright 2018 Markus Scheidgen
#
# Copyright The NOMAD Authors.
#
# This file is part of NOMAD. See https://nomad-lab.eu for further info.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an"AS IS" BASIS,
# distributed under the License is distributed on an
"AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
import
pytest
from
datetime
import
datetime
...
...
@@ -62,7 +66,11 @@ def test_get_dataset(elastic_infra, api, example_entry):
clear_elastic
(
elastic_infra
)
def
test_get_catalog
(
elastic_infra
,
api
,
example_entry
):
@
pytest
.
mark
.
parametrize
(
'after,modified_since'
,
[
(
None
,
None
),
(
None
,
'2020-01-07'
),
(
'test-id-3'
,
'2020-01-07'
)])
def
test_get_catalog
(
elastic_infra
,
api
,
example_entry
,
after
,
modified_since
):
clear_elastic
(
elastic_infra
)
for
i
in
range
(
1
,
11
):
...
...
@@ -73,7 +81,12 @@ def test_get_catalog(elastic_infra, api, example_entry):
infrastructure
.
elastic_client
.
indices
.
refresh
(
index
=
config
.
elastic
.
index_name
)
rv
=
api
.
get
(
'/catalog/?after=test-id-3&modified_since=2020-01-07&format=nt'
)
url
=
'/catalog/?format=turtle'
if
after
:
url
+=
'&after='
+
after
if
modified_since
:
url
+=
'&modified_since='
+
modified_since
rv
=
api
.
get
(
url
)
assert
rv
.
status_code
==
200
clear_elastic
(
elastic_infra
)
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment