Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
nomad-lab
nomad-FAIR
Commits
0fb3e3e3
Commit
0fb3e3e3
authored
Jan 08, 2021
by
Markus Scheidgen
Browse files
Refactored dcat arg parsing. Hydra is now sibling of catalog.
parent
e67118fd
Pipeline
#90843
passed with stages
in 29 minutes and 53 seconds
Changes
5
Pipelines
1
Hide whitespace changes
Inline
Side-by-side
nomad/app/dcat/api.py
View file @
0fb3e3e3
# Copyright 2018 Markus Scheidgen
#
# Copyright The NOMAD Authors.
#
# This file is part of NOMAD. See https://nomad-lab.eu for further info.
#
#
# Licensed under the Apache License, Version 2.0 (the "License");
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# You may obtain a copy of the License at
#
#
# http://www.apache.org/licenses/LICENSE-2.0
#
http://www.apache.org/licenses/LICENSE-2.0
#
#
# Unless required by applicable law or agreed to in writing, software
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# See the License for the specific language governing permissions and
# limitations under the License.
# limitations under the License.
#
from
flask
import
Blueprint
from
flask
import
Blueprint
,
Response
from
flask_restplus
import
Api
from
flask_restplus
import
Api
,
reqparse
import
urllib.parse
import
urllib.parse
from
rdflib
import
Graph
from
nomad
import
config
from
nomad
import
config
# The flask blueprint that the dcat api is mounted on.
blueprint = Blueprint('dcat', __name__)

# The base url of the dcat api as provided by the nomad config; used by url()
# to build absolute dcat urls.
base_url = config.api_url(api='dcat')
def url(*args, **kwargs):
    '''
    Returns the full dcat api url for the given path (args) and query (kwargs) parameters.

    The path segments in args are joined with '/' and appended to base_url with
    exactly one '/' in between. If kwargs are given, they are url-encoded and
    appended as query string.
    '''
    # Normalize the slashes on both sides of the seam, so that neither a
    # trailing slash on base_url nor a leading slash on the path doubles up.
    root = base_url.rstrip('/')
    path = '/'.join(args).lstrip('/')
    full_url = f'{root}/{path}'

    if not kwargs:
        return full_url

    return f'{full_url}?{urllib.parse.urlencode(kwargs)}'
...
@@ -48,3 +51,24 @@ api = Api(
...
@@ -48,3 +51,24 @@ api = Api(
def errorhandler(error):
    '''
    When an internal server error is caused by an unexpected exception.

    Returns the string representation of the given error.
    '''
    return str(error)
# The request parser for the dcat api. It only knows the optional 'format'
# argument, which is limited to the rdf serialization formats listed below.
arg_parser = reqparse.RequestParser()
arg_parser.add_argument(
    'format', type=str,
    choices=['xml', 'n3', 'turtle', 'nt', 'pretty-xml', 'trig'])
def rdf_respose(g: Graph) -> Response:
    '''
    Serializes the given rdf graph into a flask response.

    The serialization format is read from the 'format' request argument and
    defaults to 'pretty-xml'. The xml based formats are send as
    'application/xml', all other formats as 'text/<format>'.

    Arguments:
        g: The rdf graph to serialize.

    Returns: A 200 response with the serialized graph as body.
    '''
    args = arg_parser.parse_args()
    format_ = args.get('format')
    if format_ is None:
        format_ = 'pretty-xml'

    # This has to test the requested format_ and not the ``format`` builtin;
    # the builtin is never part of the list and the xml formats would end up
    # with a 'text/...' content type.
    if format_ in ('xml', 'pretty-xml'):
        content_type = 'application/xml'
    else:
        content_type = 'text/%s' % format_

    # Depending on the rdflib version, serialize returns bytes or already a str.
    body = g.serialize(format=format_)
    if isinstance(body, bytes):
        body = body.decode('utf-8')

    return Response(body, 200, {'Content-Type': content_type})
nomad/app/dcat/catalog.py
View file @
0fb3e3e3
# Copyright 2018 Markus Scheidgen
#
# Copyright The NOMAD Authors.
#
# This file is part of NOMAD. See https://nomad-lab.eu for further info.
#
#
# Licensed under the Apache License, Version 2.0 (the "License");
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# You may obtain a copy of the License at
#
#
# http://www.apache.org/licenses/LICENSE-2.0
#
http://www.apache.org/licenses/LICENSE-2.0
#
#
# Unless required by applicable law or agreed to in writing, software
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# See the License for the specific language governing permissions and
# limitations under the License.
# limitations under the License.
#
from
flask_restplus
import
Resource
,
reqparse
,
fields
from
flask_restplus
import
Resource
,
fields
from
flask
import
Response
from
elasticsearch_dsl
import
Q
from
elasticsearch_dsl
import
Q
from
nomad
import
search
from
nomad
import
search
from
.api
import
api
from
.api
import
api
,
arg_parser
,
rdf_respose
from
.mapping
import
Mapping
from
.mapping
import
Mapping
ns
=
api
.
namespace
(
'catalog'
,
description
=
'The API for DCAT catalog.'
)
ns
=
api
.
namespace
(
'catalog'
,
description
=
'The API for DCAT catalog.'
)
iso8601
=
fields
.
DateTime
(
dt_format
=
'iso8601'
)
iso8601
=
fields
.
DateTime
(
dt_format
=
'iso8601'
)
arg_parser
=
reqparse
.
RequestParser
()
arg_parser
=
arg_parser
.
copy
()
arg_parser
.
add_argument
(
'format'
,
type
=
str
,
choices
=
[
'xml'
,
'n3'
,
'turtle'
,
'nt'
,
'pretty-xml'
,
'trig'
])
arg_parser
.
add_argument
(
'after'
,
type
=
str
)
arg_parser
.
add_argument
(
'after'
,
type
=
str
)
arg_parser
.
add_argument
(
arg_parser
.
add_argument
(
'modified_since'
,
type
=
lambda
x
:
iso8601
.
parse
(
x
),
'modified_since'
,
type
=
lambda
x
:
iso8601
.
parse
(
x
),
...
@@ -51,13 +47,10 @@ class Catalog(Resource):
...
@@ -51,13 +47,10 @@ class Catalog(Resource):
''' Returns a page of DCAT datasets. '''
''' Returns a page of DCAT datasets. '''
args
=
arg_parser
.
parse_args
()
args
=
arg_parser
.
parse_args
()
format_
=
args
.
get
(
'format'
)
if
format_
is
None
:
format_
=
'xml'
modified_since
=
args
.
get
(
'modified_since'
,
None
)
modified_since
=
args
.
get
(
'modified_since'
,
None
)
modified_since
=
iso8601
.
parse
(
modified_since
)
if
modified_since
is
not
None
else
None
after
=
args
.
get
(
'after'
,
''
)
after
=
args
.
get
(
'after'
,
None
)
if
after
is
None
:
after
=
''
search_request
=
search
.
SearchRequest
().
owner
(
'public'
)
search_request
=
search
.
SearchRequest
().
owner
(
'public'
)
if
modified_since
is
not
None
:
if
modified_since
is
not
None
:
...
@@ -67,14 +60,10 @@ class Catalog(Resource):
...
@@ -67,14 +60,10 @@ class Catalog(Resource):
search_request
.
q
&=
modified_clause
search_request
.
q
&=
modified_clause
es_search
=
search_request
.
_search
.
query
(
search_request
.
q
)
es_search
=
search_request
.
_search
.
query
(
search_request
.
q
)
if
after
is
not
None
:
if
after
is
not
''
:
es_search
=
es_search
.
extra
(
search_after
=
[
after
],
sort
=
'calc_id'
)
es_search
=
es_search
.
extra
(
search_after
=
[
after
],
sort
=
'calc_id'
)
es_response
=
es_search
.
execute
()
es_response
=
es_search
.
execute
()
mapping
=
Mapping
()
mapping
=
Mapping
()
mapping
.
map_catalog
(
es_response
.
hits
)
mapping
.
map_catalog
(
es_response
.
hits
,
after
,
modified_since
)
content_type
=
'application/xml'
if
format_
==
'xml'
else
'text/%s'
%
format_
return
rdf_respose
(
mapping
.
g
)
return
Response
(
mapping
.
g
.
serialize
(
format
=
format_
).
decode
(
'utf-8'
),
200
,
{
'Content-Type'
:
content_type
})
nomad/app/dcat/datasets.py
View file @
0fb3e3e3
# Copyright 2018 Markus Scheidgen
#
# Copyright The NOMAD Authors.
#
# This file is part of NOMAD. See https://nomad-lab.eu for further info.
#
#
# Licensed under the Apache License, Version 2.0 (the "License");
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# You may obtain a copy of the License at
#
#
# http://www.apache.org/licenses/LICENSE-2.0
#
http://www.apache.org/licenses/LICENSE-2.0
#
#
# Unless required by applicable law or agreed to in writing, software
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# See the License for the specific language governing permissions and
# limitations under the License.
# limitations under the License.
#
from
flask_restplus
import
Resource
,
abort
,
reqparse
from
flask_restplus
import
Resource
,
abort
from
flask
import
Response
from
elasticsearch.exceptions
import
NotFoundError
from
elasticsearch.exceptions
import
NotFoundError
from
nomad
import
search
from
nomad
import
search
from
.api
import
api
from
.api
import
api
,
arg_parser
,
rdf_respose
from
.mapping
import
Mapping
from
.mapping
import
Mapping
ns
=
api
.
namespace
(
'datasets'
,
description
=
'The API for DCAT datasets.'
)
ns
=
api
.
namespace
(
'datasets'
,
description
=
'The API for DCAT datasets.'
)
arg_parser
=
reqparse
.
RequestParser
()
arg_parser
.
add_argument
(
'format'
,
type
=
str
,
choices
=
[
'xml'
,
'n3'
,
'turtle'
,
'nt'
,
'pretty-xml'
,
'trig'
])
@
ns
.
route
(
'/<string:entry_id>'
)
@
ns
.
route
(
'/<string:entry_id>'
)
class
Dataset
(
Resource
):
class
Dataset
(
Resource
):
@
api
.
doc
(
'get_dcat_dataset'
)
@
api
.
doc
(
'get_dcat_dataset'
)
...
@@ -44,11 +36,6 @@ class Dataset(Resource):
...
@@ -44,11 +36,6 @@ class Dataset(Resource):
@
api
.
response
(
200
,
'Data send'
,
headers
=
{
'Content-Type'
:
'application/xml'
})
@
api
.
response
(
200
,
'Data send'
,
headers
=
{
'Content-Type'
:
'application/xml'
})
def
get
(
self
,
entry_id
):
def
get
(
self
,
entry_id
):
''' Returns a DCAT dataset for a given NOMAD entry id. '''
''' Returns a DCAT dataset for a given NOMAD entry id. '''
format_
=
arg_parser
.
parse_args
().
get
(
'format'
)
if
format_
is
None
:
format_
=
'xml'
try
:
try
:
entry
=
search
.
entry_document
.
get
(
entry_id
)
entry
=
search
.
entry_document
.
get
(
entry_id
)
except
NotFoundError
:
except
NotFoundError
:
...
@@ -59,7 +46,4 @@ class Dataset(Resource):
...
@@ -59,7 +46,4 @@ class Dataset(Resource):
mapping
=
Mapping
()
mapping
=
Mapping
()
mapping
.
map_entry
(
entry
)
mapping
.
map_entry
(
entry
)
content_type
=
'application/xml'
if
format_
==
'xml'
else
'text/%s'
%
format_
return
rdf_respose
(
mapping
.
g
)
return
Response
(
mapping
.
g
.
serialize
(
format
=
format_
).
decode
(
'utf-8'
),
200
,
{
'Content-Type'
:
content_type
})
nomad/app/dcat/mapping.py
View file @
0fb3e3e3
# Copyright 2020 Markus Scheidgen
#
# Copyright The NOMAD Authors.
#
# This file is part of NOMAD. See https://nomad-lab.eu for further info.
#
#
# Licensed under the Apache License, Version 2.0 (the "License");
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# You may obtain a copy of the License at
#
#
# http://www.apache.org/licenses/LICENSE-2.0
#
http://www.apache.org/licenses/LICENSE-2.0
#
#
# Unless required by applicable law or agreed to in writing, software
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# See the License for the specific language governing permissions and
# limitations under the License.
# limitations under the License.
#
from
rdflib
import
Graph
,
Literal
,
RDF
,
URIRef
,
BNode
from
rdflib
import
Graph
,
Literal
,
RDF
,
URIRef
,
BNode
from
rdflib.namespace
import
Namespace
,
DCAT
,
DCTERMS
as
DCT
,
FOAF
from
rdflib.namespace
import
Namespace
,
DCAT
,
DCTERMS
as
DCT
,
FOAF
,
RDF
from
nomad
import
config
from
nomad
import
config
from
nomad.datamodel
import
User
from
nomad.datamodel
import
User
...
@@ -22,7 +26,6 @@ from nomad.datamodel import EntryMetadata, User
...
@@ -22,7 +26,6 @@ from nomad.datamodel import EntryMetadata, User
from
.api
import
url
from
.api
import
url
VCARD
=
Namespace
(
'http://www.w3.org/2006/vcard/ns#'
)
VCARD
=
Namespace
(
'http://www.w3.org/2006/vcard/ns#'
)
HYDRA
=
Namespace
(
'http://www.w3.org/ns/hydra/core#'
)
HYDRA
=
Namespace
(
'http://www.w3.org/ns/hydra/core#'
)
...
@@ -37,6 +40,7 @@ def get_optional_entry_prop(entry, name):
...
@@ -37,6 +40,7 @@ def get_optional_entry_prop(entry, name):
class
Mapping
():
class
Mapping
():
def
__init__
(
self
):
def
__init__
(
self
):
self
.
g
=
Graph
()
self
.
g
=
Graph
()
self
.
g
.
namespace_manager
.
bind
(
'rdf'
,
RDF
)
self
.
g
.
namespace_manager
.
bind
(
'dcat'
,
DCAT
)
self
.
g
.
namespace_manager
.
bind
(
'dcat'
,
DCAT
)
self
.
g
.
namespace_manager
.
bind
(
'dct'
,
DCT
)
self
.
g
.
namespace_manager
.
bind
(
'dct'
,
DCT
)
self
.
g
.
namespace_manager
.
bind
(
'vcard'
,
VCARD
)
self
.
g
.
namespace_manager
.
bind
(
'vcard'
,
VCARD
)
...
@@ -45,25 +49,32 @@ class Mapping():
...
@@ -45,25 +49,32 @@ class Mapping():
self
.
persons
=
{}
self
.
persons
=
{}
def map_catalog(self, entries, after: str, modified_since):
    '''
    Adds a DCAT catalog for the given page of entries to the graph, together
    with a hydra collection that carries the paging information.

    Arguments:
        entries: The entries of the page. Each entry is mapped (slim) and added
            as dataset to the catalog; entries.total is used as total item count.
        after: The 'after' value the page was requested with. Surrounding
            whitespace is stripped. Must not be None.
        modified_since: If not None, it is added as 'modified_since' query
            parameter (formatted '%Y-%m-%d') to all catalog urls.
    '''
    def uri_ref(after):
        # The catalog url with all query parameters that are not None.
        query = {}
        if after is not None:
            query['after'] = after
        if modified_since is not None:
            query['modified_since'] = modified_since.strftime('%Y-%m-%d')
        return URIRef(url('catalog', **query))

    after = after.strip()

    # The catalog itself is identified without 'after', i.e. it is the same
    # resource on every page.
    catalog = uri_ref(after=None)
    self.g.add((catalog, RDF.type, DCAT.Catalog))

    last_entry = None
    for entry in entries:
        dataset = self.map_entry(entry, slim=True)
        self.g.add((catalog, DCT.dataset, dataset))
        last_entry = entry

    # The hydra collection is a sibling of the catalog and identified by the
    # url of the current page.
    hydra_collection = uri_ref(after)
    self.g.add((hydra_collection, RDF.type, HYDRA.Collection))
    self.g.add((hydra_collection, HYDRA.totalItems, Literal(entries.total)))
    self.g.add((hydra_collection, HYDRA.first, uri_ref('')))
    if last_entry is not None:
        # The next page starts after the last entry of this page.
        self.g.add((hydra_collection, HYDRA.next, uri_ref(last_entry.calc_id)))

    # NOTE(review): this adds a second type with the lower-case
    # HYDRA.collection, in addition to HYDRA.Collection above - confirm that
    # this is intended.
    self.g.add((hydra_collection, RDF.type, HYDRA.collection))
def
map_entry
(
self
,
entry
:
EntryMetadata
,
slim
=
False
):
def
map_entry
(
self
,
entry
:
EntryMetadata
,
slim
=
False
):
dataset
=
URIRef
(
url
(
'datasets'
,
entry
.
calc_id
))
dataset
=
URIRef
(
url
(
'datasets'
,
entry
.
calc_id
))
...
...
tests/app/test_dcat.py
View file @
0fb3e3e3
# Copyright 2018 Markus Scheidgen
#
# Copyright The NOMAD Authors.
#
# This file is part of NOMAD. See https://nomad-lab.eu for further info.
#
#
# Licensed under the Apache License, Version 2.0 (the "License");
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# You may obtain a copy of the License at
#
#
# http://www.apache.org/licenses/LICENSE-2.0
#
http://www.apache.org/licenses/LICENSE-2.0
#
#
# Unless required by applicable law or agreed to in writing, software
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# See the License for the specific language governing permissions and
# limitations under the License.
# limitations under the License.
#
import
pytest
import
pytest
from
datetime
import
datetime
from
datetime
import
datetime
...
@@ -62,7 +66,11 @@ def test_get_dataset(elastic_infra, api, example_entry):
...
@@ -62,7 +66,11 @@ def test_get_dataset(elastic_infra, api, example_entry):
clear_elastic
(
elastic_infra
)
clear_elastic
(
elastic_infra
)
def
test_get_catalog
(
elastic_infra
,
api
,
example_entry
):
@
pytest
.
mark
.
parametrize
(
'after,modified_since'
,
[
(
None
,
None
),
(
None
,
'2020-01-07'
),
(
'test-id-3'
,
'2020-01-07'
)])
def
test_get_catalog
(
elastic_infra
,
api
,
example_entry
,
after
,
modified_since
):
clear_elastic
(
elastic_infra
)
clear_elastic
(
elastic_infra
)
for
i
in
range
(
1
,
11
):
for
i
in
range
(
1
,
11
):
...
@@ -73,7 +81,12 @@ def test_get_catalog(elastic_infra, api, example_entry):
...
@@ -73,7 +81,12 @@ def test_get_catalog(elastic_infra, api, example_entry):
infrastructure
.
elastic_client
.
indices
.
refresh
(
index
=
config
.
elastic
.
index_name
)
infrastructure
.
elastic_client
.
indices
.
refresh
(
index
=
config
.
elastic
.
index_name
)
rv
=
api
.
get
(
'/catalog/?after=test-id-3&modified_since=2020-01-07&format=nt'
)
url
=
'/catalog/?format=turtle'
if
after
:
url
+=
'&after='
+
after
if
modified_since
:
url
+=
'&modified_since='
+
modified_since
rv
=
api
.
get
(
url
)
assert
rv
.
status_code
==
200
assert
rv
.
status_code
==
200
clear_elastic
(
elastic_infra
)
clear_elastic
(
elastic_infra
)
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment