mapping.py 8.24 KB
Newer Older
#
# Copyright The NOMAD Authors.
#
# This file is part of NOMAD. See https://nomad-lab.eu for further info.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

from rdflib import Graph, Literal, RDF, URIRef, BNode
20
from rdflib.namespace import Namespace, DCAT, DCTERMS as DCT, FOAF, RDF
21
22
23
24
25
26
27
28
29

from nomad import config
from nomad.datamodel import User

from nomad.datamodel import EntryMetadata, User

from .api import url

VCARD = Namespace('http://www.w3.org/2006/vcard/ns#')
30
HYDRA = Namespace('http://www.w3.org/ns/hydra/core#')
31
32


33
34
35
36
37
38
39
def get_optional_entry_prop(entry, name):
    """Look up ``name`` on ``entry`` by item access, tolerating missing keys.

    Returns ``entry[name]``; if that lookup raises ``KeyError``, the
    placeholder string ``'unavailable'`` is returned instead. Any other
    exception propagates unchanged.
    """
    try:
        value = entry[name]
    except KeyError:
        value = 'unavailable'
    return value


40
41
42
class Mapping():
    def __init__(self):
        """Create an empty graph and bind the prefixes used by this mapping.

        ``self.g`` is the rdflib graph all ``map_*`` methods add triples to.
        ``self.persons`` maps a user id to the graph node already created for
        that user.
        """
        self.g = Graph()

        # (prefix, namespace) pairs, bound in exactly this order.
        prefix_bindings = (
            ('rdf', RDF),
            ('dcat', DCAT),
            ('dct', DCT),
            ('vcard', VCARD),
            ('foaf', FOAF),
            ('hydra', HYDRA),
        )
        for prefix, namespace in prefix_bindings:
            self.g.namespace_manager.bind(prefix, namespace)

        self.persons = {}
51

52
53
54
55
56
57
58
59
60
61
62
63
    def map_catalog(self, entries, after: str, modified_since):
        """Add one page of the DCAT catalog and its Hydra paging info to the graph.

        Every entry in ``entries`` is mapped in its slim form and linked from
        the catalog node via ``dcat:dataset``. The page itself is described as
        a ``hydra:Collection`` with the total item count, a link to the first
        page and, when the page contains at least one entry, a link to the
        page that starts after the last entry of this one.

        Arguments:
            entries: Iterable of entries for this page. It must also provide a
                ``total`` attribute, which is emitted as ``hydra:totalItems``.
            after: The ``after`` paging value of this page. Surrounding
                whitespace is stripped. ``None`` is accepted and means that no
                ``after`` parameter is put into the page URL.
            modified_since: Optional object with a ``strftime`` method. If not
                ``None`` it is added, formatted as ``%Y-%m-%d``, to every
                catalog URL generated here.
        """
        def uri_ref(after):
            # Only parameters that are actually set end up in the URL.
            params = dict()
            if after is not None:
                params['after'] = after
            if modified_since is not None:
                params['modified_since'] = modified_since.strftime('%Y-%m-%d')
            return URIRef(url('catalog', **params))

        # The helper above treats None as "no paging value"; do the same here
        # instead of failing on None.strip().
        if after is not None:
            after = after.strip()

        catalog = uri_ref(after=None)
        self.g.add((catalog, RDF.type, DCAT.Catalog))

        last_entry = None
        for entry in entries:
            # dcat:dataset is the DCAT property that links a catalog to its
            # datasets; Dublin Core terms do not define a "dataset" property.
            self.g.add((catalog, DCAT.dataset, self.map_entry(entry, slim=True)))
            last_entry = entry

        # hydra:Collection (capital C) is the class; hydra:collection is a
        # property and must not be used as an rdf:type.
        hydra_collection = uri_ref(after)
        self.g.add((hydra_collection, RDF.type, HYDRA.Collection))
        self.g.add((hydra_collection, HYDRA.totalItems, Literal(entries.total)))
        self.g.add((hydra_collection, HYDRA.first, uri_ref('')))
        if last_entry is not None:
            self.g.add((hydra_collection, HYDRA.next, uri_ref(last_entry.calc_id)))
78
79

    def map_entry(self, entry: EntryMetadata, slim=False):
        """Add a ``dcat:Dataset`` describing ``entry`` to the graph.

        Arguments:
            entry: The entry metadata to map. ``calc_id``, ``upload_id``,
                ``upload_time``, ``last_processing``, ``uploader`` and
                ``authors`` are read as attributes; ``formula`` and
                ``comment`` are read as optional properties.
            slim: If true, only type, identifier, issued/modified dates, title
                and description are added. Landing page, license, language,
                people and distributions are skipped.

        Returns:
            The ``URIRef`` of the dataset node.
        """
        dataset = URIRef(url('datasets', entry.calc_id))

        self.g.add((dataset, RDF.type, DCAT.Dataset))
        self.g.add((dataset, DCT.identifier, Literal(entry.calc_id)))
        self.g.add((dataset, DCT.issued, Literal(entry.upload_time)))
        self.g.add((dataset, DCT.modified, Literal(entry.last_processing)))
        self.g.add((dataset, DCT.title, Literal(get_optional_entry_prop(entry, 'formula'))))
        self.g.add((dataset, DCT.description, Literal(get_optional_entry_prop(entry, 'comment'))))

        if slim:
            return dataset

        # The DCAT property is spelled dcat:landingPage; "landing_page" is not
        # a term of the DCAT vocabulary.
        landing_page = URIRef('%s/entry/id/%s/%s' % (
            config.gui_url(), entry.upload_id, entry.calc_id))
        self.g.add((dataset, DCAT.landingPage, landing_page))

        self.g.add((dataset, DCT.license, URIRef('https://creativecommons.org/licenses/by/4.0/legalcode')))
        self.g.add((dataset, DCT.language, URIRef('http://id.loc.gov/vocabulary/iso639-1/en')))

        # The uploader acts as publisher and contact point, all authors as creators.
        self.g.add((dataset, DCT.publisher, self.map_user(entry.uploader)))
        for author in entry.authors:
            self.g.add((dataset, DCT.creator, self.map_user(author)))
        self.g.add((dataset, DCAT.contactPoint, self.map_contact(entry.uploader)))

        for dist_kind in ('api', 'json', 'raw'):
            self.g.add((dataset, DCAT.distribution, self.map_distribution(entry, dist_kind)))

        return dataset

    def map_user(self, user: User):
        """Return the ``foaf:Person`` node for ``user``, creating it on first use.

        Nodes are cached in ``self.persons`` by user id, so the same user is
        only added to the graph once. For a new user the record is fetched
        again via ``User.get`` and its name, username and e-mail are added to
        a fresh blank node.
        """
        known = self.persons.get(user.user_id)
        if known is not None:
            return known

        fetched = User.get(user.user_id)
        node = BNode()

        def describe(predicate, value):
            self.g.add((node, predicate, value))

        describe(RDF.type, FOAF.Person)
        describe(FOAF.givenName, Literal(fetched.first_name))
        describe(FOAF.familyName, Literal(fetched.last_name))
        describe(FOAF.nick, Literal(fetched.username))
        describe(FOAF.mbox, URIRef('mailto:%s' % (fetched.email)))

        self.persons[fetched.user_id] = node
        return node
Maja-Olivia Lenz's avatar
Maja-Olivia Lenz committed
126

127
128
129
130
131
132
133
134
135
136
137
138
    def map_contact(self, user: User):
        """Describe ``user`` as a ``vcard:Individual`` and return the node.

        The vCard triples are added to the same node that ``map_user`` uses for
        this user, so the node carries both the FOAF and the vCard description.

        Arguments:
            user: The user to describe; only ``user_id`` is read from it, the
                remaining data comes from ``User.get``.

        Returns:
            The graph node of the user.
        """
        # map_user returns the cached node if the user was mapped before.
        person = self.map_user(user)

        user = User.get(user.user_id)
        organization = 'unavailable' if user.affiliation is None else user.affiliation

        # The vCard ontology uses hyphenated / lower-case property names
        # (given-name, family-name, nickname, organization-name); camelCase
        # variants such as givenName are not part of the vocabulary.
        self.g.add((person, RDF.type, VCARD.Individual))
        self.g.add((person, VCARD['given-name'], Literal(user.first_name)))
        self.g.add((person, VCARD['family-name'], Literal(user.last_name)))
        self.g.add((person, VCARD.nickname, Literal(user.username)))
        # NOTE(review): vcard:hasEmail is modelled as an object property in the
        # ontology; a mailto: URIRef may be more appropriate than a literal.
        self.g.add((person, VCARD.hasEmail, Literal(user.email)))
        self.g.add((person, VCARD['organization-name'], Literal(organization)))

        # TODO: add the address once it is clear where it comes from
        # (affiliation_address?):
        # address = BNode()
        # self.g.add((address, RDF.type, VCARD.Address))
        # self.g.add((address, VCARD['street-address'], ))
        # self.g.add((address, VCARD['postal-code'], ))
        # self.g.add((address, VCARD['country-name'], ))
        # self.g.add((address, VCARD.locality, ))
        # self.g.add((address, VCARD.region, ))
        # self.g.add((person, VCARD.hasAddress, address))

        return person
Maja-Olivia Lenz's avatar
Maja-Olivia Lenz committed
149
150
151
152
153
154

    def map_distribution(self, entry, dist_kind):
        """Add a ``dcat:Distribution`` of ``entry`` to the graph and return its node.

        Arguments:
            entry: The entry to describe. ``upload_id`` and ``calc_id`` are
                read as attributes, ``formula`` as an optional property.
            dist_kind: Which distribution to describe:
                ``'api'`` for access through the NOMAD API (also adds a
                ``dcat:DataService`` node), ``'json'`` for the archive
                download, or ``'raw'`` for the raw files.

        Returns:
            The blank node of the distribution. Its title is the entry's
            formula (or ``'unavailable'``) followed by ``_<dist_kind>``.

        Raises:
            ValueError: If ``dist_kind`` is not one of the kinds above.
        """
        # Reject unknown kinds before touching the graph; otherwise the
        # function would fail at the final return with an UnboundLocalError.
        if dist_kind not in ('api', 'json', 'raw'):
            raise ValueError('unknown distribution kind: %r' % (dist_kind,))

        # All kinds share the same title scheme.
        formula = get_optional_entry_prop(entry, 'formula')
        if formula is None:
            formula = 'unavailable'

        dist = BNode()
        self.g.add((dist, RDF.type, DCAT.Distribution))
        self.g.add((dist, DCT.title, Literal(formula + '_' + dist_kind)))

        zip_media_type = URIRef('https://www.iana.org/assignments/media-types/application/zip')

        if dist_kind == 'api':
            # DataService: API
            service = BNode()
            self.g.add((service, RDF.type, DCAT.DataService))
            self.g.add((service, DCT.title, Literal('NOMAD API')))  # How to include terms from swagger document here?
            self.g.add((service, DCT.description, Literal('Official NOMAD API')))  # same question
            self.g.add((service, DCAT.endpointURL, URIRef('https://nomad-lab.eu/prod/rae/api/')))  # config.api_url() ?
            # not sure if the following needs to be dataset specific:
            self.g.add((service, DCAT.endpointDescription, URIRef('https://nomad-lab.eu/prod/rae/api/swagger.json')))

            # Distribution over API
            self.g.add((dist, DCAT.accessService, service))
        elif dist_kind == 'json':
            # Distribution as JSON
            self.g.add((dist, DCAT.mediaType, URIRef('https://www.iana.org/assignments/media-types/application/json')))
            self.g.add((dist, DCAT.packageFormat, zip_media_type))
            # https, like every other URL generated for this host.
            self.g.add((dist, DCAT.downloadURL, URIRef(
                'https://nomad-lab.eu/prod/rae/api/archive/download?upload_id=%s&calc_id=%s' % (entry.upload_id, entry.calc_id))))
            self.g.add((dist, DCAT.accessURL, URIRef('%s/entry/id/%s/%s' % (
                config.gui_url(), entry.upload_id, entry.calc_id))))
        else:
            # Distribution of the raw data
            self.g.add((dist, DCAT.accessURL, URIRef('https://nomad-lab.eu/prod/rae/api/raw/calc/%s/%s' % (
                entry.upload_id, entry.calc_id))))
            self.g.add((dist, DCAT.packageFormat, zip_media_type))

        return dist