mapping.py 8.24 KB
Newer Older
1
2
3
4
#
# Copyright The NOMAD Authors.
#
# This file is part of NOMAD. See https://nomad-lab.eu for further info.
5
6
7
8
9
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
10
#     http://www.apache.org/licenses/LICENSE-2.0
11
12
#
# Unless required by applicable law or agreed to in writing, software
13
# distributed under the License is distributed on an "AS IS" BASIS,
14
15
16
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
17
#
18
19

from rdflib import Graph, Literal, RDF, URIRef, BNode
20
from rdflib.namespace import Namespace, DCAT, DCTERMS as DCT, FOAF, RDF
21
22
23
24
25
26
27
28
29

from nomad import config
from nomad.datamodel import User

from nomad.datamodel import EntryMetadata, User

from .api import url

VCARD = Namespace('http://www.w3.org/2006/vcard/ns#')
30
HYDRA = Namespace('http://www.w3.org/ns/hydra/core#')
31
32


33
34
35
36
37
38
39
def get_optional_entry_prop(entry, name):
    '''
    Looks up ``name`` on ``entry`` via item access.

    Returns ``entry[name]``, or the placeholder string ``'unavailable'`` if the
    lookup raises a ``KeyError``. Any other exception propagates.
    '''
    try:
        value = entry[name]
    except KeyError:
        value = 'unavailable'

    return value


40
41
42
class Mapping():
    '''
    Builds an RDF graph with DCAT descriptions of entries.

    The graph is kept in ``self.g``. Every ``map_*`` method adds triples to that
    graph; ``map_entry``, ``map_user``, ``map_contact`` and ``map_distribution``
    return the node they described so that callers can link to it.
    ``self.persons`` caches the node created for each user (keyed by ``user_id``),
    so that a user is described by one blank node per mapping.
    '''

    def __init__(self):
        self.g = Graph()

        # Register the prefixes used by the triples added below.
        self.g.bind('rdf', RDF)
        self.g.bind('dcat', DCAT)
        self.g.bind('dct', DCT)
        self.g.bind('vcard', VCARD)
        self.g.bind('foaf', FOAF)
        self.g.bind('hydra', HYDRA)

        # user_id -> node of the already mapped person
        self.persons = {}

    def map_catalog(self, entries, after: str, modified_since, slim=True):
        '''
        Adds a ``dcat:Catalog`` containing the given entries and a
        ``hydra:Collection`` that describes the current page.

        Arguments:
            entries: Iterable of entries; each one is passed to :func:`map_entry`.
                Must also provide a ``total`` attribute, which becomes
                ``hydra:totalItems``.
            after: The ``after`` value that identifies the current page, or ``None``.
                Surrounding whitespace is stripped.
            modified_since: ``None`` or an object with ``strftime``; if given it is
                added to all catalog URLs formatted as ``%Y-%m-%d``.
            slim: Passed on to :func:`map_entry`.
        '''
        def uri_ref(after):
            # Only parameters that are actually set end up in the URL.
            kwargs = dict()
            if after is not None:
                kwargs['after'] = after
            if modified_since is not None:
                kwargs['modified_since'] = modified_since.strftime('%Y-%m-%d')
            return URIRef(url('catalog', **kwargs))

        # ``after`` is optional (uri_ref handles None), so it must not be stripped
        # unconditionally.
        if after is not None:
            after = after.strip()

        catalog = uri_ref(after=None)
        self.g.add((catalog, RDF.type, DCAT.Catalog))

        last_entry = None
        for entry in entries:
            self.g.add((catalog, DCT.dataset, self.map_entry(entry, slim=slim)))
            last_entry = entry

        hydra_collection = uri_ref(after)
        self.g.add((hydra_collection, RDF.type, HYDRA.Collection))
        self.g.add((hydra_collection, HYDRA.totalItems, Literal(entries.total)))
        self.g.add((hydra_collection, HYDRA.first, uri_ref('')))
        if last_entry is not None:
            # The next page starts after the last entry of this page.
            self.g.add((hydra_collection, HYDRA.next, uri_ref(last_entry.calc_id)))

        # NOTE(review): lower-case ``collection`` differs from the ``Collection``
        # type added above and looks like an accidental duplicate -- confirm against
        # the Hydra core vocabulary before removing. Kept to not change the output.
        self.g.add((hydra_collection, RDF.type, HYDRA.collection))

        for person in self.persons.values():
            self.g.add((catalog, DCT.creator, person))

    def map_entry(self, entry: EntryMetadata, slim=False):
        '''
        Adds a ``dcat:Dataset`` for the given entry and returns its node.

        Arguments:
            entry: The entry to describe.
            slim: If true, only identifier, dates, title, and description are added;
                landing page, license, language, persons, and distributions are
                skipped.
        '''
        dataset = URIRef(url('datasets', entry.calc_id))

        self.g.add((dataset, RDF.type, DCAT.Dataset))
        self.g.add((dataset, DCT.identifier, Literal(entry.calc_id)))
        self.g.add((dataset, DCT.issued, Literal(entry.upload_time)))
        self.g.add((dataset, DCT.modified, Literal(entry.last_processing)))
        self.g.add((dataset, DCT.title, Literal(get_optional_entry_prop(entry, 'formula'))))
        self.g.add((dataset, DCT.description, Literal(get_optional_entry_prop(entry, 'comment'))))

        if slim:
            return dataset

        # The DCAT property is spelled ``landingPage``.
        self.g.add((dataset, DCAT.landingPage, URIRef('%s/entry/id/%s/%s' % (
            config.gui_url(), entry.upload_id, entry.calc_id))))

        self.g.add((dataset, DCT.license, URIRef('https://creativecommons.org/licenses/by/4.0/legalcode')))
        self.g.add((dataset, DCT.language, URIRef('http://id.loc.gov/vocabulary/iso639-1/en')))

        self.g.add((dataset, DCT.publisher, self.map_user(entry.uploader)))
        for author in entry.authors:
            self.g.add((dataset, DCT.creator, self.map_user(author)))
        self.g.add((dataset, DCAT.contactPoint, self.map_contact(entry.uploader)))

        for dist_kind in ('api', 'json', 'raw'):
            self.g.add((dataset, DCAT.distribution, self.map_distribution(entry, dist_kind)))

        return dataset

    def map_user(self, user: User):
        '''
        Returns the ``foaf:Person`` node for the given user.

        The node is created on the first call for a ``user_id`` and served from
        ``self.persons`` afterwards. The user is re-read via ``User.get`` before its
        name, username, and email are added to the graph.
        '''
        person = self.persons.get(user.user_id)
        if person is not None:
            return person

        user = User.get(user.user_id)
        person = BNode()

        self.g.add((person, RDF.type, FOAF.Person))
        self.g.add((person, FOAF.givenName, Literal(user.first_name)))
        self.g.add((person, FOAF.familyName, Literal(user.last_name)))
        self.g.add((person, FOAF.nick, Literal(user.username)))
        self.g.add((person, FOAF.mbox, URIRef('mailto:%s' % (user.email))))

        self.persons[user.user_id] = person

        return person

    def map_contact(self, user: User):
        '''
        Adds vcard contact information to the person node of the given user and
        returns that node.

        The node is the one returned by :func:`map_user`, i.e. the same node carries
        both the ``foaf:Person`` and the ``vcard:Individual`` description.
        '''
        # map_user consults the ``self.persons`` cache itself.
        person = self.map_user(user)

        user = User.get(user.user_id)
        # NOTE(review): the vCard ontology spells several of these properties with
        # hyphens (e.g. ``given-name``, ``family-name``) -- confirm whether the
        # camelCase names used here are intended.
        self.g.add((person, RDF.type, VCARD.Individual))
        self.g.add((person, VCARD.givenName, Literal(user.first_name)))
        self.g.add((person, VCARD.familyName, Literal(user.last_name)))
        self.g.add((person, VCARD.nickName, Literal(user.username)))
        self.g.add((person, VCARD.hasEmail, Literal(user.email)))
        self.g.add((person, VCARD.organizationName, Literal(
            'unavailable' if user.affiliation is None else user.affiliation)))

        # TODO: add a vcard:Address node (street address, postal code, country name,
        # locality, region) linked via vcard:hasAddress; open question is whether
        # affiliation_address can provide these values.

        return person

    @staticmethod
    def _distribution_title(entry, suffix):
        ''' Returns the entry's formula (or ``'unavailable'``) followed by suffix. '''
        formula = get_optional_entry_prop(entry, 'formula')
        if formula is None:
            # The property can exist without a value; avoid ``None + str``.
            formula = 'unavailable'
        return formula + suffix

    def map_distribution(self, entry, dist_kind):
        '''
        Adds a ``dcat:Distribution`` of the given kind for the entry and returns
        its node.

        Arguments:
            entry: The entry the distribution belongs to.
            dist_kind: ``'api'`` (access via a ``dcat:DataService``), ``'json'``
                (archive download), or ``'raw'`` (raw files).

        Raises:
            ValueError: If ``dist_kind`` is none of the supported kinds.
        '''
        if dist_kind == 'api':
            # DataService: API
            service = BNode()
            self.g.add((service, RDF.type, DCAT.DataService))
            # TODO: how to include terms from the swagger document in title and
            # description?
            self.g.add((service, DCT.title, Literal('NOMAD API')))
            self.g.add((service, DCT.description, Literal('Official NOMAD API')))
            # TODO: use config.api_url() instead of the hard-coded URL?
            self.g.add((service, DCAT.endpointURL, URIRef('https://nomad-lab.eu/prod/rae/api/')))
            # not sure if the following needs to be dataset specific:
            self.g.add((service, DCAT.endpointDescription, URIRef('https://nomad-lab.eu/prod/rae/api/swagger.json')))

            # Distribution over API
            dist = BNode()
            self.g.add((dist, DCT.title, Literal('unavailable' if entry.formula is None else entry.formula + '_api')))
            self.g.add((dist, RDF.type, DCAT.Distribution))
            self.g.add((dist, DCAT.accessService, service))
        elif dist_kind == 'json':
            # Distribution as JSON
            dist = BNode()
            self.g.add((dist, RDF.type, DCAT.Distribution))
            self.g.add((dist, DCT.title, Literal(self._distribution_title(entry, '_json'))))
            self.g.add((dist, DCAT.mediaType, URIRef('https://www.iana.org/assignments/media-types/application/json')))
            self.g.add((dist, DCAT.packageFormat, URIRef('https://www.iana.org/assignments/media-types/application/zip')))
            self.g.add((dist, DCAT.downloadURL, URIRef(
                'http://nomad-lab.eu/prod/rae/api/archive/download?upload_id=%s&calc_id=%s' % (entry.upload_id, entry.calc_id))))
            self.g.add((dist, DCAT.accessURL, URIRef('%s/entry/id/%s/%s' % (
                config.gui_url(), entry.upload_id, entry.calc_id))))
        elif dist_kind == 'raw':
            # Distribution of the raw data
            dist = BNode()
            self.g.add((dist, RDF.type, DCAT.Distribution))
            self.g.add((dist, DCT.title, Literal(self._distribution_title(entry, '_raw'))))
            self.g.add((dist, DCAT.accessURL, URIRef('https://nomad-lab.eu/prod/rae/api/raw/calc/%s/%s' % (
                entry.upload_id, entry.calc_id))))
            self.g.add((dist, DCAT.packageFormat, URIRef('https://www.iana.org/assignments/media-types/application/zip')))
        else:
            # Without this branch ``dist`` would be unbound at the return below.
            raise ValueError('unknown distribution kind: %r' % (dist_kind,))

        return dist