#
# Copyright The NOMAD Authors.
#
# This file is part of NOMAD. See https://nomad-lab.eu for further info.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
18

19
'''
This module provides functions to establish connections to the database, the search
engine, and other infrastructure services. Usually everything is set up at once with
:func:`setup`. This is run once for each *api* and *worker* process. Individual functions
for partial setups exist to facilitate testing, aspects of :py:mod:`nomad.cli`, etc.
'''
25

26
import os.path
27
import os
28
import shutil
29
from elasticsearch.exceptions import RequestError
30
from elasticsearch_dsl import connections
31
from mongoengine import connect, disconnect
32
from mongoengine.connection import ConnectionFailure
33
34
import smtplib
from email.mime.text import MIMEText
35
from keycloak import KeycloakOpenID, KeycloakAdmin
36
from keycloak.exceptions import KeycloakAuthenticationError, KeycloakGetError
37
import json
38
import jwt
39
40
from flask import g, request
import basicauth
41
from datetime import datetime
Markus Scheidgen's avatar
Markus Scheidgen committed
42
43
import re
import unidecode
44

45
46
from nomad import config, utils

47
logger = utils.get_logger(__name__)
48
49

elastic_client = None
50
''' The elastic search client. '''
51

52
mongo_client = None
53
''' The pymongo mongodb client. '''
54

55

56
def setup():
    '''
    Sets up the infrastructure in one go, based on the current configuration
    (nomad/config.py and environment): first the directories, then the mongo
    connection, then the elastic search connection.

    The database clients of this module are only available after this (or the
    respective individual setup function) was called.
    '''
    for setup_step in (setup_files, setup_mongo, setup_elastic):
        setup_step()
66
67


68
69
70
71
72
73
def setup_files():
    '''
    Creates the public, staging, and tmp directories from ``config.fs`` if they
    do not exist yet.
    '''
    for directory in (config.fs.public, config.fs.staging, config.fs.tmp):
        if not os.path.exists(directory):
            # Another process might create the directory between the check above
            # and this call; exist_ok keeps that from raising FileExistsError.
            os.makedirs(directory, exist_ok=True)


74
def setup_mongo(client=False):
    '''
    Creates the connection to mongodb and keeps the client in the module-level
    ``mongo_client``.

    If connecting raises a ``ConnectionFailure``, ``disconnect()`` is called and
    connecting is attempted a second time.

    Arguments:
        client: Not used by this function.

    Returns: The client returned by ``connect``.
    '''
    global mongo_client

    def open_connection():
        return connect(
            db=config.mongo.db_name, host=config.mongo.host, port=config.mongo.port)

    try:
        mongo_client = open_connection()
    except ConnectionFailure:
        disconnect()
        mongo_client = open_connection()

    logger.info('setup mongo connection')
    return mongo_client
85
86


87
def setup_elastic(create_mappings=True):
    '''
    Creates the connection to elastic search and keeps the client in the
    module-level ``elastic_client``.

    Arguments:
        create_mappings: If true, the mappings for the materials index and the
            entry index are initialized after connecting.

    Returns: The elastic search client.
    '''
    from nomad.search import entry_document, material_document
    from elasticsearch_dsl import Index

    global elastic_client
    elastic_host = '%s:%d' % (config.elastic.host, config.elastic.port)
    elastic_client = connections.create_connection(
        hosts=[elastic_host], timeout=60, max_retries=10, retry_on_timeout=True)
    logger.info('setup elastic connection')

    if not create_mappings:
        return elastic_client

    def already_exists(request_error):
        # This error happens if two services try to create the same index at the
        # same time; it can be ignored.
        return (
            request_error.status_code == 400
            and 'resource_already_exists_exception' in request_error.error)

    materials_alias = config.elastic.materials_index_name
    entries_index_name = config.elastic.index_name

    # The materials are accessed through an alias to allow reindexing them with
    # zero downtime. If the alias exists, the mapping of the index it points to is
    # updated. Otherwise a new index is created and the alias is put on it.
    try:
        if elastic_client.indices.exists_alias(materials_alias):
            aliased_indices = elastic_client.indices.get(materials_alias)
            material_document.init(list(aliased_indices.keys())[0])
        else:
            new_index_name = materials_alias + "_a"
            material_document.init(new_index_name)
            Index(new_index_name).put_alias(name=materials_alias)
    except RequestError as e:
        if not already_exists(e):
            raise

    # The mapping of the index for calculations
    try:
        entry_document.init(index=entries_index_name)
    except RequestError as e:
        if not already_exists(e):
            raise

    entry_document._index._name = entries_index_name
    material_document._index._name = materials_alias
    logger.info('initialized elastic index for calculations', index_name=entries_index_name)
    logger.info('initialized elastic index for materials', index_name=materials_alias)

    return elastic_client

136

137
class Keycloak():
    '''
    A class that encapsulates all keycloak related functions for easier mocking and
    configuration.
    '''

    # Placeholder that add_user puts into user dicts that come without a user_id.
    _USER_ID_NOT_SET = 'not set'

    def __init__(self):
        # Clients and keys are created on demand by the respective properties.
        self.__oidc_client = None
        self.__admin_client = None
        self.__public_keys = None

    @property
    def _oidc_client(self):
        ''' The ``KeycloakOpenID`` client, created from the configuration on first access. '''
        if self.__oidc_client is None:
            self.__oidc_client = KeycloakOpenID(
                server_url=config.keycloak.server_url,
                client_id=config.keycloak.client_id,
                realm_name=config.keycloak.realm_name,
                client_secret_key=config.keycloak.client_secret)

        return self.__oidc_client

    @property
    def _public_keys(self):
        '''
        The public keys from the OIDC client's certs as a dict from key id (``kid``)
        to key. The keys are retrieved on first access and kept afterwards. If the
        retrieval fails, the exception is raised and nothing is kept, i.e. the next
        access tries again.
        '''
        if self.__public_keys is None:
            jwks = self._oidc_client.certs()
            # The dict is only assigned once it was built completely, so a failure
            # never leaves a partial set of keys behind.
            self.__public_keys = {
                jwk['kid']: jwt.algorithms.RSAAlgorithm.from_jwk(json.dumps(jwk))
                for jwk in jwks['keys']}

        return self.__public_keys

    def authorize_flask(self, basic: bool = True) -> str:
        '''
        Authorizes the current flask request with keycloak. Uses either Bearer or Basic
        authentication, depending on available headers in the request. Bearer auth is
        basically offline (besides retrieving and caching keycloak's public key for
        signature validation). Basic auth causes authentication against keycloak with
        each request.

        Will set ``g.user``, either with None or user data from the respective OIDC token.

        Arguments:
            basic: If false, requests with Basic authentication are rejected with an
                error message.

        Returns: An error message or None
        '''
        g.oidc_access_token = None
        authorization = request.headers.get('Authorization', '')
        if authorization.startswith('Bearer '):
            # Slicing (instead of splitting) also works for a header without a token.
            # The resulting empty token is rejected as invalid token below.
            g.oidc_access_token = authorization[len('Bearer '):].strip()
        elif authorization.startswith('Basic '):
            if not basic:
                return 'Basic authentication not allowed, use Bearer token instead'

            try:
                auth = authorization[len('Basic '):].strip()
                username, password = basicauth.decode(auth)
                token_info = self._oidc_client.token(username=username, password=password)
                g.oidc_access_token = token_info['access_token']
            except KeycloakAuthenticationError:
                return 'Could not authenticate, wrong credentials'
            except Exception as e:
                logger.error('Could not authenticate Basic auth', exc_info=e)
                return 'Could not authenticate Basic auth: %s' % str(e)

        if g.oidc_access_token is not None:
            auth_error: str = None
            try:
                kid = jwt.get_unverified_header(g.oidc_access_token)['kid']
                key = self._public_keys.get(kid)
                if key is None:
                    logger.error('The user provided keycloak public key does not exist. Does the UI use the right realm?')
                    auth_error = 'Could not verify JWT token: public key does not exist'
                else:
                    options = dict(verify_aud=False, verify_exp=True, verify_iss=True)
                    payload = jwt.decode(
                        g.oidc_access_token, key=key, algorithms=['RS256'], options=options,
                        issuer='%s/realms/%s' % (config.keycloak.server_url.rstrip('/'), config.keycloak.realm_name))

            except jwt.InvalidTokenError as e:
                auth_error = str(e)
            except Exception as e:
                logger.error('Could not verify JWT token', exc_info=e)
                raise e

            if auth_error is not None:
                g.user = None
                return auth_error

            else:
                from nomad import datamodel
                g.user = datamodel.User(
                    user_id=payload.get('sub', None),
                    email=payload.get('email', None),
                    first_name=payload.get('given_name', None),
                    last_name=payload.get('family_name', None))

                return None

        else:
            g.user = None
            # Do not return an error. This is the case where there are no credentials
            return None

    def __create_username(self, user):
        '''
        Sets ``user.username`` to a generated username. The name is based on first and
        last name, only the last name, or the part before the ``@`` of the existing
        username or email (in that order of preference). It is lower-cased, transliterated
        with unidecode, and reduced to letters, digits, ``_``, ``-``, and ``.``. If a
        user with that name already exists, a counter is appended.

        Raises:
            ValueError: If the user has neither names, nor username, nor email.
        '''
        if user.first_name is not None and user.last_name is not None:
            base_name = '%s%s' % (user.first_name[:1], user.last_name)
        elif user.last_name is not None:
            base_name = user.last_name
        else:
            # add_user also calls this for users without a username; fall back to the
            # email in this case.
            base_name = user.username if user.username is not None else user.email
            if base_name is None:
                raise ValueError('Cannot create a username without names, username, or email')
            base_name = base_name.split('@')[0]

        base_name = unidecode.unidecode(base_name.lower())
        base_name = re.sub(r'[^0-9a-zA-Z_\-\.]+', '', base_name)

        # The counter is always appended to the base name, i.e. the candidates are
        # name, name1, name2, ... and not name, name1, name12, ...
        candidate = base_name
        index = 1
        try:
            while self.get_user(username=candidate):
                candidate = '%s%d' % (base_name, index)
                index += 1
        except KeyError:
            # get_user raises KeyError for unknown users, i.e. the candidate is free
            pass

        user.username = candidate

    def add_user(self, user, bcrypt_password=None, invite=False):
        '''
        Adds the given :class:`nomad.datamodel.User` instance to the configured keycloak
        realm using the keycloak admin API.

        Arguments:
            user: The user as :class:`nomad.datamodel.User` or as a dict with the
                respective keys. A dict is modified in place. It might contain a
                ``password`` that is used as ``bcrypt_password`` and ``created`` might
                be given as timestamp in milliseconds.
            bcrypt_password: The optional bcrypt password hash for the new user.
            invite: If true, the user is required to update password and profile and
                to verify the email; a verify email is sent after the user was created.

        Returns: An error message or None
        '''
        from nomad import datamodel
        if not isinstance(user, datamodel.User):
            if 'user_id' not in user:
                user['user_id'] = self._USER_ID_NOT_SET

            if 'password' in user:
                bcrypt_password = user.pop('password')

            created = user.get('created', None)
            if created is not None and not isinstance(created, datetime):
                user['created'] = datetime.fromtimestamp(created / 1000)

            user = datamodel.User(**user)

        if user.username is None or not re.match(r'^[a-zA-Z0-9_\-\.]+$', user.username):
            self.__create_username(user)

        # The same check is used for the id sent to keycloak and for the lookup of
        # existing users below, so that the placeholder is never treated as a real id.
        has_user_id = user.user_id is not None and user.user_id != self._USER_ID_NOT_SET

        keycloak_user = dict(
            id=user.user_id if has_user_id else None,
            email=user.email,
            username=user.username,
            firstName=user.first_name,
            lastName=user.last_name,
            attributes=dict(
                repo_user_id=user.repo_user_id,
                affiliation=user.affiliation if user.affiliation is not None else '',
                affiliation_address=user.affiliation_address if user.affiliation_address is not None else ''),
            createdTimestamp=user.created.timestamp() * 1000 if user.created is not None else None,
            enabled=True,
            emailVerified=True)

        if invite:
            keycloak_user['requiredActions'] = [
                'UPDATE_PASSWORD', 'UPDATE_PROFILE', 'VERIFY_EMAIL']

        if bcrypt_password is not None:
            keycloak_user['credentials'] = [dict(
                type='password',
                hashedSaltedValue=bcrypt_password,
                algorithm='bcrypt')]

        keycloak_user = {
            key: value for key, value in keycloak_user.items()
            if value is not None}

        # Each access of the _admin_client property creates a new client; use the same
        # one for all admin calls of this method.
        admin_client = self._admin_client

        if has_user_id:
            try:
                admin_client.get_user(user.user_id)
                return 'User %s with given id already exists' % user.email
            except KeycloakGetError:
                pass

        if admin_client.get_user_id(user.email) is not None:
            return 'User with email %s already exists' % user.email

        try:
            admin_client.create_user(keycloak_user)
        except Exception as e:
            return str(e)

        if invite:
            try:
                user = self.get_user(username=user.username)
                admin_client.send_verify_email(user_id=user.user_id)
            except Exception as e:
                logger.error('could not send verify email', exc_info=e)

        return None

    def __user_from_keycloak_user(self, keycloak_user):
        '''
        Creates a :class:`nomad.datamodel.User` from the given keycloak user dict. The
        first value of each keycloak attribute is passed as additional keyword argument.
        '''
        from nomad import datamodel

        kwargs = {key: value[0] for key, value in keycloak_user.get('attributes', {}).items()}
        return datamodel.User(
            user_id=keycloak_user['id'],
            email=keycloak_user.get('email'),
            username=keycloak_user.get('username'),
            first_name=keycloak_user.get('firstName'),
            last_name=keycloak_user.get('lastName'),
            created=datetime.fromtimestamp(keycloak_user['createdTimestamp'] / 1000),
            **kwargs)

    def search_user(self, query: str = None, max=1000, **kwargs):
        '''
        Retrieves users via the keycloak admin client.

        Arguments:
            query: The optional search string.
            max: The maximum number of users to ask for.
            **kwargs: Are passed to the admin client's ``get_users``; a given ``query``
                keyword is overwritten.

        Returns: A list of :class:`nomad.datamodel.User` objects.
        '''
        if query is not None:
            kwargs['query'] = dict(search=query, max=max)
        else:
            kwargs['query'] = dict(max=max)
        try:
            keycloak_results = self._admin_client.get_users(**kwargs)
        except Exception as e:
            logger.error('Could not retrieve users from keycloak', exc_info=e)
            raise e

        return [
            self.__user_from_keycloak_user(keycloak_user)
            for keycloak_user in keycloak_results]

    def get_user(self, user_id: str = None, username: str = None, user=None) -> object:
        '''
        Retrieves all available information about a user from the keycloak admin
        interface. This must be used to retrieve complete user information, because
        the info solely gathered from tokens (i.e. for the authenticated user ``g.user``)
        is generally incomplete.

        Arguments:
            user_id: The id of the user. Takes precedence over the other arguments.
            username: The username that is used to look up the user id.
            user: A user object that provides the ``user_id``.

        Raises:
            KeyError: If the user does not exist.
        '''

        if user is not None and user_id is None:
            user_id = user.user_id

        if username is not None and user_id is None:
            with utils.lnr(logger, 'Could not use keycloak admin client'):
                user_id = self._admin_client.get_user_id(username)

            if user_id is None:
                raise KeyError('User with username %s does not exist' % username)

        assert user_id is not None, 'Could not determine user from given kwargs'

        try:
            keycloak_user = self._admin_client.get_user(user_id)

        except Exception as e:
            # Exceptions without a response_code are treated like a 404
            if str(getattr(e, 'response_code', 404)) == '404':
                raise KeyError('User does not exist')

            logger.error('Could not retrieve user from keycloak', exc_info=e)
            raise e

        return self.__user_from_keycloak_user(keycloak_user)

    @property
    def _admin_client(self):
        ''' A ``KeycloakAdmin`` client for the configured realm; created anew on each access. '''
        # TODO only create the client if self.__admin_client is None. For now a new
        # client is created on each access, because the client becomes unusable after 60s
        self.__admin_client = KeycloakAdmin(
            server_url=config.keycloak.server_url,
            username=config.keycloak.username,
            password=config.keycloak.password,
            realm_name=config.keycloak.realm_name,
            verify=True)
        self.__admin_client.realm_name = config.keycloak.realm_name

        return self.__admin_client

    @property
    def access_token(self):
        ''' The ``oidc_access_token`` of the flask ``g`` object or None if it is not set. '''
        return getattr(g, 'oidc_access_token', None)

412
413
414
415

# Module-level Keycloak instance.
keycloak = Keycloak()


416
def reset(remove: bool):
    '''
    Resets the databases mongo, elastic/calcs, and all files. Be careful.
    In contrast to :func:`remove`, it will only remove the contents of dbs and indicies.

    The mongo database, the elastic indices, and the files are handled one after the
    other. An exception in one of these steps is only logged and the next step is
    still attempted; there is no warranty that everything is removed.

    Args:
        remove: Do not try to recreate empty databases, remove entirely.
    '''
    # mongo: drop the whole database
    try:
        if not mongo_client:
            setup_mongo()
        mongo_client.drop_database(config.mongo.db_name)
        logger.info('mongodb resetted')
    except Exception as e:
        logger.error('exception reset mongodb', exc_info=e)

    # elastic: delete the entry index and the index behind the materials alias
    try:
        if not elastic_client:
            setup_elastic()
        entries_index_name = config.elastic.index_name
        elastic_client.indices.delete(index=entries_index_name)
        aliased_indices = elastic_client.indices.get(config.elastic.materials_index_name)
        material_index_name = list(aliased_indices.keys())[0]
        elastic_client.indices.delete(index=material_index_name)
        from nomad.search import entry_document, material_document
        if not remove:
            entry_document.init(index=entries_index_name)
            material_document.init(index=material_index_name)
        logger.info('elastic index resetted')
    except Exception as e:
        logger.error('exception resetting elastic', exc_info=e)

    # files: remove staging and public entirely, but only the contents of tmp
    try:
        for directory in (config.fs.staging, config.fs.public):
            shutil.rmtree(directory, ignore_errors=True)

        tmp_directory = config.fs.tmp
        if os.path.isdir(tmp_directory):
            for entry_name in os.listdir(tmp_directory):
                entry_path = os.path.join(tmp_directory, entry_name)
                try:
                    if os.path.isfile(entry_path):
                        os.unlink(entry_path)
                    elif os.path.isdir(entry_path):
                        shutil.rmtree(entry_path, ignore_errors=True)
                except Exception:
                    pass

        logger.info('files resetted')
    except Exception as e:
        logger.error('exception deleting files', exc_info=e)


468
def send_mail(name: str, email: str, message: str, subject: str):
    """Used to programmatically send mails.

    Does nothing if mails are not enabled in the configuration. Problems with
    TLS, the login, or the sending itself are only logged. The mail is also sent
    to the configured cc address, if there is one.

    Args:
        name: The email recipient name.
        email: The email recipient address.
        message: The email body.
        subject: The subject line.
    """
    if not config.mail.enabled:
        return

    logger = utils.get_logger(__name__)
    server = smtplib.SMTP(config.mail.host, config.mail.port)

    try:
        if config.mail.port == 995:
            try:
                server.starttls()
            except Exception as e:
                logger.warning('Could not use TLS', exc_info=e)

        if config.mail.with_login:
            try:
                server.login(config.mail.user, config.mail.password)
            except Exception as e:
                logger.warning('Could not log into mail server', exc_info=e)

        msg = MIMEText(message)
        msg['Subject'] = subject
        msg['To'] = name
        msg['From'] = config.mail.from_address
        to_addrs = [email]

        if config.mail.cc_address is not None:
            msg['Cc'] = 'The nomad team <%s>' % config.mail.cc_address
            to_addrs.append(config.mail.cc_address)

        try:
            server.send_message(msg, from_addr=config.mail.from_address, to_addrs=to_addrs)
        except Exception as e:
            logger.error('Could not send email', exc_info=e)
    finally:
        # Always end the session, also after unexpected exceptions. If the server
        # already dropped the connection (e.g. after a failed send), quit raises
        # SMTPServerDisconnected; there is nothing left to close in this case and
        # the failure that was logged above must not turn into a crash here.
        try:
            server.quit()
        except smtplib.SMTPServerDisconnected:
            pass