infrastructure.py 17.1 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
# Copyright 2018 Markus Scheidgen
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

15
'''
This module provides functions to establish connections to the database, search engine, etc.
infrastructure services. Usually everything is set up at once with :func:`setup`. This
is run once for each *api* and *worker* process. Individual functions for partial setups
exist to facilitate testing, aspects of :py:mod:`nomad.cli`, etc.
'''
21

22
import os.path
23
import os
24
import shutil
25
from elasticsearch.exceptions import RequestError
26
from elasticsearch_dsl import connections
27
from mongoengine import connect, disconnect
28
from mongoengine.connection import ConnectionFailure
29
30
import smtplib
from email.mime.text import MIMEText
31
from keycloak import KeycloakOpenID, KeycloakAdmin
32
from keycloak.exceptions import KeycloakAuthenticationError, KeycloakGetError
33
import json
34
import jwt
35
36
from flask import g, request
import basicauth
37
from datetime import datetime
Markus Scheidgen's avatar
Markus Scheidgen committed
38
39
import re
import unidecode
40

41
42
from nomad import config, utils

43
logger = utils.get_logger(__name__)
44
45

elastic_client = None
46
''' The elastic search client. '''
47

48
mongo_client = None
49
''' The pymongo mongodb client. '''
50

51

52
def setup():
    '''
    Sets up all infrastructure services in one go, based on the current configuration
    (nomad/config.py and environment): the file system directories, the mongo
    connection, and the elastic search connection.

    This creates the client instances for the databases and has to be called before
    they can be used.
    '''
    # the steps run in this fixed order: files, mongo, elastic
    for setup_step in (setup_files, setup_mongo, setup_elastic):
        setup_step()
62
63


64
65
66
67
68
69
def setup_files():
    '''
    Creates the configured public, staging, and tmp directories (including missing
    parent directories) if they do not exist yet.
    '''
    for directory in [config.fs.public, config.fs.staging, config.fs.tmp]:
        if not os.path.exists(directory):
            # exist_ok closes the gap between the check above and the creation: another
            # process running the same setup concurrently may create the directory
            # in between, which must not fail this process.
            os.makedirs(directory, exist_ok=True)


70
def setup_mongo(client=False):
    '''
    Creates the connection to mongodb and stores it in the module-level
    ``mongo_client``.

    Args:
        client: Not used by this function.

    Returns: The mongo client as returned by mongoengine's ``connect``.
    '''
    global mongo_client

    connection_settings = dict(
        db=config.mongo.db_name, host=config.mongo.host, port=config.mongo.port)

    try:
        mongo_client = connect(**connection_settings)
    except ConnectionFailure:
        # Drop the current connection and try once more with the same settings.
        # NOTE(review): presumably this covers an already registered connection with
        # different settings -- confirm against mongoengine.
        disconnect()
        mongo_client = connect(**connection_settings)

    logger.info('setup mongo connection')
    return mongo_client
81
82
83


def setup_elastic():
    '''
    Creates the connection to elastic search, stores it in the module-level
    ``elastic_client``, and initializes the configured index.

    Returns: The elastic search client.
    '''
    global elastic_client

    host_address = '%s:%d' % (config.elastic.host, config.elastic.port)
    elastic_client = connections.create_connection(
        hosts=[host_address], timeout=60, max_retries=10, retry_on_timeout=True)
    logger.info('setup elastic connection')

    index_name = config.elastic.index_name
    try:
        from nomad.search import entry_document
        entry_document.init(index=index_name)
    except RequestError as error:
        # An already existing index is fine; this happens if two services try to
        # create it at the same time. Everything else is a real problem.
        index_already_exists = (
            error.status_code == 400
            and 'resource_already_exists_exception' in error.error)
        if not index_already_exists:
            raise error

    # make the document class use the configured index name
    entry_document._index._name = index_name
    logger.info('initialized elastic index', index_name=index_name)

    return elastic_client

106

107
class Keycloak():
    '''
    A class that encapsulates all keycloak related functions for easier mocking and
    configuration
    '''

    # Placeholder user id that add_user gives to dict-based users without a user_id.
    _USER_ID_NOT_SET = 'not set'

    def __init__(self):
        # all clients and keys are created lazily by the respective properties
        self.__oidc_client = None
        self.__admin_client = None
        self.__public_keys = None

    @property
    def _oidc_client(self):
        ''' The OIDC client, created from ``config.keycloak`` on first access. '''
        if self.__oidc_client is None:
            self.__oidc_client = KeycloakOpenID(
                server_url=config.keycloak.server_url,
                client_id=config.keycloak.client_id,
                realm_name=config.keycloak.realm_name,
                client_secret_key=config.keycloak.client_secret)

        return self.__oidc_client

    @property
    def _public_keys(self):
        '''
        The realm's public keys as a dict from key id (``kid``) to key object. The keys
        are retrieved via the OIDC client on first access and cached afterwards. If the
        retrieval fails, the exception is passed on and nothing is cached.
        '''
        if self.__public_keys is None:
            jwks = self._oidc_client.certs()
            # the attribute is only assigned once all keys were converted successfully
            self.__public_keys = {
                jwk['kid']: jwt.algorithms.RSAAlgorithm.from_jwk(json.dumps(jwk))
                for jwk in jwks['keys']}

        return self.__public_keys

    def authorize_flask(self, basic: bool = True) -> str:
        '''
        Authorizes the current flask request with keycloak. Uses either Bearer or Basic
        authentication, depending on available headers in the request. Bearer auth is
        basically offline (besides retrieving and caching keycloaks public key for signature
        validation). Basic auth causes authentication against keycloak with each request.

        Will set ``g.user``, either with None or user data from the respective OIDC token.
        Will set ``g.oidc_access_token``, either with None or the access token.

        Args:
            basic: If False, requests with Basic authentication are rejected with an
                error message.

        Returns: An error message or None

        Raises:
            Exception: Any error during token verification that is not a
                ``jwt.InvalidTokenError`` is logged and re-raised.
        '''
        g.oidc_access_token = None
        # Guarantee that g.user exists on every exit path, including the early error
        # returns below.
        g.user = None

        authorization = request.headers.get('Authorization', '')
        if authorization.startswith('Bearer '):
            # Slicing instead of splitting: a header without a token yields an empty
            # token, which is rejected as invalid token below instead of failing here.
            g.oidc_access_token = authorization[len('Bearer '):].strip()
        elif authorization.startswith('Basic '):
            if not basic:
                return 'Basic authentication not allowed, use Bearer token instead'

            try:
                auth = authorization.split(None, 1)[1].strip()
                username, password = basicauth.decode(auth)
                token_info = self._oidc_client.token(username=username, password=password)
                g.oidc_access_token = token_info['access_token']
            except KeycloakAuthenticationError:
                return 'Could not authenticate, wrong credentials'
            except Exception as e:
                logger.error('Could not authenticate Basic auth', exc_info=e)
                return 'Could not authenticate Basic auth: %s' % str(e)

        if g.oidc_access_token is None:
            # Do not return an error. This is the case were there are no credentials
            return None

        try:
            # the key id in the (not yet verified) header selects the public key
            kid = jwt.get_unverified_header(g.oidc_access_token)['kid']
            key = self._public_keys.get(kid)
            if key is None:
                logger.error('The user provided keycloak public key does not exist. Does the UI use the right realm?')
                return 'Could not verify JWT token: public key does not exist'

            options = dict(verify_aud=False, verify_exp=True, verify_iss=True)
            payload = jwt.decode(
                g.oidc_access_token, key=key, algorithms=['RS256'], options=options,
                issuer='%s/realms/%s' % (config.keycloak.server_url.rstrip('/'), config.keycloak.realm_name))
        except jwt.InvalidTokenError as e:
            return str(e)
        except Exception as e:
            logger.error('Could not verify JWT token', exc_info=e)
            raise

        from nomad import datamodel
        g.user = datamodel.User(
            user_id=payload.get('sub', None),
            email=payload.get('email', None),
            first_name=payload.get('given_name', None),
            last_name=payload.get('family_name', None))

        return None

    def __create_username(self, user):
        '''
        Sets ``user.username`` to a generated username that is not used by an existing
        user. The name is based on first and last name, on the last name alone, or on
        the existing username (falling back to the email); for the latter two only the
        part in front of an ``@`` is used. The result is lowercased, transliterated
        with unidecode, and stripped of all characters besides letters, digits,
        ``_``, ``-``, and ``.``.

        Raises:
            ValueError: If the user has neither names, nor a username, nor an email.
        '''
        if user.first_name is not None and user.last_name is not None:
            username = '%s%s' % (user.first_name[:1], user.last_name)
        elif user.last_name is not None:
            username = user.last_name
        else:
            # add_user calls this also for users without any username
            username = user.username if user.username is not None else user.email
            if username is None:
                raise ValueError(
                    'Cannot create a username for a user without names, username, and email')
            if '@' in username:
                username = username.split('@')[0]

        username = unidecode.unidecode(username.lower())
        username = re.sub(r'[^0-9a-zA-Z_\-\.]+', '', username)
        user.username = username

        # Append increasing numbers to the base name until the name is free, i.e.
        # name, name1, name2, ... get_user raises KeyError for non existing users.
        index = 1
        try:
            while self.get_user(username=user.username):
                user.username = '%s%d' % (username, index)
                index += 1
        except KeyError:
            pass

    def add_user(self, user, bcrypt_password=None, invite=False):
        '''
        Adds the given :class:`nomad.datamodel.User` instance to the configured keycloak
        realm using the keycloak admin API.

        Args:
            user: The user, either as :class:`nomad.datamodel.User` or as a dict with
                the keyword arguments for one. A dict may contain a ``password`` (used
                as ``bcrypt_password``) and ``created`` may be given as timestamp
                in milliseconds. The given dict is not modified.
            bcrypt_password: An optional bcrypt hashed password for the new user.
            invite: If True, the new user is required to update password and profile
                and to verify the email; a verification email is sent.

        Returns: An error message or None
        '''
        from nomad import datamodel
        if not isinstance(user, datamodel.User):
            # work on a copy, the caller's dict must not be changed
            user = dict(user)
            user.setdefault('user_id', self._USER_ID_NOT_SET)

            if 'password' in user:
                bcrypt_password = user.pop('password')

            created = user.get('created', None)
            if created is not None and not isinstance(created, datetime):
                user['created'] = datetime.fromtimestamp(created / 1000)

            user = datamodel.User(**user)

        if user.username is None or not re.match(r'^[a-zA-Z0-9_\-\.]+$', user.username):
            self.__create_username(user)

        has_user_id = user.user_id is not None and user.user_id != self._USER_ID_NOT_SET

        keycloak_user = dict(
            id=user.user_id if has_user_id else None,
            email=user.email,
            username=user.username,
            firstName=user.first_name,
            lastName=user.last_name,
            attributes=dict(
                repo_user_id=user.repo_user_id,
                affiliation=user.affiliation if user.affiliation is not None else '',
                affiliation_address=user.affiliation_address if user.affiliation_address is not None else ''),
            createdTimestamp=user.created.timestamp() * 1000 if user.created is not None else None,
            enabled=True,
            emailVerified=True)

        if invite:
            keycloak_user['requiredActions'] = [
                'UPDATE_PASSWORD', 'UPDATE_PROFILE', 'VERIFY_EMAIL']

        if bcrypt_password is not None:
            keycloak_user['credentials'] = [dict(
                type='password',
                hashedSaltedValue=bcrypt_password,
                algorithm='bcrypt')]

        # only send the values that are actually set
        keycloak_user = {
            key: value for key, value in keycloak_user.items()
            if value is not None}

        # each access to the property creates a new client, use one for all calls here
        admin_client = self._admin_client

        if has_user_id:
            try:
                admin_client.get_user(user.user_id)
                return 'User %s with given id already exists' % user.email
            except KeycloakGetError:
                # the id is not taken
                pass

        if admin_client.get_user_id(user.email) is not None:
            return 'User with email %s already exists' % user.email

        try:
            admin_client.create_user(keycloak_user)
        except Exception as e:
            return str(e)

        if invite:
            # failing to send the mail does not fail the already completed creation
            try:
                user = self.get_user(username=user.username)
                self._admin_client.send_verify_email(user_id=user.user_id)
            except Exception as e:
                logger.error('could not send verify email', exc_info=e)

        return None

    def __user_from_keycloak_user(self, keycloak_user):
        '''
        Creates a :class:`nomad.datamodel.User` from the given keycloak user dict. Of
        each keycloak attribute only the first value is used.
        '''
        from nomad import datamodel

        kwargs = {key: value[0] for key, value in keycloak_user.get('attributes', {}).items()}
        return datamodel.User(
            user_id=keycloak_user['id'],
            email=keycloak_user.get('email'),
            username=keycloak_user.get('username'),
            first_name=keycloak_user.get('firstName'),
            last_name=keycloak_user.get('lastName'),
            created=datetime.fromtimestamp(keycloak_user['createdTimestamp'] / 1000),
            **kwargs)

    def search_user(self, query: str = None, max=1000, **kwargs):
        '''
        Retrieves users via the keycloak admin API.

        Args:
            query: An optional search string that is passed on as ``search``.
            max: The maximum number of users to retrieve.
            **kwargs: Passed on to the admin client's ``get_users``. A ``query`` key is
                overwritten.

        Returns: A list of :class:`nomad.datamodel.User` objects.

        Raises:
            Exception: Errors of the admin client are logged and re-raised.
        '''
        if query is not None:
            kwargs['query'] = dict(search=query, max=max)
        else:
            kwargs['query'] = dict(max=max)

        try:
            keycloak_results = self._admin_client.get_users(**kwargs)
        except Exception as e:
            logger.error('Could not retrieve users from keycloak', exc_info=e)
            raise

        return [
            self.__user_from_keycloak_user(keycloak_user)
            for keycloak_user in keycloak_results]

    def get_user(self, user_id: str = None, username: str = None, user=None) -> object:
        '''
        Retrieves all available information about a user from the keycloak admin
        interface. This must be used to retrieve complete user information, because
        the info solely gathered from tokens (i.e. for the authenticated user ``g.user``)
        is generally incomplete.

        Args:
            user_id: The id of the user; takes precedence over the other arguments.
            username: The username of the user; only used if no id is available.
            user: A user object; its ``user_id`` is used if no ``user_id`` is given.

        Returns: The user as :class:`nomad.datamodel.User`.

        Raises:
            KeyError: If the user does not exist.
            AssertionError: If none of the arguments determines a user id.
        '''
        if user is not None and user_id is None:
            user_id = user.user_id

        if username is not None and user_id is None:
            with utils.lnr(logger, 'Could not use keycloak admin client'):
                user_id = self._admin_client.get_user_id(username)

            if user_id is None:
                raise KeyError('User with username %s does not exist' % username)

        assert user_id is not None, 'Could not determine user from given kwargs'

        try:
            keycloak_user = self._admin_client.get_user(user_id)
        except Exception as e:
            # errors without a response code are treated like a 404, too
            if str(getattr(e, 'response_code', 404)) == '404':
                raise KeyError('User does not exist')

            logger.error('Could not retrieve user from keycloak', exc_info=e)
            raise

        return self.__user_from_keycloak_user(keycloak_user)

    @property
    def _admin_client(self):
        ''' A keycloak admin client, newly created from ``config.keycloak`` on each access. '''
        # TODO only create the client if self.__admin_client is None. Currently a new
        # client is created each time, because the client becomes unusable after 60s.
        self.__admin_client = KeycloakAdmin(
            server_url=config.keycloak.server_url,
            username=config.keycloak.username,
            password=config.keycloak.password,
            realm_name=config.keycloak.realm_name,
            verify=True)
        self.__admin_client.realm_name = config.keycloak.realm_name

        return self.__admin_client

    @property
    def access_token(self):
        ''' The OIDC access token stored in flask's ``g``, or None if there is none. '''
        return getattr(g, 'oidc_access_token', None)

382
383
384
385

# Module-level Keycloak instance. Constructing it does not create any keycloak
# client yet; Keycloak.__init__ only initializes the client attributes with None.
keycloak = Keycloak()


386
def reset(remove: bool):
    '''
    Resets the databases mongo, elastic/calcs, and all files. Be careful.

    Drops the configured mongo database, deletes the configured elastic index, removes
    the staging and public directories, and removes the contents of the tmp directory
    (the tmp directory itself is kept). The three parts are independent of each other.
    This function just attempts to remove everything: failures are only logged and
    there is no warranty it will succeed.

    Args:
        remove: Do not try to recreate empty databases, remove entirely. If False, the
            elastic index is initialized again after its deletion.
    '''
    try:
        # connect on demand, if the regular setup did not run in this process
        if mongo_client is None:
            setup_mongo()
        mongo_client.drop_database(config.mongo.db_name)
        logger.info('mongodb resetted')
    except Exception as e:
        logger.error('exception reset mongodb', exc_info=e)

    try:
        if elastic_client is None:
            setup_elastic()
        elastic_client.indices.delete(index=config.elastic.index_name)
        from nomad.search import entry_document
        if not remove:
            entry_document.init(index=config.elastic.index_name)
        logger.info('elastic index resetted')
    except Exception as e:
        logger.error('exception resetting elastic', exc_info=e)

    try:
        shutil.rmtree(config.fs.staging, ignore_errors=True)
        shutil.rmtree(config.fs.public, ignore_errors=True)

        # delete tmp without the folder
        if os.path.isdir(config.fs.tmp):
            for sub_path in os.listdir(config.fs.tmp):
                path = os.path.join(config.fs.tmp, sub_path)
                try:
                    if os.path.isfile(path):
                        os.unlink(path)
                    elif os.path.isdir(path):
                        shutil.rmtree(path, ignore_errors=True)
                except Exception as e:
                    # best effort: report the entry and continue with the rest
                    logger.warning('could not delete tmp entry', path=path, exc_info=e)

        logger.info('files resetted')
    except Exception as e:
        logger.error('exception deleting files', exc_info=e)


435
def send_mail(name: str, email: str, message: str, subject: str):
    """Used to programmatically send mails.

    Does nothing if mails are disabled in the configuration. Failures to use TLS, to
    log in, or to send the message are only logged; failures to connect to the mail
    server are raised.

    Args:
        name: The email recipient name.
        email: The email recipient address.
        message: The email body.
        subject: The subject line.
    """
    if not config.mail.enabled:
        return

    from email.utils import formataddr

    logger = utils.get_logger(__name__)

    msg = MIMEText(message)
    msg['Subject'] = subject
    # the To header needs the address, not only the display name
    msg['To'] = formataddr((name, email))
    to_addrs = [email]

    if config.mail.cc_address is not None:
        msg['Cc'] = 'The nomad team <%s>' % config.mail.cc_address
        to_addrs.append(config.mail.cc_address)

    # the with statement issues QUIT and closes the connection on every exit path
    with smtplib.SMTP(config.mail.host, config.mail.port) as server:
        # NOTE(review): 995 is conventionally the POP3S port, SMTP submission with
        # STARTTLS usually runs on 587 -- confirm that this is the intended port.
        if config.mail.port == 995:
            try:
                server.starttls()
            except Exception as e:
                logger.warning('Could not use TLS', exc_info=e)

        if config.mail.with_login:
            try:
                server.login(config.mail.user, config.mail.password)
            except Exception as e:
                logger.warning('Could not log into mail server', exc_info=e)

        try:
            server.send_message(msg, from_addr=config.mail.from_address, to_addrs=to_addrs)
        except Exception as e:
            logger.error('Could not send email', exc_info=e)