#
# Copyright The NOMAD Authors.
#
# This file is part of NOMAD. See https://nomad-lab.eu for further info.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

20
import click
21
import sys
22

23
from nomad import config
24
25
26
27
28
29
30
31
from nomad.cli.cli import cli


@cli.group(help='''The nomad admin commands to do nasty stuff directly on the databases.
                     Remember: With great power comes great responsibility!''')
@click.pass_context
def admin(ctx):
    '''
    Click group that the admin commands and sub-groups of this module attach to.

    The group itself performs no work; the click context ``ctx`` is received
    but not used here.
    '''
32
33


34
35
36
37
38
39
40
41
@admin.command(help='Reset/remove all databases.')
@click.option('--remove', is_flag=True, help='Do not just reset all dbs, but also remove them.')
@click.option('--i-am-really-sure', is_flag=True, help='Must be set for the command to do anything.')
def reset(remove, i_am_really_sure):
    '''
    Sets up the mongo and elastic connections and calls ``infrastructure.reset``.

    Arguments:
        remove: Forwarded unchanged to ``infrastructure.reset``.
        i_am_really_sure: Safety switch. If it is not set, a notice is printed
            and the process exits with status 1 before anything is touched.
    '''
    if not i_am_really_sure:
        print('You do not seem to be really sure about what you are doing.')
        sys.exit(1)

    # Imported only after the safety check has passed.
    from nomad import infrastructure

    infrastructure.setup_mongo()
    infrastructure.setup_elastic()

    infrastructure.reset(remove)


50
51
52
@admin.command(help='Reset all "stuck" in processing uploads and calc in low level mongodb operations.')
@click.option('--zero-complete-time', is_flag=True, help='Sets the complete time to epoch zero.')
def reset_processing(zero_complete_time):
    '''
    Puts all ``proc.Calc`` and ``proc.Upload`` documents that are in one of the
    ``ProcessStatus.STATUSES_PROCESSING`` states back to ``ProcessStatus.READY``.

    Arguments:
        zero_complete_time: If set, ``complete_time`` is written as
            ``datetime.fromtimestamp(0)``; otherwise as ``datetime.now()``.
    '''
    from datetime import datetime

    from nomad import infrastructure, processing as proc

    infrastructure.setup_mongo()

    def reset_collection(cls):
        # All documents of this class that are still marked as processing.
        stuck = cls.objects(process_status__in=proc.ProcessStatus.STATUSES_PROCESSING)
        print('%d %s processes need to be reset due to incomplete process' % (stuck.count(), cls.__name__))

        if zero_complete_time:
            new_complete_time = datetime.fromtimestamp(0)
        else:
            new_complete_time = datetime.now()

        # Bulk update: clear all process bookkeeping fields in one go.
        stuck.update(
            process_status=proc.ProcessStatus.READY,
            current_process=None,
            worker_hostname=None,
            celery_task_id=None,
            errors=[],
            warnings=[],
            complete_time=new_complete_time)

    reset_collection(proc.Calc)
    reset_collection(proc.Upload)


74
75
76
77
@admin.command(help='Check and lift embargo of data with expired embargo period.')
@click.option('--dry', is_flag=True, help='Do not lift the embargo, just show what needs to be done.')
@click.option('--parallel', default=1, type=int, help='Use the given amount of parallel processes. Default is 1.')
def lift_embargo(dry, parallel):
    '''
    Goes through all published uploads with embargo and lifts the embargo of
    those where ``publish_time`` plus ``embargo_length`` months lies in the past.

    Arguments:
        dry: If set, only prints the uploads that need to be changed.
        parallel: Accepted on the command line, but not used by this function.
    '''
    from datetime import datetime
    from dateutil.relativedelta import relativedelta

    from nomad import infrastructure, processing as proc
    from nomad.search import quantity_values

    infrastructure.setup_mongo()
    infrastructure.setup_elastic()

    embargoed_and_published = dict(with_embargo=True, published=True)

    for upload_id in quantity_values('upload_id', query=embargoed_and_published, owner='all'):
        upload = proc.Upload.get(upload_id)
        embargo_length = upload.embargo_length

        embargo_end = upload.publish_time + relativedelta(months=embargo_length)
        if embargo_end >= datetime.now():
            # The embargo period has not passed yet.
            continue

        print('need to lift the embargo of %s (publish_time=%s, embargo=%d)' % (
            upload.upload_id, upload.publish_time, embargo_length))

        if dry:
            continue

        # Setting the embargo length to 0 is done on behalf of the admin user.
        upload.edit_upload_metadata(
            edit_request_json={'metadata': {'embargo_length': 0}},
            user_id=config.services.admin_user_id)
102
103


104
@admin.group(help='Generate scripts and commands for nomad operation.')
def ops():
    '''
    Click sub-group of ``admin`` that the operation helper commands of this
    module attach to. The group itself performs no work.
    '''


109
110
111
# @ops.group(help='Tools for managing the DOS similarity data.')
# def similarity():
#     pass
112
113


114
115
116
@ops.command(help=('Dump the mongo (calculation metadata) db.'))
@click.option('--restore', is_flag=True, help='Do not dump, but restore.')
def dump(restore: bool):
    '''
    Prints a ``mongodump`` command line for the configured mongo database. The
    output directory is suffixed with the current UTC date. Nothing is executed.

    Arguments:
        restore: Accepted on the command line, but not consulted by this function.
    '''
    from datetime import datetime

    mongo = config.mongo
    today = datetime.utcnow().strftime('%Y_%m_%d')

    command = 'mongodump --host {} --port {} --db {} -o /backup/fairdi/mongo/{}'.format(
        mongo.host, mongo.port, mongo.db_name, today)
    print(command)


@ops.command(help=('Restore the mongo (calculation metadata) db.'))
@click.argument('PATH_TO_DUMP', type=str, nargs=1)
def restore(path_to_dump):
    '''
    Prints a ``mongorestore`` command line for the configured mongo database.
    Nothing is executed.

    Arguments:
        path_to_dump: Appended verbatim as the last argument of the command.
    '''
    mongo = config.mongo

    command = 'mongorestore --host {} --port {} --db {} {}'.format(
        mongo.host, mongo.port, mongo.db_name, path_to_dump)
    print(command)


131
@ops.command(help=('Generate an nginx.conf to serve the GUI and proxy pass to API container.'))
@click.option('--prefix', type=str, default=config.services.api_base_path, help='Alter the url path prefix.')
@click.option('--host', type=str, default=config.services.api_host, help='Alter the NOMAD app host.')
@click.option('--port', type=str, default=config.services.api_port, help='Alter the NOMAD app port.')
@click.option('--server/--no-server', default=True, help='Control writing of the outer server {} block. '
              'Useful when conf file is included within another nginx.conf.')
def nginx_conf(prefix, host, port, server):
    '''
    Prints an nginx configuration to stdout that proxies all locations to the
    NOMAD app at ``http://<host>:<port>``.

    Arguments:
        prefix: URL path prefix. It is normalized to have exactly one leading
            and no trailing slash before it is put into the location rules.
        host: Host name used in all ``proxy_pass`` directives.
        port: Port used in all ``proxy_pass`` directives.
        server: If true, the locations are wrapped in an outer ``server { ... }``
            block; otherwise only the ``location`` blocks are printed.
    '''
    # Same as stripping right, then left, then prepending a single slash.
    prefix = '/' + prefix.strip('/')

    if server:
        # Not run through ``format``, hence the single curly brace.
        print('''server {
    listen        80;
    server_name   www.example.com;
    proxy_set_header Host $host;
        ''')

    # {0}: prefix, {1}: host, {2}: port; literal braces are doubled for ``format``.
    print('''
    location / {{
        proxy_pass http://{1}:{2};
    }}

    location ~ {0}\\/?(gui)?$ {{
        rewrite ^ {0}/gui/ permanent;
    }}

    location {0}/gui/ {{
        proxy_intercept_errors on;
        error_page 404 = @redirect_to_index;
        proxy_pass http://{1}:{2};
    }}

    location @redirect_to_index {{
        rewrite ^ {0}/gui/index.html break;
        proxy_pass http://{1}:{2};
    }}

    location ~ \\/gui\\/(service-worker\\.js|meta\\.json)$ {{
        add_header Last-Modified $date_gmt;
        add_header Cache-Control 'no-store, no-cache, must-revalidate, proxy-revalidate, max-age=0';
        if_modified_since off;
        expires off;
        etag off;
        proxy_pass http://{1}:{2};
    }}

    location ~ /api/v1/uploads(/?$|.*/raw|.*/bundle?$) {{
        client_max_body_size 35g;
        proxy_request_buffering off;
        proxy_pass http://{1}:{2};
    }}

    location ~ /api/v1/.*/download {{
        proxy_buffering off;
        proxy_pass http://{1}:{2};
    }}
'''.format(prefix, host, port))

    if server:
        # Closes the ``server {`` block opened above.
        print('}')
190
191
192
193
194
195
196


@ops.command(help='Updates the AFLOW prototype information using the latest online version and writes the results to a python module in the given FILEPATH.')
@click.argument('FILEPATH', nargs=1, type=str)
@click.option('--matches-only', is_flag=True, help='Only update the match information that depends on the symmetry analysis settings. Will not perform an online update.')
@click.pass_context
def prototypes_update(ctx, filepath, matches_only):
    '''
    Thin CLI wrapper that forwards its arguments to
    ``nomad.cli.aflow.update_prototypes``.

    Arguments:
        ctx: The click context, passed on unchanged.
        filepath: The FILEPATH command line argument, passed on unchanged.
        matches_only: Value of the ``--matches-only`` flag, passed on unchanged.
    '''
    from nomad.cli.aflow import update_prototypes

    update_prototypes(ctx, filepath, matches_only)
199

200
201

@ops.command(help='Updates the springer database in nomad.config.normalize.springer_db_path.')
@click.option('--max-n-query', default=10, type=int, help='Number of unsuccessful springer request before returning an error. Default is 10.')
@click.option('--retry-time', default=120, type=int, help='Time in seconds to retry after unsuccessful request. Default is 120.')
def springer_update(max_n_query, retry_time):
    '''
    Thin CLI wrapper that forwards both option values to
    ``nomad.cli.admin.springer.update_springer``.

    Arguments:
        max_n_query: Value of ``--max-n-query``, passed on unchanged.
        retry_time: Value of ``--retry-time``, passed on unchanged.
    '''
    from nomad.cli.admin import springer

    run_update = springer.update_springer
    run_update(max_n_query, retry_time)
207
208


209
210
211
212
213
214
215
# @similarity.command(help='Updates the msgpack file containing the similarity information.')
# @click.option('--dir', "-d", "input_dir", type=str, help='Path of the folder containing the raw similarity information files')
# @click.option('--out', "-o", type=str, help='Path of the output msgpack file.')
# @click.option('--verbose', is_flag=True, help='Enable verbose output.')
# def update(input_dir, out, verbose):
#     from nomad.cli.admin import similarity
#     similarity.update(input_dir, out, verbose)
216
217


218
219
220
221
222
223
224
# @similarity.command(help='Ingests the given similarity information from an msgpack file into MongoDB.')
# @click.option('--in', "-i", "input_path", type=str, help='Path of the ingested msgpack file.')
# @click.option('--batch_size', type=int, default=10000, help='Batch size for MongoDB bulk ingestion.')
# @click.option('--verbose', is_flag=True, help='Enable verbose output.')
# def ingest(input_path, batch_size, verbose):
#     from nomad.cli.admin import similarity
#     similarity.ingest(input_path, batch_size, verbose)
225
226
227
228
229
230
231
232
233


@ops.command(help='Configures the GUIs based on NOMAD config.')
def gui_config():
    '''
    Creates a configured copy of the static GUI files.

    The GUI folder (relative to this module) is copied to a sibling
    ``.gui_configured`` folder, replacing any previous copy. If the copy has no
    ``env.js``, one is written from the NOMAD config. Finally, the placeholder
    base path ``/fairdi/nomad/latest`` is replaced with
    ``config.services.api_base_path`` in all json, html, js, js.map, and css
    files of the copy.

    All text files are read and written as UTF-8 explicitly, so the result does
    not depend on the locale's default encoding.
    '''
    import os
    import os.path
    from nomad import config
    import glob
    import shutil
    import json

    gui_folder = os.path.abspath(os.path.join(os.path.dirname(__file__), '../../app/flask/static/gui'))
    run_gui_folder = os.path.join(gui_folder, '../.gui_configured')

    # copy
    shutil.rmtree(run_gui_folder, ignore_errors=True)
    shutil.copytree(gui_folder, run_gui_folder)

    # setup the env, only if the copied GUI does not bring its own env.js
    env_js_file = os.path.join(run_gui_folder, 'env.js')
    if not os.path.exists(env_js_file):
        with open(env_js_file, 'wt', encoding='utf-8') as f:
            f.write(f'''
window.nomadEnv = {{
    'appBase': '{config.services.api_base_path}',
    'keycloakBase': 'https://nomad-lab.eu/fairdi/keycloak/auth/',
    'keycloakRealm': '{config.keycloak.realm_name}',
    'keycloakClientId': '{config.keycloak.client_id}',
    'debug': false,
    'encyclopediaBase': '{config.encyclopedia_base if config.encyclopedia_base else 'undefined'}',
    'aitoolkitEnabled': {'true' if config.aitoolkit_enabled else 'false'},
    'oasis': {'true' if config.keycloak.oasis else 'false'},
    'version': {json.dumps(config.meta.beta) if config.meta.beta else dict()},
    'globalLoginRequired': {'false' if config.oasis.allowed_users is None else 'true'}
}};''')

    # replace base path in all GUI files
    source_file_globs = [
        '**/*.json',
        '**/*.html',
        '**/*.js',
        '**/*.js.map',
        '**/*.css']
    for source_file_glob in source_file_globs:
        source_files = glob.glob(os.path.join(run_gui_folder, source_file_glob), recursive=True)
        for source_file in source_files:
            with open(source_file, 'rt', encoding='utf-8') as f:
                file_data = f.read()
            file_data = file_data.replace('/fairdi/nomad/latest', config.services.api_base_path)
            with open(source_file, 'wt', encoding='utf-8') as f:
                f.write(file_data)

277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302

@admin.group(help='Commands for upgrading to a newer NOMAD version')
def upgrade():
    '''
    Click sub-group of ``admin`` that the upgrade commands of this module
    attach to. The group itself performs no work.
    '''


@upgrade.command(
    help='''Converts (upgrades) records from one mongodb and migrates to another.
            Note, it is strongly recommended to run this command with verbose log level, i.e.

                nomad -v admin upgrade migrate-mongo ...

         ''')
@click.option(
    '--host', type=str, default=config.mongo.host,
    help='The mongodb host. By default same as the configured db.')
@click.option(
    '--port', type=int, default=config.mongo.port,
    help='The mongodb port. By default same as the configured db.')
@click.option(
    '--src-db-name', type=str, required=True,
    help='The name of the source database.')
@click.option(
    '--dst-db-name', type=str, default=config.mongo.db_name,
    help='The name of the destination database. By default same as the configured db.')
@click.option(
    '--upload-query', type=str,
    help='A mongo upload query. All uploads matching the query will be included in the migration.')
@click.option(
    '--entry-query', type=str,
    help='A mongo entry query. All uploads with an entry matching the query will be included in the migration.')
@click.option(
    '--ids-from-file', type=str,
    help='''Reads upload IDs from the specified file. Cannot be used together with the
            --upload-query or --entry-query options.
            This can for example be used to retry just the uploads that have previously failed
            (as these ids can be exported to file using --failed-ids-to-file). You can specify both
            --ids-from-file and --failed-ids-to-file at the same time with the same file name.''')
@click.option(
    '--failed-ids-to-file', type=str,
    help='''Write the IDs of failed and skipped uploads to the specified file.
            This can for example be used to subsequently retry just the uploads that failed
            (as these ids can be loaded from file using --ids-from-file). You can specify both
            --ids-from-file and --failed-ids-to-file at the same time with the same file name.''')
@click.option(
    '--upload-update', type=str,
    help='json with updates to apply to all converted uploads')
@click.option(
    '--entry-update', type=str,
    help='json with updates to apply to all converted entries')
@click.option(
    '--overwrite', type=click.Choice(['always', 'if-newer', 'never'], case_sensitive=False), default='never',
    help='''If an upload already exists in the destination db, this option determines whether
            it and its child records should be overwritten with the data from the source db.
            Possible values are "always", "if-newer", "never". Selecting "always" always overwrites,
            "never" never overwrites, and "if-newer" overwrites if the upload either doesn't exist
            in the destination, or it exists but its complete_time (i.e. last time it was
            processed) is older than in the source db.''')
@click.option(
    '--fix-problems', is_flag=True,
    help='''If a minor, fixable problem is encountered, fixes it automatically; otherwise fail.''')
@click.option(
    '--dry', is_flag=True,
    help='Dry run (not writing anything to the destination database).')
def migrate_mongo(
        host, port, src_db_name, dst_db_name, upload_query, entry_query,
        ids_from_file, failed_ids_to_file, upload_update, entry_update, overwrite, fix_problems, dry):
    '''
    Selects uploads from the source database and hands them to
    ``migrate_mongo_uploads`` for migration into the destination database.

    The uploads are selected by at most one of: an upload query, an entry query
    (resolved to the distinct upload ids of the matching entries), or a file
    with one upload id per line. Without any selection, all uploads of the
    source database are used. A selection that yields no upload ids (empty file,
    entry query without matches) selects no uploads.

    Note that ``config.mongo`` is overwritten with the given host, port, and
    destination database name before the mongo connection is set up.

    Returns:
        -1 if the given options are contradictory or the ids file is not found,
        otherwise None.
    '''
    import json
    from pymongo.database import Database
    from nomad import utils, infrastructure
    from .upgrade import create_collections_if_needed, migrate_mongo_uploads

    logger = utils.get_logger('migrate-mongo')
    config.mongo.host = host
    config.mongo.port = port
    config.mongo.db_name = dst_db_name
    infrastructure.setup_mongo()

    db_src: Database = infrastructure.mongo_client.get_database(src_db_name)
    db_dst: Database = infrastructure.mongo_client.get_database(dst_db_name)

    if not dry:
        create_collections_if_needed(db_dst)

    # None means "no restriction by ids"; an empty list means "no uploads at all".
    upload_ids = None
    if upload_query and entry_query:
        print('Cannot specify both upload-query and entry-query')
        return -1
    if ids_from_file:
        if upload_query or entry_query:
            print('Cannot specify a query when using --ids-from-file.')
            return -1
        try:
            with open(ids_from_file, 'r') as f:
                # One id per line, blank lines are ignored.
                upload_ids = [line.strip() for line in f.readlines() if line.strip()]
        except FileNotFoundError:
            logger.error(f'Could not open file {ids_from_file}')
            return -1
    elif upload_query:
        upload_query = json.loads(upload_query)
    elif entry_query:
        entry_query = json.loads(entry_query)

    if upload_update:
        upload_update = json.loads(upload_update)
    if entry_update:
        entry_update = json.loads(entry_update)

    if entry_query:
        logger.info('Querying entries...')
        upload_ids = list(db_src.calc.distinct('upload_id', entry_query))
    if upload_ids is not None:
        # Explicit None check: an empty id list must select nothing. A plain
        # truthiness check would leave the query unset and match every upload.
        upload_query = {'_id': {'$in': upload_ids}}
    logger.info('Querying uploads...')
    uploads = db_src.upload.find(upload_query)

    migrate_mongo_uploads(
        db_src, db_dst, uploads, failed_ids_to_file, upload_update, entry_update, overwrite,
        fix_problems, dry, logger)