diff --git a/nomad/api/repository.py b/nomad/api/repository.py
index d90a60da1adee1fa25d5b897605af3f215fd5558..b0a7634674ac45cc38eb32f34eb9b3d7a3fe5ac8 100644
--- a/nomad/api/repository.py
+++ b/nomad/api/repository.py
@@ -23,7 +23,7 @@ from flask_restful import Resource, abort
 
 from nomad.repo import RepoCalc
 
-from .app import api, auth, base_path, login_if_available
+from .app import api, base_path, login_if_available
 
 
 class RepoCalcRes(Resource):
diff --git a/nomad/api/upload.py b/nomad/api/upload.py
index 7cf2ce81c4dfe12a020607b76a6e3661d72f4ec2..0ec2cea36dee121a1885e94c727b781fa14a0ea7 100644
--- a/nomad/api/upload.py
+++ b/nomad/api/upload.py
@@ -159,6 +159,84 @@ class UploadsRes(Resource):
 
         return upload.json_dict, 200
 
+    @login_really_required
+    def put(self):
+        """
+        Upload a file and automatically create a new upload in the process.
+        Can be used to upload files via browser or other http clients like curl.
+        This will also start the processing of the upload.
+
+        There are two basic ways to upload a file: multipart-formdata or simply streaming
+        the file data. Both are supported. The latter one does not allow to transfer a
+        filename or other meta-data. If a filename is available and no name is given, it
+        will become the name of the upload.
+
+        .. :quickref: upload; Upload a file directly and create an upload.
+
+        **Curl examples for both approaches**:
+
+        .. sourcecode:: sh
+
+            curl -X PUT "/nomad/api/uploads/" -F file=@local_file
+            curl "/nomad/api/uploads/" --upload-file local_file
+
+        :qparam name: an optional name for the upload
+        :status 200: upload successfully received.
+        :status 400: the multipart body has no file part, the transfer was
+            disrupted, or the file has an unsupported format.
+        :returns: the upload (see GET /uploads/<upload_id>)
+        """
+        # create upload
+        upload = Upload.create(
+            user=g.user,
+            name=request.args.get('name'))
+
+        logger = get_logger(__name__, endpoint='upload', action='put', upload_id=upload.upload_id)
+        logger.info('upload created')
+
+        uploadFile = UploadFile(upload.upload_id)
+
+        # 'multipart/form-data' is the registered media type; the misspelled
+        # 'application/multipart-formdata' is still accepted for older clients.
+        if request.mimetype in ('multipart/form-data', 'application/multipart-formdata'):
+            # multipart formdata, e.g. with curl -X PUT "url" -F file=@local_file
+            # might have performance issues for large files: https://github.com/pallets/flask/issues/2086
+            if 'file' not in request.files:
+                # do not leave an upload without a file behind
+                upload.delete()
+                abort(400, message='Bad multipart-formdata, there is no file part.')
+            file_part = request.files['file']
+            if not upload.name:
+                # no (or an empty) name query parameter, fall back to the filename
+                upload.name = file_part.filename
+
+            file_part.save(uploadFile.os_path)
+        else:
+            # simple streaming data in HTTP body, e.g. with curl "url" -T local_file
+            try:
+                with uploadFile.open('wb') as f:
+                    while not request.stream.is_exhausted:
+                        f.write(request.stream.read(1024))
+
+            except Exception as e:
+                logger.error('Error on streaming upload', exc_info=e)
+                # NOTE(review): a partially written file may remain; uploadFile.delete() is
+                # not called here, it is unclear whether it tolerates a missing file.
+                upload.delete()
+                abort(400, message='Some IO went wrong, upload probably aborted/disrupted.')
+
+        if not uploadFile.is_valid:
+            uploadFile.delete()
+            upload.delete()
+            abort(400, message='Bad file format, expected %s.' % ", ".join(UploadFile.formats))
+
+        logger.info('received uploaded file')
+        upload.upload_time = datetime.now()
+        upload.process()
+        logger.info('initiated processing')
+
+        return upload.json_dict, 200
+
 
 class UploadRes(Resource):
     """ Uploads """
diff --git a/tests/test_api.py b/tests/test_api.py
index 861690acbc4f4087074503076f24a60c6d9f84da..4d87360984ddd31568c662754c302fb71b296ce5 100644
--- a/tests/test_api.py
+++ b/tests/test_api.py
@@ -288,6 +288,28 @@ def test_processing_local_path(client, file, worker, mocksearch, test_user_auth,
     assert_processing(client, test_user_auth, upload_id, repository_db)
 
 
+@pytest.mark.parametrize('file', example_files)
+@pytest.mark.parametrize('mode', ['multipart', 'stream'])
+@pytest.mark.timeout(10)
+def test_processing_upload(client, file, mode, worker, mocksearch, test_user_auth, no_warn, repository_db):
+    """ Uploads a file via PUT, as multipart form data or as raw body, and checks its processing. """
+    with open(file, 'rb') as f:
+        if mode == 'multipart':
+            rv = client.put(
+                '/uploads',
+                data=dict(file=(f, 'file')),
+                headers=test_user_auth)
+        elif mode == 'stream':
+            rv = client.put('/uploads', data=f.read(), headers=test_user_auth)
+        else:
+            assert False, 'unknown upload mode %s' % mode
+    assert rv.status_code == 200
+    upload = assert_upload(rv.data)
+    upload_id = upload['upload_id']
+
+    assert_processing(client, test_user_auth, upload_id, repository_db)
+
+
 def test_repo_calc(client, example_elastic_calc, no_warn):
     rv = client.get(
         '/repo/%s/%s' % (example_elastic_calc.upload_hash, example_elastic_calc.calc_hash))