Sporadic failures of python tests in the pipeline
@mscheidg there is some sporadic failures of the pipiline for nomad/tests/app/v1/routers/test_uploads.py:test_post_upload
. This passes if the pipeline is manually restarted. Seems to me like an error from elastic_search.
This goes beyond what I know, so I let you investigate this, but let me know if you need some info from me.
The full message is:
==================================== ERRORS ====================================
___________ ERROR at setup of test_post_upload[invalid-credentials] ____________
elastic_infra = <Elasticsearch([{'host': 'elastic', 'port': 9200}])>
@pytest.fixture(scope='function')
def elastic(elastic_infra):
''' Provides a clean elastic per function. Clears elastic before test. '''
> return clear_elastic(elastic_infra)
tests/conftest.py:299:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
tests/conftest.py:264: in clear_elastic
elastic_infra.delete_by_query(
/usr/local/lib/python3.9/site-packages/elasticsearch/client/utils.py:347: in _wrapped
return func(*args, params=params, headers=headers, **kwargs)
/usr/local/lib/python3.9/site-packages/elasticsearch/client/__init__.py:738: in delete_by_query
return self.transport.perform_request(
/usr/local/lib/python3.9/site-packages/elasticsearch/transport.py:466: in perform_request
raise e
/usr/local/lib/python3.9/site-packages/elasticsearch/transport.py:427: in perform_request
status, headers_response, data = connection.perform_request(
/usr/local/lib/python3.9/site-packages/elasticsearch/connection/http_urllib3.py:291: in perform_request
self._raise_error(response.status, raw_data)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <Urllib3HttpConnection: http://elastic:9200>, status_code = 409
raw_data = '{"took":2,"timed_out":false,"total":1,"deleted":0,"batches":1,"version_conflicts":1,"noops":0,"retries":{"bulk":0,"se...d primary term [1]","index_uuid":"SdKVyK4sT4irtlYqUxTIAA","shard":"0","index":"nomad_entries_v1_test"},"status":409}]}'
def _raise_error(self, status_code, raw_data):
"""Locate appropriate exception and raise it."""
error_message = raw_data
additional_info = None
try:
if raw_data:
additional_info = json.loads(raw_data)
error_message = additional_info.get("error", error_message)
if isinstance(error_message, dict) and "type" in error_message:
error_message = error_message["type"]
except (ValueError, TypeError) as err:
logger.warning("Undecodable raw error response from server: %s", err)
> raise HTTP_EXCEPTIONS.get(status_code, TransportError)(
status_code, error_message, additional_info
)
E elasticsearch.exceptions.ConflictError: ConflictError(409, '{"took":2,"timed_out":false,"total":1,"deleted":0,"batches":1,"version_conflicts":1,"noops":0,"retries":{"bulk":0,"search":0},"throttled_millis":0,"requests_per_second":-1.0,"throttled_until_millis":0,"failures":[{"index":"nomad_entries_v1_test","type":"_doc","id":"ENjScxjKHMhyWV_kZxfbDHfrrJY0","cause":{"type":"version_conflict_engine_exception","reason":"[ENjScxjKHMhyWV_kZxfbDHfrrJY0]: version conflict, required seqNo [1841], primary term [1]. current document has seqNo [1842] and primary term [1]","index_uuid":"SdKVyK4sT4irtlYqUxTIAA","shard":"0","index":"nomad_entries_v1_test"},"status":409}]}')
/usr/local/lib/python3.9/site-packages/elasticsearch/connection/base.py:328: ConflictError
------------------------------ Captured log setup ------------------------------
INFO nomad.search 2023-05-24T14:42:42 perform bulk index of entries
- commit:
- search.exec_time: 0.10958147048950195
- search.n_actions: 2
- search.n_entries: 1
- search.size: 35437
- version: 1.2.0rc1.dev118+g7e441c981
WARNING elasticsearch 2023-05-24T14:42:42 POST http://elastic:9200/nomad_entries_v1_test/_delete_by_query?refresh=true&wait_for_completion=true [status:409 request:0.059s]
- commit:
- version: 1.2.0rc1.dev118+g7e441c981
WARNING elasticsearch 2023-05-24T14:42:42 POST http://elastic:9200/nomad_entries_v1_test/_delete_by_query?refresh=true&wait_for_completion=true [status:409 request:0.018s]
- commit:
- version: 1.2.0rc1.dev118+g7e441c981
ERROR nomad.processing 2023-05-24T14:42:42 could not write archive after processing failure
- exception: Traceback (most recent call last):
File "/app/nomad/processing/base.py", line 906, in proc_task
next_process = proc._sync_complete_process()
File "/app/nomad/processing/base.py", line 718, in _sync_complete_process
raise ProcessSyncFailure('Failed to complete process too many times - should not happen')
nomad.processing.base.ProcessSyncFailure: Failed to complete process too many times - should not happen
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "/app/nomad/processing/data.py", line 1303, in write_archive
return self.upload_files.write_archive(
File "/app/nomad/files.py", line 836, in write_archive
raise e
File "/app/nomad/files.py", line 830, in write_archive
write_archive(archive_file_object.os_path, 1, data=[(entry_id, data)])
File "/app/nomad/archive/storage.py", line 540, in write_archive
with ArchiveWriter(path_or_file, n_entries, entry_toc_depth=entry_toc_depth) as writer:
File "/app/nomad/archive/storage.py", line 176, in __enter__
self._f = open(self.file_or_path, 'wb')
FileNotFoundError: [Errno 2] No such file or directory: '.volumes/test_fs/staging/6-/6-yD5SrkSRm8dW8YOIe6uA/archive/ENjScxjKHMhyWV_kZxfbDHfrrJY0-v1.msg'
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "/app/nomad/processing/data.py", line 1168, in _on_fail
self.write_archive(self._parser_results)
File "/app/nomad/processing/data.py", line 1310, in write_archive
self.upload_files.write_archive(self.entry_id, archive.m_to_dict())
File "/app/nomad/files.py", line 836, in write_archive
raise e
File "/app/nomad/files.py", line 830, in write_archive
write_archive(archive_file_object.os_path, 1, data=[(entry_id, data)])
File "/app/nomad/archive/storage.py", line 540, in write_archive
with ArchiveWriter(path_or_file, n_entries, entry_toc_depth=entry_toc_depth) as writer:
File "/app/nomad/archive/storage.py", line 176, in __enter__
self._f = open(self.file_or_path, 'wb')
FileNotFoundError: [Errno 2] No such file or directory: '.volumes/test_fs/staging/6-/6-yD5SrkSRm8dW8YOIe6uA/archive/ENjScxjKHMhyWV_kZxfbDHfrrJY0-v1.msg'
- exception_hash: pP1hsdi12j9j9IcEWCY1RMYKaaWZ
- commit:
- entry_id: ENjScxjKHMhyWV_kZxfbDHfrrJY0
- mainfile: None
- processing.logger: nomad.processing
- processing.parser: None
- processing.proc: Entry
- processing.process: None
- processing.process_status: RUNNING
- processing.process_worker_id: dvbEPQ3-QFaFKA95iX1smQ
- upload_id: None
- version: 1.2.0rc1.dev118+g7e441c981
INFO nomad.processing 2023-05-24T14:42:42 process failed
- commit:
- entry_id: ENjScxjKHMhyWV_kZxfbDHfrrJY0
- mainfile: None
- processing.logger: nomad.processing
- processing.parser: None
- processing.proc: Entry
- processing.process: None
- processing.process_status: RUNNING
- processing.process_worker_id: dvbEPQ3-QFaFKA95iX1smQ
- upload_id: None
- version: 1.2.0rc1.dev118+g7e441c981
ERROR celery.app.trace 2023-05-24T14:42:42 Task nomad.processing.base.proc_task[e42ad743-532b-4cd2-97e6-4248b1cc5f2e] raised unexpected: ValidationError(ValidationError (Entry:ENjScxjKHMhyWV_kZxfbDHfrrJY0) (Field is required: ['upload_id']),)
- data: {'hostname': 'runner-2df03998-project-2187-concurrent-6', 'id': 'e42ad743-532b-4cd2-97e6-4248b1cc5f2e', 'name': 'nomad.processing.base.proc_task', 'exc': "ValidationError(ValidationError (Entry:ENjScxjKHMhyWV_kZxfbDHfrrJY0) (Field is required: ['upload_id']),)", 'traceback': 'Traceback (most recent call last):\n File "/app/nomad/processing/base.py", line 906, in proc_task\n next_process = proc._sync_complete_process()\n File "/app/nomad/processing/base.py", line 718, in _sync_complete_process\n raise ProcessSyncFailure(\'Failed to complete process too many times - should not happen\')\nnomad.processing.base.ProcessSyncFailure: Failed to complete process too many times - should not happen\n\nDuring handling of the above exception, another exception occurred:\n\nTraceback (most recent call last):\n File "/usr/local/lib/python3.9/site-packages/celery/app/trace.py", line 451, in trace_task\n R = retval = fun(*args, **kwargs)\n File "/app/nomad/processing/base.py", line 918, in proc_task\n proc.fail(e)\n File "/app/nomad/processing/base.py", line 412, in fail\n self._sync_complete_process(force_clear_queue_on_failure=True)\n File "/app/nomad/processing/base.py", line 683, in _sync_complete_process\n self.save()\n File "/usr/local/lib/python3.9/site-packages/mongoengine/document.py", line 396, in save\n self.validate(clean=clean)\n File "/usr/local/lib/python3.9/site-packages/mongoengine/base/document.py", line 439, in validate\n raise ValidationError(message, errors=errors)\nmongoengine.errors.ValidationError: ValidationError (Entry:ENjScxjKHMhyWV_kZxfbDHfrrJY0) (Field is required: [\'upload_id\'])\n', 'args': "['Entry', 'ENjScxjKHMhyWV_kZxfbDHfrrJY0', 'process_entry', [], {}]", 'kwargs': '{}', 'description': 'raised unexpected', 'internal': False}
- lineno: 265
- commit:
- version: 1.2.0rc1.dev118+g7e441c981
- process: 13
- stack_trace: Traceback (most recent call last):
File "/app/nomad/processing/base.py", line 906, in proc_task
next_process = proc._sync_complete_process()
File "/app/nomad/processing/base.py", line 718, in _sync_complete_process
raise ProcessSyncFailure('Failed to complete process too many times - should not happen')
nomad.processing.base.ProcessSyncFailure: Failed to complete process too many times - should not happen
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "/usr/local/lib/python3.9/site-packages/celery/app/trace.py", line 451, in trace_task
R = retval = fun(*args, **kwargs)
File "/app/nomad/processing/base.py", line 918, in proc_task
proc.fail(e)
File "/app/nomad/processing/base.py", line 412, in fail
self._sync_complete_process(force_clear_queue_on_failure=True)
File "/app/nomad/processing/base.py", line 683, in _sync_complete_process
self.save()
File "/usr/local/lib/python3.9/site-packages/mongoengine/document.py", line 396, in save
self.validate(clean=clean)
File "/usr/local/lib/python3.9/site-packages/mongoengine/base/document.py", line 439, in validate
raise ValidationError(message, errors=errors)
mongoengine.errors.ValidationError: ValidationError (Entry:ENjScxjKHMhyWV_kZxfbDHfrrJY0) (Field is required: ['upload_id'])
- thread_name: Thread-2
INFO celery.worker.strate 2023-05-24T14:42:42 Task nomad.processing.base.proc_task[b39613cf-8c13-4771-91fc-72665b4d0d23] received
- data: {'id': 'b39613cf-8c13-4771-91fc-72665b4d0d23', 'name': 'nomad.processing.base.proc_task', 'args': "['Entry', '8JMDrfZTZxTCh4fo0rcSjkyJPovk', 'process_entry', [], {'_meta_label': None}]", 'kwargs': '{}'}
- commit:
- version: 1.2.0rc1.dev118+g7e441c981
DEBUG celery.pool 2023-05-24T14:42:42 TaskPool: Apply <function trace_task_ret at 0x7f72834b63a0> (args:('nomad.processing.base.proc_task', 'b39613cf-8c13-4771-91fc-72665b4d0d23', {'lang': 'py', 'task': 'nomad.processing.base.proc_task', 'id': 'b39613cf-8c13-4771-91fc-72665b4d0d23', 'shadow': None, 'eta': None, 'expires': None, 'group': None, 'group_index': None, 'retries': 0, 'timelimit': [3600, 1800], 'root_id': '59c7bafd-06ec-4e6a-a11d-3767b42efd77', 'parent_id': '59c7bafd-06ec-4e6a-a11d-3767b42efd77', 'argsrepr': "['Entry', '8JMDrfZTZxTCh4fo0rcSjkyJPovk', 'process_entry', (), {'_meta_label': None}]", 'kwargsrepr': '{}', 'origin': 'gen13@runner-2df03998-project-2187-concurrent-6', 'ignore_result': False, 'properties': {'content_type': 'application/json', 'content_encoding': 'utf-8', 'application_headers': {'lang': 'py', 'task': 'nomad.processing.base.proc_task', 'id': 'b39613cf-8c13-4771-91fc-72665b4d0d23', 'shadow': None, 'eta': None, 'expires': None, 'group': None, 'group_index': None, 'retries': 0, 'timelimit': [3600, 1800], 'root_id': '59c7bafd-06ec-4e6a-a11d-3767b42efd77', 'parent_id':... kwargs:{})
- commit:
- version: 1.2.0rc1.dev118+g7e441c981
DEBUG celery.worker.reques 2023-05-24T14:42:42 Task accepted: nomad.processing.base.proc_task[b39613cf-8c13-4771-91fc-72665b4d0d23] pid:13
- commit:
- version: 1.2.0rc1.dev118+g7e441c981
WARNING nomad.processing 2023-05-24T14:42:42 called object is missing, retry
- commit:
- processing.cls: Entry
- version: 1.2.0rc1.dev118+g7e441c981
INFO celery.app.trace 2023-05-24T14:42:42 Task nomad.processing.base.proc_task[b39613cf-8c13-4771-91fc-72665b4d0d23] retry: Retry in 3s: KeyError('Entry with id 8JMDrfZTZxTCh4fo0rcSjkyJPovk does not exist')
- data: {'id': 'b39613cf-8c13-4771-91fc-72665b4d0d23', 'name': 'nomad.processing.base.proc_task', 'exc': "Retry in 3s: KeyError('Entry with id 8JMDrfZTZxTCh4fo0rcSjkyJPovk does not exist')"}
- commit:
- version: 1.2.0rc1.dev118+g7e441c981
DEBUG kombu.pidbox 2023-05-24T14:42:42 pidbox received method active(safe=None) [reply_to:{'exchange': 'reply.celery.pidbox', 'routing_key': 'eb200204-5f94-32af-9b0f-f9133afca32f'} ticket:7e977010-0c9f-42a7-96ee-8997599862af]
- commit:
- version: 1.2.0rc1.dev118+g7e441c981
INFO celery.worker.strate 2023-05-24T14:42:42 Task nomad.processing.base.proc_task[b39613cf-8c13-4771-91fc-72665b4d0d23] received
- data: {'id': 'b39613cf-8c13-4771-91fc-72665b4d0d23', 'name': 'nomad.processing.base.proc_task', 'args': "['Entry', '8JMDrfZTZxTCh4fo0rcSjkyJPovk', 'process_entry', [], {'_meta_label': None}]", 'kwargs': '{}'}
- commit:
- version: 1.2.0rc1.dev118+g7e441c981
DEBUG kombu.common 2023-05-24T14:42:42 basic.qos: prefetch_count->5
- commit:
- version: 1.2.0rc1.dev118+g7e441c981
WARNING elasticsearch 2023-05-24T14:42:42 POST http://elastic:9200/nomad_entries_v1_test/_delete_by_query?refresh=true&wait_for_completion=true [status:409 request:0.008s]
- commit:
- version: 1.2.0rc1.dev118+g7e441c981
WARNING elasticsearch 2023-05-24T14:42:42 POST http://elastic:9200/nomad_entries_v1_test/_delete_by_query?refresh=true&wait_for_completion=true [status:409 request:0.008s]
- commit:
- version: 1.2.0rc1.dev118+g7e441c981
WARNING elasticsearch 2023-05-24T14:42:43 POST http://elastic:9200/nomad_entries_v1_test/_delete_by_query?refresh=true&wait_for_completion=true [status:409 request:0.008s]
- commit:
- version: 1.2.0rc1.dev118+g7e441c981
WARNING elasticsearch 2023-05-24T14:42:43 POST http://elastic:9200/nomad_entries_v1_test/_delete_by_query?refresh=true&wait_for_completion=true [status:409 request:0.008s]
- commit:
- version: 1.2.0rc1.dev118+g7e441c981
WARNING elasticsearch 2023-05-24T14:42:43 POST http://elastic:9200/nomad_entries_v1_test/_delete_by_query?refresh=true&wait_for_completion=true [status:409 request:0.008s]
- commit:
- version: 1.2.0rc1.dev118+g7e441c981
WARNING elasticsearch 2023-05-24T14:42:43 POST http://elastic:9200/nomad_entries_v1_test/_delete_by_query?refresh=true&wait_for_completion=true [status:409 request:0.006s]
- commit:
- version: 1.2.0rc1.dev118+g7e441c981
WARNING elasticsearch 2023-05-24T14:42:43 POST http://elastic:9200/nomad_entries_v1_test/_delete_by_query?refresh=true&wait_for_completion=true [status:409 request:0.006s]
- commit:
- version: 1.2.0rc1.dev118+g7e441c981
WARNING elasticsearch 2023-05-24T14:42:43 POST http://elastic:9200/nomad_entries_v1_test/_delete_by_query?refresh=true&wait_for_completion=true [status:409 request:0.007s]
- commit:
- version: 1.2.0rc1.dev118+g7e441c981
WARNING elasticsearch 2023-05-24T14:42:43 POST http://elastic:9200/nomad_entries_v1_test/_delete_by_query?refresh=true&wait_for_completion=true [status:409 request:0.006s]
- commit:
- version: 1.2.0rc1.dev118+g7e441c981