Failed publish due to missing suggestion value?
I was about to publish the EELS DB data that we have. Some of the samples in there are so weirdly named that we cannot derive any formula or elements for them. This results in empty suggestion values (`['']`). When the system tries to bulk index these entries, ES throws a `mapper_parsing_exception`: `value must have a length > 0`.
@himanel1 My only clue is that these empty suggestion values are the cause. Would this make sense? Have you ever encountered errors like these? Is there a way to tweak the algorithm to return no suggestions instead of empty ones?
What is also weird: indexing individual calculations during processing works. It's only the bulk indexing during publishing that fails.
Traceback (most recent call last):
File "/app/nomad/processing/base.py", line 790, in proc_task
rv = unwrapped_func(proc, *args, **kwargs)
File "/app/nomad/processing/data.py", line 1352, in publish_upload
search.publish(entries)
File "/app/nomad/search.py", line 204, in publish
entries, index=index, published=True, update_materials=True, refresh=True)
File "/app/nomad/search.py", line 235, in update_metadata
infrastructure.elastic_client, updates, stats_only=True)
File "/usr/local/lib/python3.7/site-packages/elasticsearch/helpers/__init__.py", line 308, in bulk
for ok, item in streaming_bulk(client, actions, *args, **kwargs):
File "/usr/local/lib/python3.7/site-packages/elasticsearch/helpers/__init__.py", line 238, in streaming_bulk
**kwargs
File "/usr/local/lib/python3.7/site-packages/elasticsearch/helpers/__init__.py", line 166, in _process_bulk_chunk
raise BulkIndexError("%i document(s) failed to index." % len(errors), errors)
elasticsearch.helpers.BulkIndexError: ('1 document(s) failed to index.', [{'update': {'_index': 'nomad_prod_v1_reindex_1_entries_v1', '_type': 'doc', '_id': 'x-emzXdZZPuKxw_JRuCVyL8Hi_M5', 'status': 400, 'error': {'type': 'mapper_parsing_exception', 'reason': 'failed to parse', 'caused_by': {'type': 'illegal_argument_exception', 'reason': 'value must have a length > 0'}}, 'data': {'doc': {'results': {'material': {'structural_type': 'not processed', 'elements': [], 'n_elements': 0, 'elements_exclusive': '', 'chemical_formula_descriptive': '', 'chemical_formula_reduced': '', 'chemical_formula_hill': '', 'structural_type__suggestion': ['not processed', 'processed not '], 'chemical_formula_descriptive__suggestion': [''], 'chemical_formula_reduced__suggestion': [''], 'chemical_formula_hill__suggestion': ['']}, 'method': {'method_name': 'EELS', 'method_name__suggestion': ['EELS']}, 'properties': {'available_properties': ['eels'], 'spectroscopy': {'eels': {'detector_type': 'GIF Quantum ER', 'resolution': 8.01088317e-20, 'max_energy': 6.166104950079719e-16, 'min_energy': 2.8864493802817195e-16, 'detector_type__suggestion': ['GIF Quantum ER', 'Quantum ER GIF ', 'ER GIF Quantum ']}}}}, 'upload_id': 'u08bpKUoRyGU6x0-nDCqyA', 'upload_name': 'EELS Database', 'upload_create_time': '2021-11-15T07:29:17.049000', 'entry_id': 'x-emzXdZZPuKxw_JRuCVyL8Hi_M5', 'calc_id': 'x-emzXdZZPuKxw_JRuCVyL8Hi_M5', 'entry_create_time': '2021-11-15T07:30:09.655000', 'parser_name': 'parsers/eels', 'mainfile': 'eelsdb.eu/spectra/zr/metadata.json', 'files': ['eelsdb.eu/spectra/zr/metadata.json', 'eelsdb.eu/spectra/zr/data.msa'], 'external_id': '22079', 'published': True, 'with_embargo': False, 'processed': True, 'last_processing_time': '2022-02-02T07:39:54.291000', 'processing_errors': [], 'nomad_version': '1.0.1', 'nomad_commit': 'e0e51d75', 'references': ['https://eelsdb.eu/spectra/zr/', 'https://api.eelsdb.eu/spectra/zr/'], 'external_db': 'EELS Data Base', 'origin': 'EELS Data Base', 'main_author': 
{'name': 'Markus Scheidgen', 'user_id': '05375d3f-87e5-4266-9ec8-e80bfe2a739d', 'name__suggestion': ['Markus Scheidgen', 'Scheidgen Markus ']}, 'authors': [{'name': 'Ian MacLaren', 'name__suggestion': ['Ian MacLaren', 'MacLaren Ian ']}], 'writers': [{'name': 'Markus Scheidgen', 'user_id': '05375d3f-87e5-4266-9ec8-e80bfe2a739d', 'name__suggestion': ['Markus Scheidgen', 'Scheidgen Markus ']}], 'viewers': [{'name': 'Markus Scheidgen', 'user_id': '05375d3f-87e5-4266-9ec8-e80bfe2a739d', 'name__suggestion': ['Markus Scheidgen', 'Scheidgen Markus ']}], 'datasets': [], 'domain': 'ems', 'n_quantities': 75, 'quantities': ['', '.measurement', '.metadata', '.results', 'measurement', 'measurement.description', 'measurement.eels', 'measurement.eels.authors', 'measurement.eels.authors.first_name', 'measurement.eels.authors.last_name', 'measurement.eels.edges', 'measurement.eels.publish_time', 'measurement.eels.spectrum', 'measurement.eels.spectrum.count', 'measurement.eels.spectrum.energy', 'measurement.instrument', 'measurement.instrument.eels', 'measurement.instrument.eels.beam_energy', 'measurement.instrument.eels.dark_current', 'measurement.instrument.eels.detector_type', 'measurement.instrument.eels.guntype', 'measurement.instrument.eels.max_energy', 'measurement.instrument.eels.min_energy', 'measurement.instrument.eels.resolution', 'measurement.instrument.eels.step_size', 'measurement.instrument.name', 'measurement.measurement_id', 'measurement.method_abbreviation', 'measurement.method_name', 'measurement.sample', 'measurement.sample.chemical_formula', 'measurement.sample.elements', 'measurement.sample.name', 'metadata', 'metadata.coauthors', 'metadata.datasets', 'metadata.domain', 'metadata.embargo_length', 'metadata.entry_coauthors', 'metadata.entry_create_time', 'metadata.entry_hash', 'metadata.entry_id', 'metadata.external_db', 'metadata.external_id', 'metadata.files', 'metadata.last_edit_time', 'metadata.last_processing_time', 'metadata.license', 
'metadata.main_author', 'metadata.mainfile', 'metadata.nomad_commit', 'metadata.nomad_version', 'metadata.parser_name', 'metadata.processed', 'metadata.processing_errors', 'metadata.published', 'metadata.references', 'metadata.reviewers', 'metadata.upload_create_time', 'metadata.upload_id', 'metadata.upload_name', 'metadata.with_embargo', 'results', 'results.material', 'results.material.chemical_formula_descriptive', 'results.material.chemical_formula_hill', 'results.material.chemical_formula_reduced', 'results.material.elements', 'results.method', 'results.method.method_name', 'results.properties', 'results.properties.available_properties', 'results.properties.spectroscopy', 'results.properties.spectroscopy.eels', 'results.properties.spectroscopy.eels.detector_type', 'results.properties.spectroscopy.eels.max_energy', 'results.properties.spectroscopy.eels.min_energy', 'results.properties.spectroscopy.eels.resolution', 'results.properties.spectroscopy.spectrum']}}}}])