diff --git a/nomad/archive/__init__.py b/nomad/archive/__init__.py index 804a874b0efba302db9c237202eee4f4dd129694..be0386b4cdc2d55c7ccadefece69680d98ba9e0d 100644 --- a/nomad/archive/__init__.py +++ b/nomad/archive/__init__.py @@ -35,15 +35,14 @@ section annotations/categories. from .storage import ( to_json, - write_archive, read_archive, ArchiveError, ArchiveReader, - ArchiveWriter, ArchiveDict, ArchiveList, ArchiveItem, ) +from .storage_v2 import write_archive from .query import query_archive, filter_archive, ArchiveQueryError from .partial import ( read_partial_archive_from_mongo, diff --git a/nomad/archive/storage.py b/nomad/archive/storage.py index e841f68a8327c420d10a8eadced9ffa8afa8566a..2b0eb3535b3b9f2a22731332e394632c90cd1182 100644 --- a/nomad/archive/storage.py +++ b/nomad/archive/storage.py @@ -17,40 +17,26 @@ # from __future__ import annotations -from typing import Iterable, Any, Tuple, Dict, BinaryIO, Union, List, cast, Generator +from typing import Any, Tuple, Dict, Union, cast, Generator from io import BytesIO, BufferedReader from collections.abc import Mapping, Sequence import msgpack -from msgpack.fallback import Packer, StringIO import struct -import json from nomad import utils from nomad.config import config -__packer = msgpack.Packer(autoreset=True, use_bin_type=True) - _toc_uuid_size = utils.default_hash_len + 1 _toc_item_size = _toc_uuid_size + 25 # packed(uuid + [10-byte-pos, 10-byte-pos]) _entries_per_block = config.archive.block_size // _toc_item_size _bytes_per_block = _entries_per_block * _toc_item_size -def packb(o): - return __packer.pack(o) - - def unpackb(o): return msgpack.unpackb(o, raw=False) -def _encode(start: int, end: int) -> bytes: - return start.to_bytes(5, byteorder='little', signed=False) + end.to_bytes( - 5, byteorder='little', signed=False - ) - - def _decode(position: bytes) -> Tuple[int, int]: return int.from_bytes( position[:5], byteorder='little', signed=False @@ -77,208 +63,6 @@ class ArchiveError(Exception): pass -class TOCPacker(Packer): - """ - A special msgpack packer that records a TOC while packing. - - Uses a combination of the pure python msgpack fallback packer and the "real" - c-based packing. - """ - - def __init__(self, toc_depth: int, *args, **kwargs): - self.toc_depth = toc_depth - # noinspection PyTypeChecker - self.toc: Dict[str, Any] = None - self._depth = 0 - - # Because we cannot change msgpacks interface of _pack, this _stack is used to - # transfer the result of _pack calls in terms of the TOC. 
- self._stack: List[Any] = [] - - super().__init__(*args, **kwargs) - - def _pos(self): - return self._buffer.getbuffer().nbytes - - def _pack_list(self, obj, *args, **kwargs): - pack_result = super()._pack(obj, *args, **kwargs) - - toc_result = [] - # same assumption and condition as above - if len(obj) > 0 and isinstance(obj[0], dict): - for _ in obj: - toc_result.append(self._stack.pop()) - - self._stack.append(list(reversed(toc_result))) - - return pack_result - - def _pack_dict(self, obj, *args, **kwargs): - toc_result = {} - start = self._pos() - if self._depth >= self.toc_depth: - pack_result = self._buffer.write(packb(obj)) - else: - self._depth += 1 - pack_result = super()._pack(obj, *args, **kwargs) - self._depth -= 1 - - toc = {} - for key, value in reversed(list(obj.items())): - if isinstance(value, dict) or ( - isinstance(value, (list, tuple)) - and len(value) > 0 - and isinstance(value[0], dict) - ): - # assumes non emptiness and uniformity of array items - toc[key] = self._stack.pop() - - toc_result['toc'] = { - key: value for key, value in reversed(list(toc.items())) - } - - end = self._pos() - toc_result['pos'] = [start, end] - - self._stack.append(toc_result) - - return pack_result - - def _pack(self, obj, *args, **kwargs): - if isinstance(obj, dict): - return self._pack_dict(obj, *args, **kwargs) - - if isinstance(obj, list): - return self._pack_list(obj, *args, **kwargs) - - return self._buffer.write(packb(obj)) - - def pack(self, obj): - assert isinstance(obj, dict), f'TOC packer can only pack dicts, {obj.__class__}' - self._depth = 0 - self._buffer = StringIO() - result = super().pack(obj) - self.toc = self._stack.pop() - assert len(self._stack) == 0 - return result - - -class ArchiveWriter: - def __init__( - self, file_or_path: Union[str, BytesIO], n_entries: int, entry_toc_depth: int - ): - self.file_or_path = file_or_path - self.n_entries = n_entries - - self._pos = 0 - # noinspection PyTypeChecker - self._toc_position: Tuple[int, int] = None - self._toc: Dict[str, Tuple[Tuple[int, int], Tuple[int, int]]] = {} - # noinspection PyTypeChecker - self._f: BinaryIO = None - self._toc_packer = TOCPacker(toc_depth=entry_toc_depth) - - def __enter__(self): - if isinstance(self.file_or_path, str): - self._f = open(self.file_or_path, 'wb') - elif isinstance(self.file_or_path, BytesIO): - self._f = self.file_or_path - self._f.seek(0) - else: - raise ValueError('not a file or path') - - # write empty placeholder header - self._write_map_header(3) - self._writeb('toc_pos') - self._writeb(_encode(0, 0)) - - self._writeb('toc') - toc_start, _ = self._write_map_header(self.n_entries) - _, toc_end = self._write(b'0' * _toc_item_size * self.n_entries) - self._toc_position = toc_start, toc_end - - self._writeb('data') - self._write_map_header(self.n_entries) - - return self - - def __exit__(self, exc_type, exc_val, exc_tb): - if exc_val is not None: - raise exc_val - - # go back and write the real TOC to the header - self._f.seek(0) - self._pos = 0 - - assert len(self._toc) == self.n_entries - toc_items = sorted(self._toc.items(), key=lambda item: item[0]) - toc = { - uuid: [_encode(*positions[0]), _encode(*positions[1])] - for uuid, positions in toc_items - } - - self._write_map_header(3) - self._writeb('toc_pos') - self._writeb(_encode(*self._toc_position)) - - self._writeb('toc') - toc_position = self._writeb(toc) - assert ( - toc_position == self._toc_position - ), f'{toc_position} - {self._toc_position}' - - if isinstance(self.file_or_path, str): - self._f.close() - - def 
_write_map_header(self, n): - if n <= 0x0F: - return self._write(struct.pack('B', 0x80 + n)) - if n <= 0xFFFF: - return self._write(struct.pack('>BH', 0xDE, n)) - if n <= 0xFFFFFFFF: - return self._write(struct.pack('>BI', 0xDF, n)) - raise ValueError('Dict is too large') - - def _write(self, b: bytes) -> Tuple[int, int]: - start = self._pos - self._pos += self._f.write(b) - return start, self._pos - - def _writeb(self, obj): - return self._write(packb(obj)) - - def _write_entry(self, uuid: str, toc: dict, packed: bytes | Generator): - uuid = utils.adjust_uuid_size(uuid) - - self._writeb(uuid) - self._write_map_header(2) - self._writeb('toc') - toc_pos = self._writeb(toc) - self._writeb('data') - - if isinstance(packed, bytes): - data_pos = self._write(packed) - elif isinstance(packed, Generator): - start = self._pos - for chunk in packed: - self._pos += self._f.write(chunk) - data_pos = start, self._pos - else: - raise ValueError('Invalid type for packed data.') - - self._toc[uuid] = (toc_pos, data_pos) - - def add(self, uuid: str, data: Any) -> None: - self._toc_packer.reset() - packed = self._toc_packer.pack(data) - toc = self._toc_packer.toc - - self._write_entry(uuid, toc, packed) - - def add_raw(self, uuid: str, toc: dict, packed: Generator) -> None: - self._write_entry(uuid, toc, packed) - - class ArchiveItem: def __init__(self, f: BytesIO, offset: int = 0): self._f = f @@ -528,112 +312,6 @@ class ArchiveReader(ArchiveDict): return self._f.closed if isinstance(self._file_or_path, str) else True -def combine_archive(path: str, n_entries: int, data: Iterable[Tuple[str, Any]]): - if config.archive.use_new_writer: - from .storage_v2 import ( - ArchiveWriter as ArchiveWriterNew, - ArchiveReader as ArchiveReaderNew, - ) - - with ArchiveWriterNew( - path, n_entries, toc_depth=config.archive.toc_depth - ) as writer: - for uuid, reader in data: - if not reader: - writer.add(uuid, {}) - elif isinstance(reader, ArchiveReaderNew): - toc, data = reader.get_raw(uuid) - writer.add_raw(uuid, toc, data) - else: - # rare case, old reader new writer, toc is not compatible, has to repack - writer.add(uuid, to_json(reader[uuid])) - else: - with ArchiveWriter(path, n_entries, entry_toc_depth=2) as writer: - for uuid, reader in data: - if not reader: - writer.add(uuid, {}) - else: - toc, data = reader.get_raw(uuid) - writer.add_raw(uuid, toc, data) - - -def write_archive( - path_or_file: Union[str, BytesIO], - n_entries: int, - data: Iterable[Tuple[str, Any]], - entry_toc_depth: int = 2, -) -> None: - """ - Writes a msgpack-based archive file. The file contents will be a valid msgpack-object. - The data will contain extra table-of-contents (TOC) objects that map some keys to - positions in the file. Data can be partially read from these positions and deserialized - with msgpack. - - The data in the archive file will have the following layout: - - .. code-block:: python - - { - 'toc_pos': b[start, end], - 'toc': { - entry_uuid: [b[start, end], b[start, end]], ... - }, - 'data': { - entry_uuid: { - 'toc': { - key: { - 'pos': [start, end], - 'toc': ... - }, - key: [ - { - 'pos': [start, end] - 'toc': ... - }, ... - ], - ... - }, - 'data': ... - }, ... - } - } - - - The top-level TOC will map entry_uuids to positions. The key 'toc_pos' holds the - position of the entry TOC, the second ('toc') the position of each entry. These positions - will be absolute positions in the file. The top-level TOC will be ordered by entry_uuid. - The top-level TOC positions are 2*5byte encoded integers. 
This will give the top-level TOC a - predictable layout and will allow to partially read this TOC. - - The TOC of each entry will have the same structure than the data up to a certain - TOC depth. A TOC object will hold the position of the object it refers to (key 'pos') - and further deeper TOC data (key 'toc'). Only data objects (dict instances) will - have TOC objects and only object count towards the TOC depth. Positions in the entry - TOCs are regular msgpack encoded integers. - - Arguments: - path_or_file: A file path or file-like to the archive file that should be written. - n_entries: The number of entries that will be added to the file. - data: The file contents as an iterator of entry id, data tuples. - entry_toc_depth: The depth of the table of contents in each entry. Only objects will - count for calculating the depth. - """ - if config.archive.use_new_writer: - from .storage_v2 import ArchiveWriter as ArchiveWriterNew - - with ArchiveWriterNew( - path_or_file, n_entries, toc_depth=entry_toc_depth - ) as writer: - for uuid, entry in data: - writer.add(uuid, entry) - else: - with ArchiveWriter( - path_or_file, n_entries, entry_toc_depth=entry_toc_depth - ) as writer: - for uuid, entry in data: - writer.add(uuid, entry) - - def read_archive(file_or_path: Union[str, BytesIO], **kwargs) -> ArchiveReader: """ Allows to read a msgpack-based archive. @@ -676,57 +354,4 @@ def read_archive(file_or_path: Union[str, BytesIO], **kwargs) -> ArchiveReader: if __name__ == '__main__': - - def benchmark(): - from time import time - import sys - - with open('archive_test.json') as f: - example_data = json.load(f) - - size = 5000 if len(sys.argv) == 1 else int(sys.argv[1]) - access_every = 2 - example_archive = [(utils.create_uuid(), example_data) for _ in range(0, size)] - example_uuid = example_archive[int(size / 2)][0] - - # this impl - # create archive - start = time() - buffer = BytesIO() - write_archive(buffer, len(example_archive), example_archive, entry_toc_depth=2) - print('archive.py: create archive (1): ', time() - start) - - # read single entry from archive - buffer = BytesIO(buffer.getbuffer()) - for use_blocked_toc in [False, True]: - start = time() - for _ in range(0, 23): - read_archive(buffer, use_blocked_toc=use_blocked_toc)[example_uuid][ - 'run' - ]['system'] - print( - f'archive.py: access single entry system (23), blocked {use_blocked_toc:d}: ', - (time() - start) / 23, - ) - - # read every n-ed entry from archive - buffer = BytesIO(buffer.getbuffer()) - for use_blocked_toc in [False, True]: - start = time() - for _ in range(0, 23): - with read_archive(buffer, use_blocked_toc=use_blocked_toc) as data: - for i, entry in enumerate(example_archive): - if i % access_every == 0: - data[entry[0]]['run']['system'] - print( - f'archive.py: access every {access_every:d}-ed entry single entry system (23), ' - f'blocked {use_blocked_toc:d}: ', - (time() - start) / 23, - ) - - # just msgpack - start = time() - packb(example_archive) - print('msgpack: create archive (1): ', time() - start) - - benchmark() + pass diff --git a/nomad/archive/storage_v2.py b/nomad/archive/storage_v2.py index db014bf992b317be20d5043c58bdc28e85b149bf..4cbdaf278ba5592dcac490811ad9a4ecee3238a0 100644 --- a/nomad/archive/storage_v2.py +++ b/nomad/archive/storage_v2.py @@ -18,8 +18,8 @@ from __future__ import annotations import struct +from collections.abc import Generator, Iterable from io import BytesIO -from typing import Generator import msgpack from bitarray import bitarray @@ -373,7 +373,7 @@ class 
ArchiveWriter: def to_json(v): - return v.to_json() if isinstance(v, ArchiveItem) else v + return v.to_json() if hasattr(v, 'to_json') else v class ArchiveReadCounter: @@ -842,9 +842,25 @@ class ArchiveReader(ArchiveItem): return self._full_cache +def combine_archive(path: str, n_entries: int, data: Iterable[tuple]): + with ArchiveWriter(path, n_entries, toc_depth=config.archive.toc_depth) as writer: + for uuid, reader in data: + if not reader: + writer.add(uuid, {}) + elif isinstance(reader, ArchiveReader): + toc, data = reader.get_raw(uuid) + writer.add_raw(uuid, toc, data) + else: + # rare case, old reader new writer, toc is not compatible, has to repack + writer.add(uuid, to_json(reader[uuid])) + + def write_archive( - path_or_file: str | BytesIO, data: list, toc_depth: int = config.archive.toc_depth -): - with ArchiveWriter(path_or_file, len(data), toc_depth=toc_depth) as writer: + path_or_file: str | BytesIO, + n_entries: int, + data: Iterable[tuple], + entry_toc_depth: int = config.archive.toc_depth, +) -> None: + with ArchiveWriter(path_or_file, n_entries, toc_depth=entry_toc_depth) as writer: for uuid, entry in data: writer.add(uuid, entry) diff --git a/nomad/cli/admin/springer.py b/nomad/cli/admin/springer.py index 837cd40eb87d453c6a722d8ce790e6c4504126ac..ccd5c4d28ed2ebcbdb84440d1d52acc7748a7312 100644 --- a/nomad/cli/admin/springer.py +++ b/nomad/cli/admin/springer.py @@ -30,6 +30,7 @@ import bs4 import time import os.path +import nomad.archive.storage_v2 from nomad import archive from nomad.config import config from nomad.archive import read_archive @@ -244,7 +245,7 @@ def update_springer(max_n_query: int = 10, retry_time: int = 120): page += 1 - archive.write_archive( + nomad.archive.storage_v2.write_archive( config.normalize.springer_db_path, len(sp_data), sp_data.items(), diff --git a/nomad/config/defaults.yaml b/nomad/config/defaults.yaml index 1a21753427eff35f16e0baedd89ac28284e28c16..787453f215c6355790f1c6e69a2a60979c9567aa 100644 --- a/nomad/config/defaults.yaml +++ b/nomad/config/defaults.yaml @@ -3,7 +3,6 @@ archive: read_buffer_size: 1048576 copy_chunk_size: 16777216 toc_depth: 10 - use_new_writer: true small_obj_optimization_threshold: 16777216 fast_loading: true fast_loading_threshold: 0.6 diff --git a/nomad/config/models/config.py b/nomad/config/models/config.py index c4db81679c30ddb72c077d3657a2cd2472a65cbb..29058ba95594a16a52cdc122e2b434c9c5dc4a6e 100644 --- a/nomad/config/models/config.py +++ b/nomad/config/models/config.py @@ -879,7 +879,6 @@ class Archive(ConfigBaseModel): """, ) toc_depth = Field(10, description='Depths of table of contents in the archive.') - use_new_writer = True # todo: to be removed small_obj_optimization_threshold = Field( 1 * 2**20, description=""" diff --git a/nomad/files.py b/nomad/files.py index 1aad3697225a692a3e2865a6dfa2beb72aaf3faf..3b4ce16d353e597f46553c73d6cf474f05cfbe53 100644 --- a/nomad/files.py +++ b/nomad/files.py @@ -74,7 +74,7 @@ import zipfile from nomad import utils, datamodel from nomad.config import config -from nomad.archive.storage import combine_archive +from nomad.archive.storage_v2 import combine_archive from nomad.config.models.config import BundleImportSettings, BundleExportSettings from nomad.archive import write_archive, read_archive, ArchiveReader, to_json diff --git a/ops/kubernetes/nomad-prod-util.yaml b/ops/kubernetes/nomad-prod-util.yaml index c50943af442e579c1a855955a9f744e0911a4862..70dc3937a7592328dc366188829bc824e4b56784 100644 --- a/ops/kubernetes/nomad-prod-util.yaml +++ 
b/ops/kubernetes/nomad-prod-util.yaml @@ -22,9 +22,6 @@ nomad: north: enabled: false - archive: - use_new_writer: true - image: tag: "prod" diff --git a/ops/kubernetes/values.yaml b/ops/kubernetes/values.yaml index 0de15f2c73ea8b25275124669f2e7d841d5da5c5..2fcbbcc7c36a6a17f432fcfc80ed87e7e9517dd8 100644 --- a/ops/kubernetes/values.yaml +++ b/ops/kubernetes/values.yaml @@ -53,9 +53,6 @@ nomad: - v1.2 - v1 - archive: - use_new_writer: true - mail: enabled: true host: "mailrelay.mpcdf.mpg.de" diff --git a/tests/archive/test_archive.py b/tests/archive/test_archive.py index 80e9eac8fa5435db1ebbb1e92f04dae7834e49e6..dbcafa7762030c91e1dc7df95918f18eeddf4250 100644 --- a/tests/archive/test_archive.py +++ b/tests/archive/test_archive.py @@ -37,10 +37,9 @@ from nomad.metainfo import ( MetainfoError, Context, MProxy, - Section, ) from nomad.datamodel import EntryArchive, ClientContext -from nomad.archive.storage import TOCPacker, _decode, _entries_per_block, to_json +from nomad.archive.storage import _decode, _entries_per_block, to_json from nomad.archive import ( write_archive, read_archive, @@ -85,12 +84,9 @@ def example_entry(): def _unpack(data, pos=None): f = BytesIO(data) - if config.archive.use_new_writer: - from nomad.archive.storage_v2 import ArchiveWriter + from nomad.archive.storage_v2 import ArchiveWriter - offset = ArchiveWriter.magic_len - else: - offset = 0 + offset = ArchiveWriter.magic_len if pos is None: return msgpack.unpackb(f.read()[offset:], raw=False) else: @@ -98,33 +94,6 @@ def _unpack(data, pos=None): return msgpack.unpackb(f.read(pos[1] - pos[0]), raw=False) -def test_toc_packer(example_entry): - toc_packer = TOCPacker(toc_depth=2) - - toc_packer.reset() - data = toc_packer.pack(example_entry) - toc = toc_packer.toc - - assert toc is not None - assert 'pos' in toc - assert _unpack(data, toc['pos']) == example_entry - - assert 'run' in toc['toc'] - toc = toc['toc']['run'] - assert _unpack(data, toc['pos']) == example_entry['run'] - - assert 'program_name' not in toc - assert 'system' in toc['toc'] - toc = toc['toc']['system'] - assert isinstance(toc, list) - assert 'pos' in toc[0] - assert 'toc' not in toc[0] - assert _unpack(data, toc[0]['pos']) == example_entry['run']['system'][0] - - assert data is not None - assert msgpack.unpackb(data, raw=False) == example_entry - - def test_write_archive_empty(): f = BytesIO() write_archive(f, 0, []) @@ -141,10 +110,7 @@ def test_short_uuids(): assert to_json(archive['0']) == {'archive': 'test'} -@pytest.mark.parametrize('new_writer', [True, False]) -def test_write_file(monkeypatch, raw_files_function, example_uuid, new_writer): - monkeypatch.setattr('nomad.config.archive.use_new_writer', new_writer) - +def test_write_file(raw_files_function, example_uuid): path = os.path.join(config.fs.tmp, 'test.msg') write_archive(path, 1, [(example_uuid, {'archive': 'test'})]) with read_archive(path) as archive: @@ -152,10 +118,7 @@ def test_write_file(monkeypatch, raw_files_function, example_uuid, new_writer): assert to_json(archive[example_uuid]) == {'archive': 'test'} -@pytest.mark.parametrize('new_writer', [True, False]) -def test_write_archive_single(monkeypatch, example_uuid, example_entry, new_writer): - monkeypatch.setattr('nomad.config.archive.use_new_writer', new_writer) - +def test_write_archive_single(example_uuid, example_entry): f = BytesIO() write_archive(f, 1, [(example_uuid, example_entry)]) packed_archive = f.getbuffer() @@ -168,16 +131,10 @@ def test_write_archive_single(monkeypatch, example_uuid, example_entry, new_writ assert 
'data' in archive['data'][example_uuid] assert archive['data'][example_uuid]['data'] == example_entry - if config.archive.use_new_writer: - from nomad.archive.storage_v2 import TOCPacker as TOCPackerNew + from nomad.archive.storage_v2 import TOCPacker as TOCPackerNew - toc_packer = TOCPackerNew(toc_depth=2) - _, global_toc = toc_packer.pack(example_entry) - else: - toc_packer = TOCPacker(toc_depth=2) - toc_packer.reset() - toc_packer.pack(example_entry) - global_toc = toc_packer.toc + toc_packer = TOCPackerNew(toc_depth=2) + _, global_toc = toc_packer.pack(example_entry) assert archive['data'][example_uuid]['toc'] == global_toc toc = _unpack(packed_archive, _decode(archive['toc_pos'])) @@ -186,10 +143,7 @@ def test_write_archive_single(monkeypatch, example_uuid, example_entry, new_writ assert _unpack(packed_archive, _decode(toc[example_uuid][1])) == example_entry -@pytest.mark.parametrize('new_writer', [True, False]) -def test_write_archive_multi(monkeypatch, example_uuid, example_entry, new_writer): - monkeypatch.setattr('nomad.config.archive.use_new_writer', new_writer) - +def test_write_archive_multi(example_uuid, example_entry): f = BytesIO() example_uuids = create_example_uuid(0), create_example_uuid(1) write_archive( @@ -211,13 +165,8 @@ def test_write_archive_multi(monkeypatch, example_uuid, example_entry, new_write assert example_uuid in toc -@pytest.mark.parametrize('new_writer', [True, False]) @pytest.mark.parametrize('use_blocked_toc', [False, True]) -def test_read_archive_single( - monkeypatch, example_uuid, example_entry, use_blocked_toc, new_writer -): - monkeypatch.setattr('nomad.config.archive.use_new_writer', new_writer) - +def test_read_archive_single(example_uuid, example_entry, use_blocked_toc): f = BytesIO() write_archive(f, 1, [(example_uuid, example_entry)]) packed_archive = f.getbuffer() @@ -240,12 +189,8 @@ def test_read_archive_single( data[example_uuid]['run']['system'][2] -@pytest.mark.parametrize('new_writer', [True, False]) @pytest.mark.parametrize('use_blocked_toc', [False, True]) -def test_read_archive_multi( - monkeypatch, example_uuid, example_entry, use_blocked_toc, new_writer -): - monkeypatch.setattr('nomad.config.archive.use_new_writer', new_writer) +def test_read_archive_multi(monkeypatch, example_uuid, example_entry, use_blocked_toc): monkeypatch.setattr('nomad.config.archive.small_obj_optimization_threshold', 256) archive_size = _entries_per_block * 2 + 23 diff --git a/tests/archive/test_storage.py b/tests/archive/test_storage.py index 430d9dc3e5d8843e31674345f5c16d2617c80586..fbd7a955f5b0e6bd18ed67d2f060a400119a1b5a 100644 --- a/tests/archive/test_storage.py +++ b/tests/archive/test_storage.py @@ -82,7 +82,6 @@ def to_path(container, path): @pytest.mark.skip def test_generate_random_json(monkeypatch, tmp): - monkeypatch.setattr('nomad.config.archive.use_new_writer', True) monkeypatch.setattr('nomad.config.archive.read_buffer_size', 4096) monkeypatch.setattr('nomad.config.archive.small_obj_optimization_threshold', 1024) @@ -155,7 +154,6 @@ def test_folder_access(monkeypatch): for path in paths: f.write(','.join([str(v) for v in path]) + ',\n') - from nomad.archive.storage_v2 import write_archive as write_archive_v2 from nomad.config import config for j in threshold: @@ -167,7 +165,7 @@ def test_folder_access(monkeypatch): f'archive-{i}-{config.archive.small_obj_optimization_threshold}.msg' ) file_path = f'{parent_folder}{file_name}' - write_archive_v2(file_path, [(f'id', archive)], i) + write_archive(file_path, 1, [(f'id', archive)], i) def 
measure(small_obj, toc_depth, paths): diff --git a/tests/processing/test_data.py b/tests/processing/test_data.py index 6e36eade84202bb57487c0ed1adc4dcdf995a4a1..0a674c08b5ca37e4d96b1c9c2f889de8b341f200 100644 --- a/tests/processing/test_data.py +++ b/tests/processing/test_data.py @@ -470,7 +470,6 @@ def test_process_non_existing(proc_infra, user1, with_error): @pytest.mark.timeout(config.tests.default_timeout) -@pytest.mark.parametrize('new_writer', [True, False]) @pytest.mark.parametrize('with_failure', [None, 'before', 'after', 'not-matched']) def test_re_processing( published: Upload, @@ -478,10 +477,7 @@ def test_re_processing( monkeypatch, tmp, with_failure, - new_writer, ): - monkeypatch.setattr('nomad.config.archive.use_new_writer', new_writer) - if with_failure == 'not-matched': monkeypatch.setattr('nomad.config.reprocess.use_original_parser', True) diff --git a/tests/test_cli.py b/tests/test_cli.py index 3be50c2d969bd2296529334fd9b4f92d0b26aa0f..3ee028698b5e5098b2761c0f25f3066d413e869e 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -419,22 +419,17 @@ class TestAdminUploads: assert ('test_upload' in result.output) != indexed @pytest.mark.parametrize('all_entries', ['--check-all-entries', '']) - @pytest.mark.parametrize('new_writer', [True, False]) def test_integrity_archive( self, - monkeypatch, user1, mongo_function, elastic_function, - new_writer, all_entries, ): - with monkeypatch.context() as m: - m.setattr('nomad.config.archive.use_new_writer', new_writer) - data = ExampleData(main_author=user1) - data.create_upload(upload_id='test_upload') - data.create_entry(upload_id='test_upload') - data.save(with_es=True, with_files=True) + data = ExampleData(main_author=user1) + data.create_upload(upload_id='test_upload') + data.create_entry(upload_id='test_upload') + data.save(with_es=True, with_files=True) result = invoke_cli( cli, @@ -443,7 +438,7 @@ class TestAdminUploads: ) assert result.exit_code == 0 - assert ('test_upload' in result.output) != new_writer + assert 'test_upload' not in result.output @pytest.mark.parametrize('all_entries', ['--check-all-entries', '']) @pytest.mark.parametrize('es_nomad_version', ['1', '2'])
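
With this change the package-level `nomad.archive.write_archive` is re-exported from `storage_v2` and always uses the v2 writer, with the same call convention as before: `(path_or_file, n_entries, data, entry_toc_depth)`. A minimal round-trip sketch of the consolidated API, mirroring the updated tests; the uuids and entry payloads below are placeholders:

    from io import BytesIO

    from nomad.archive import write_archive, read_archive, to_json

    # Two entries keyed by (short) uuid. entry_toc_depth controls how deep the
    # per-entry table of contents is built; only dict objects count for depth.
    entries = [
        ('entry-uuid-1', {'run': {'system': [{'atoms': 3}]}}),
        ('entry-uuid-2', {'run': {'system': [{'atoms': 5}]}}),
    ]

    buffer = BytesIO()
    write_archive(buffer, len(entries), entries, entry_toc_depth=2)

    # Re-open the packed bytes and partially read one entry through its TOC.
    with read_archive(BytesIO(buffer.getbuffer())) as archive:
        assert to_json(archive['entry-uuid-1']['run']) == {'system': [{'atoms': 3}]}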