From df14e0853e48decdcb17e74898db6a291c44f0e8 Mon Sep 17 00:00:00 2001 From: Amir Golparvar Date: Fri, 9 Sep 2022 17:02:34 +0200 Subject: [PATCH 01/24] Adding mode tos witch between column and row parsing --- nomad/metainfo/metainfo.py | 5 ++++- nomad/parsing/tabular.py | 46 +++++++++++++++++++++++++++----------- 2 files changed, 37 insertions(+), 14 deletions(-) diff --git a/nomad/metainfo/metainfo.py b/nomad/metainfo/metainfo.py index 02c94fec4..a467e7540 100644 --- a/nomad/metainfo/metainfo.py +++ b/nomad/metainfo/metainfo.py @@ -1844,7 +1844,10 @@ class MSection(metaclass=MObjectMeta): # TODO find a way to make this a subclas ''' Retrieves a single sub section of the given sub section definition. ''' if sub_section_def.repeats: if isinstance(index, int): - return self.__dict__[sub_section_def.name][index] + if self.__dict__[sub_section_def.name]: + return self.__dict__[sub_section_def.name][index] + else: + return self.__dict__[sub_section_def.name] elif isinstance(index, str): try: sub_sections: List['MSection'] = [section for section in self.__dict__[sub_section_def.name] if index == section.name] diff --git a/nomad/parsing/tabular.py b/nomad/parsing/tabular.py index a450e5959..8bd160406 100644 --- a/nomad/parsing/tabular.py +++ b/nomad/parsing/tabular.py @@ -58,9 +58,13 @@ class TableData(ArchiveSection): for quantity in self.m_def.all_quantities.values(): tabular_parser_annotation = quantity.m_annotations.get('tabular_parser', {}) if tabular_parser_annotation: - self.tabular_parser(quantity, archive, logger, **tabular_parser_annotation) + tabular_parser_options = quantity.m_annotations.get('tabular_parser_options', {}) + self.tabular_parser(quantity, archive, logger, tabular_parser_options, **tabular_parser_annotation) + + def tabular_parser(self, quantity_def: Quantity, archive, logger, tabular_parser_options: Dict[str, str] = None, **kwargs): + if logger is None: + logger = utils.get_logger(__name__) - def tabular_parser(self, quantity_def: Quantity, 
archive, logger, **kwargs): if not quantity_def.is_scalar: raise NotImplementedError('CSV parser is only implemented for single files.') @@ -71,7 +75,15 @@ class TableData(ArchiveSection): with archive.m_context.raw_file(self.data_file) as f: data = read_table_data(self.data_file, f, **kwargs) - parse_columns(data, self) + if tabular_parser_options['mode'] == 'column': + parse_columns(data, self) + elif tabular_parser_options['mode'] == 'row': + section_name = tabular_parser_options['ref_to_sub_section'] + main_sheet = tabular_parser_options['main_sheet'] + + sections = parse_table(data, self.m_def, logger=logger, main_sheet=main_sheet) + for section in sections: + self[section_name].append(section[section_name][0]) m_package.__init_metainfo__() @@ -89,7 +101,7 @@ def _create_column_to_quantity_mapping(section_def: Section): continue properties.add(quantity) - tabular_annotation = quantity.m_annotations.get('tabular', None) + tabular_annotation = quantity.m_annotations.get('tabular', {}) if tabular_annotation and 'name' in tabular_annotation: col_name = tabular_annotation['name'] else: @@ -140,7 +152,8 @@ def _create_column_to_quantity_mapping(section_def: Section): mapping[col_name] = set_value for sub_section in section_def.all_sub_sections.values(): - if sub_section in properties or sub_section.repeats: + # if sub_section in properties or sub_section.repeats: + if sub_section in properties: continue next_base_section = sub_section.sub_section properties.add(sub_section) @@ -177,7 +190,7 @@ def parse_columns(pd_dataframe, section: MSection): mapping[column](section, df.loc[:, column]) -def parse_table(pd_dataframe, section_def: Section, logger): +def parse_table(pd_dataframe, section_def: Section, logger, main_sheet: str = None): ''' Parses the given pandas dataframe and creates a section based on the given section_def for each row. 
The sections are filled with the cells from @@ -188,17 +201,26 @@ def parse_table(pd_dataframe, section_def: Section, logger): sections: List[MSection] = [] mapping = _create_column_to_quantity_mapping(section_def) # type: ignore - for row_index, row in data.iterrows(): + + # data object contains the entire excel file with all of its sheets (given that an + # excel file is provided, otherwise it contains the csv file). if a sheet_name is provided, + # the corresponding sheet_name from the data is extracted, otherwise its assumed that + # the columns are to be extracted from first sheet of the excel file. + df = pd.DataFrame.from_dict(data.loc[0, main_sheet]) if main_sheet is not None else pd.DataFrame.from_dict(data.iloc[0, 0]) + + for row_index, row in df.iterrows(): section = section_def.section_cls() try: - for column in data: - if column in mapping: + for column in mapping: + col_name = column.split('/')[1] if '/' in column else column + + if col_name in df: try: - mapping[column](section, row[column]) + mapping[column](section, row[col_name]) except Exception as e: logger.error( f'could not parse cell', - details=dict(row=row_index, column=column), exc_info=e) + details=dict(row=row_index, column=col_name), exc_info=e) except Exception as e: logger.error(f'could not parse row', details=dict(row=row_index), exc_info=e) sections.append(section) @@ -277,7 +299,6 @@ class TabularDataParser(MatchingParser): self, mainfile: str, archive: EntryArchive, logger=None, child_archives: Dict[str, EntryArchive] = None ): - import pandas as pd if logger is None: logger = utils.get_logger(__name__) @@ -305,7 +326,6 @@ class TabularDataParser(MatchingParser): tabular_parser_annotation = section_def.m_annotations.get('tabular-parser', {}) data = read_table_data(mainfile, **tabular_parser_annotation) - data = pd.DataFrame.from_dict(data.iloc[0, 0]) child_sections = parse_table(data, section_def, logger=logger) assert len(child_archives) == len(child_sections) -- GitLab From 
882d958031f953eac0e21305b03330b1bae391ef Mon Sep 17 00:00:00 2001 From: Amir Golparvar Date: Mon, 12 Sep 2022 17:41:18 +0200 Subject: [PATCH 02/24] updated parse_table --- nomad/parsing/tabular.py | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/nomad/parsing/tabular.py b/nomad/parsing/tabular.py index 8bd160406..75d791633 100644 --- a/nomad/parsing/tabular.py +++ b/nomad/parsing/tabular.py @@ -79,9 +79,8 @@ class TableData(ArchiveSection): parse_columns(data, self) elif tabular_parser_options['mode'] == 'row': section_name = tabular_parser_options['ref_to_sub_section'] - main_sheet = tabular_parser_options['main_sheet'] - sections = parse_table(data, self.m_def, logger=logger, main_sheet=main_sheet) + sections = parse_table(data, self.m_def, logger=logger, ref_to_sub_section=section_name) for section in sections: self[section_name].append(section[section_name][0]) @@ -190,7 +189,7 @@ def parse_columns(pd_dataframe, section: MSection): mapping[column](section, df.loc[:, column]) -def parse_table(pd_dataframe, section_def: Section, logger, main_sheet: str = None): +def parse_table(pd_dataframe, section_def: Section, logger, ref_to_sub_section: str = None): ''' Parses the given pandas dataframe and creates a section based on the given section_def for each row. The sections are filled with the cells from @@ -199,6 +198,7 @@ def parse_table(pd_dataframe, section_def: Section, logger, main_sheet: str = No import pandas as pd data: pd.DataFrame = pd_dataframe sections: List[MSection] = [] + main_sheet: Set[str] = set() mapping = _create_column_to_quantity_mapping(section_def) # type: ignore @@ -206,10 +206,21 @@ def parse_table(pd_dataframe, section_def: Section, logger, main_sheet: str = No # excel file is provided, otherwise it contains the csv file). 
if a sheet_name is provided, # the corresponding sheet_name from the data is extracted, otherwise its assumed that # the columns are to be extracted from first sheet of the excel file. - df = pd.DataFrame.from_dict(data.loc[0, main_sheet]) if main_sheet is not None else pd.DataFrame.from_dict(data.iloc[0, 0]) + for column in mapping: + if column == 'data_file': + continue + sheet_name = {column.split('/')[0]} if '/' in column else {0} + main_sheet = main_sheet.union(sheet_name) + if main_sheet.isdisjoint(sheet_name): + raise Exception('The columns for each quantity should be coming from one single sheet') + + assert len(main_sheet) == 1 + df = pd.DataFrame.from_dict(data.loc[0, main_sheet.pop()]) for row_index, row in df.iterrows(): section = section_def.section_cls() + if ref_to_sub_section is not None: + section[ref_to_sub_section] try: for column in mapping: col_name = column.split('/')[1] if '/' in column else column -- GitLab From 4f3146e2454db0d3b2e7e5f0740b32747cd8efa3 Mon Sep 17 00:00:00 2001 From: Amir Golparvar Date: Wed, 14 Sep 2022 16:54:24 +0200 Subject: [PATCH 03/24] updated tabular parser --- nomad/parsing/tabular.py | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/nomad/parsing/tabular.py b/nomad/parsing/tabular.py index 75d791633..2dd7d66ba 100644 --- a/nomad/parsing/tabular.py +++ b/nomad/parsing/tabular.py @@ -78,11 +78,11 @@ class TableData(ArchiveSection): if tabular_parser_options['mode'] == 'column': parse_columns(data, self) elif tabular_parser_options['mode'] == 'row': - section_name = tabular_parser_options['ref_to_sub_section'] + section_names = tabular_parser_options['ref_to_sub_section'] - sections = parse_table(data, self.m_def, logger=logger, ref_to_sub_section=section_name) + sections = parse_table(data, self.m_def, logger=logger, ref_to_sub_section=section_names) for section in sections: - self[section_name].append(section[section_name][0]) + [self[section_name].append(section[section_name][0]) for 
section_name in section_names] m_package.__init_metainfo__() @@ -215,12 +215,16 @@ def parse_table(pd_dataframe, section_def: Section, logger, ref_to_sub_section: raise Exception('The columns for each quantity should be coming from one single sheet') assert len(main_sheet) == 1 - df = pd.DataFrame.from_dict(data.loc[0, main_sheet.pop()]) + sheet_name = main_sheet.pop() + df = pd.DataFrame.from_dict(data.loc[0, sheet_name] if isinstance(sheet_name, str) else data.iloc[0, sheet_name]) for row_index, row in df.iterrows(): section = section_def.section_cls() + # Sometimes sections is not fully resolved with all its components. Calling it here rectifies this issue. + # The m_get_sub_Section though might need to looked up again if ref_to_sub_section is not None: - section[ref_to_sub_section] + for sub_section in ref_to_sub_section: + section[sub_section] try: for column in mapping: col_name = column.split('/')[1] if '/' in column else column -- GitLab From 614005c75d893e44cb1cb7690bc02b5db63f86a2 Mon Sep 17 00:00:00 2001 From: Amir Golparvar Date: Tue, 20 Sep 2022 16:53:33 +0200 Subject: [PATCH 04/24] updated tabular parser --- nomad/parsing/tabular.py | 53 ++++++++++++++++++++++++++++------------ 1 file changed, 37 insertions(+), 16 deletions(-) diff --git a/nomad/parsing/tabular.py b/nomad/parsing/tabular.py index 2dd7d66ba..2b60cf398 100644 --- a/nomad/parsing/tabular.py +++ b/nomad/parsing/tabular.py @@ -16,6 +16,7 @@ # limitations under the License. 
# +from operator import indexOf from typing import Union, List, Iterable, Dict, Callable, Set, Any, Tuple, cast from memoization import cached import os.path @@ -58,10 +59,9 @@ class TableData(ArchiveSection): for quantity in self.m_def.all_quantities.values(): tabular_parser_annotation = quantity.m_annotations.get('tabular_parser', {}) if tabular_parser_annotation: - tabular_parser_options = quantity.m_annotations.get('tabular_parser_options', {}) - self.tabular_parser(quantity, archive, logger, tabular_parser_options, **tabular_parser_annotation) + self.tabular_parser(quantity, archive, logger, **tabular_parser_annotation) - def tabular_parser(self, quantity_def: Quantity, archive, logger, tabular_parser_options: Dict[str, str] = None, **kwargs): + def tabular_parser(self, quantity_def: Quantity, archive, logger, **kwargs): if logger is None: logger = utils.get_logger(__name__) @@ -75,14 +75,35 @@ class TableData(ArchiveSection): with archive.m_context.raw_file(self.data_file) as f: data = read_table_data(self.data_file, f, **kwargs) - if tabular_parser_options['mode'] == 'column': + if kwargs.get('mode') == 'column': parse_columns(data, self) - elif tabular_parser_options['mode'] == 'row': - section_names = tabular_parser_options['ref_to_sub_section'] - sections = parse_table(data, self.m_def, logger=logger, ref_to_sub_section=section_names) + elif kwargs.get('mode') == 'row': + section_names: List[str] = kwargs.get('ref_to_sub_section') + + sections = parse_table(data, self.m_def, logger=logger) + + top_level_section_list: List[str] = [] + for section_name in section_names: + section_name_str = section_name.split('/')[0] + if top_level_section_list.count(section_name_str): + continue + else: + top_level_section_list.append(section_name_str) + self.__setattr__(section_name_str, sections[0][section_name_str]) + + sections.pop(0) + for section in sections: - [self[section_name].append(section[section_name][0]) for section_name in section_names] + for section_name 
in section_names: + section_name_list = section_name.split('/') + top_level_section = section_name_list.pop(0) + self_updated = self[top_level_section] + section_updated = section[top_level_section] + for section_path in section_name_list: + self_updated = self_updated[section_path] + section_updated = section_updated[section_path] + self_updated.append(section_updated[0]) m_package.__init_metainfo__() @@ -116,6 +137,9 @@ def _create_column_to_quantity_mapping(section_def: Section): def set_value(section: MSection, value, path=path, quantity=quantity, tabular_annotation=tabular_annotation): import numpy as np for sub_section, section_def in path: + # when the subsection has repeats set to true, it needs to be initialized this way. + if sub_section.repeats: + section[sub_section.name] next_section = section.m_get_sub_section(sub_section, -1) if not next_section: next_section = section_def.section_cls() @@ -189,7 +213,7 @@ def parse_columns(pd_dataframe, section: MSection): mapping[column](section, df.loc[:, column]) -def parse_table(pd_dataframe, section_def: Section, logger, ref_to_sub_section: str = None): +def parse_table(pd_dataframe, section_def: Section, logger): ''' Parses the given pandas dataframe and creates a section based on the given section_def for each row. The sections are filled with the cells from @@ -220,11 +244,6 @@ def parse_table(pd_dataframe, section_def: Section, logger, ref_to_sub_section: for row_index, row in df.iterrows(): section = section_def.section_cls() - # Sometimes sections is not fully resolved with all its components. Calling it here rectifies this issue. 
- # The m_get_sub_Section though might need to looked up again - if ref_to_sub_section is not None: - for sub_section in ref_to_sub_section: - section[sub_section] try: for column in mapping: col_name = column.split('/')[1] if '/' in column else column @@ -254,12 +273,14 @@ def read_table_data(path, file_or_path=None, **kwargs): df = pd.DataFrame() for sheet_name in excel_file.sheet_names: df.loc[0, sheet_name] = [ - pd.read_excel(excel_file, sheet_name=sheet_name, **kwargs) + pd.read_excel(excel_file, sheet_name=sheet_name, + comment=kwargs.get('comment'), + skiprows=kwargs.get('skiprows')) .to_dict()] else: df = pd.DataFrame() df.loc[0, 0] = [ - pd.read_csv(file_or_path, engine='python', **kwargs).to_dict() + pd.read_csv(file_or_path, engine='python', comment=kwargs.get('comment'), sep=kwargs.get('sep')).to_dict() ] return df -- GitLab From 7d3a9da8f28f9b37b1c493c482f652f492f51f74 Mon Sep 17 00:00:00 2001 From: Amir Golparvar Date: Wed, 21 Sep 2022 13:05:01 +0200 Subject: [PATCH 05/24] Added tests for the tabular parser. updated python linting. --- nomad/parsing/tabular.py | 1 - tests/parsing/test_tabular.py | 184 ++++++++++++++++++++++++++++++++-- 2 files changed, 178 insertions(+), 7 deletions(-) diff --git a/nomad/parsing/tabular.py b/nomad/parsing/tabular.py index 2b60cf398..eeedd83c0 100644 --- a/nomad/parsing/tabular.py +++ b/nomad/parsing/tabular.py @@ -16,7 +16,6 @@ # limitations under the License. 
# -from operator import indexOf from typing import Union, List, Iterable, Dict, Callable, Set, Any, Tuple, cast from memoization import cached import os.path diff --git a/tests/parsing/test_tabular.py b/tests/parsing/test_tabular.py index 1d23fc4f1..ef6c2e394 100644 --- a/tests/parsing/test_tabular.py +++ b/tests/parsing/test_tabular.py @@ -123,7 +123,7 @@ def test_tabular(raw_files, monkeypatch, schema, content): definitions: name: 'A test schema for excel file parsing' sections: - MovpeSto_schema: + My_schema: base_section: nomad.datamodel.data.EntryData sub_sections: process: @@ -137,6 +137,7 @@ def test_tabular(raw_files, monkeypatch, schema, content): m_annotations: tabular_parser: comment: '#' + mode: column browser: adaptor: RawFileAdaptor eln: @@ -149,7 +150,7 @@ def test_tabular(raw_files, monkeypatch, schema, content): eln: component: StringEditQuantity data: - m_def: MovpeSto_schema + m_def: My_schema process: data_file: Test.xlsx '''), id='w/o_sheetName_rowMode'), @@ -158,7 +159,7 @@ def test_tabular(raw_files, monkeypatch, schema, content): definitions: name: 'A test schema for excel file parsing' sections: - MovpeSto_schema: + My_schema: base_section: nomad.datamodel.data.EntryData sub_sections: process: @@ -172,6 +173,7 @@ def test_tabular(raw_files, monkeypatch, schema, content): m_annotations: tabular_parser: comment: '#' + mode: column browser: adaptor: RawFileAdaptor eln: @@ -184,7 +186,7 @@ def test_tabular(raw_files, monkeypatch, schema, content): eln: component: StringEditQuantity data: - m_def: MovpeSto_schema + m_def: My_schema process: data_file: Test.xlsx '''), id='w_sheetName_rowMode'), @@ -193,7 +195,7 @@ def test_tabular(raw_files, monkeypatch, schema, content): definitions: name: 'A test schema for excel file parsing' sections: - MovpeSto_schema: + My_schema: base_section: nomad.datamodel.data.EntryData sub_sections: process: @@ -207,6 +209,7 @@ def test_tabular(raw_files, monkeypatch, schema, content): m_annotations: tabular_parser: 
comment: '#' + mode: column browser: adaptor: RawFileAdaptor eln: @@ -227,7 +230,7 @@ def test_tabular(raw_files, monkeypatch, schema, content): tabular: name: Deposition Control/Pyrotemperature data: - m_def: MovpeSto_schema + m_def: My_schema process: data_file: Test.xlsx '''), id='w_sheetName_colMode') @@ -251,6 +254,175 @@ def test_xlsx_tabular(raw_files, monkeypatch, schema): if 'pyrotemperature' in main_archive.data.process: assert len(main_archive.data.process['pyrotemperature']) == 6 +@pytest.mark.parametrize('schema', [ + pytest.param( + strip(''' + definitions: + name: 'Eln' + sections: + S1: + base_sections: + - nomad.datamodel.data.EntryData + quantities: + s1_1: + type: str + shape: ['*'] + m_annotations: + eln: + component: StringEditQuantity + tabular: + name: Substrate/Off-cut + s1_2: + type: str + shape: ['*'] + m_annotations: + eln: + component: StringEditQuantity + tabular: + name: Substrate/Charge + My_schema: + base_sections: + - nomad.datamodel.data.EntryData + - nomad.parsing.tabular.TableData + quantities: + data_file: + type: str + description: | + A reference to an uploaded .xlsx + m_annotations: + tabular_parser: + comment: '#' + mode: column + browser: + adaptor: RawFileAdaptor + eln: + component: FileEditQuantity + sub_sections: + my_s1: + section: S1 + data: + m_def: My_schema + data_file: ELAB-255.xlsx + '''), id='column_mode') +]) +def test_xlsx_tabular_col_mode(raw_files, monkeypatch, schema): + _, schema_file = get_files(schema) + excel_file = os.path.join(os.path.dirname(__file__), '../../tests/data/parsers/tabular/Test.xlsx') + + class MyContext(ClientContext): + def raw_file(self, path, *args, **kwargs): + return open(excel_file, *args, **kwargs) + context = MyContext(local_dir='') + + main_archive, _ = get_archives(context, schema_file, None) + ArchiveParser().parse(schema_file, main_archive) + run_normalize(main_archive) + + assert main_archive.data is not None + assert main_archive.data.my_s1 is not None + for x in ['s1_1', 
's1_2']: + assert main_archive.data.my_s1[x] is not None + +@pytest.mark.parametrize('schema', [ + pytest.param( + strip(''' + definitions: + name: 'Eln' + sections: + S1: + base_sections: + - nomad.datamodel.data.EntryData + quantities: + s1_1: + type: str + m_annotations: + eln: + component: StringEditQuantity + tabular: + name: Substrate/Off-cut + S2: + base_sections: + - nomad.datamodel.data.EntryData + quantities: + s2_1: + type: str + m_annotations: + eln: + component: StringEditQuantity + tabular: + name: Substrate/Orientation + s2_2: + type: str + m_annotations: + eln: + component: StringEditQuantity + tabular: + name: Substrate/Charge + My_schema: + base_sections: + - nomad.datamodel.data.EntryData + - nomad.parsing.tabular.TableData + quantities: + data_file: + type: str + description: | + A reference to an uploaded .xlsx + m_annotations: + tabular_parser: + comment: '#' + mode: row + ref_to_sub_section: + - sub_section_1/my_s_1 + - sub_section_1/my_s_2 + browser: + adaptor: RawFileAdaptor + eln: + component: FileEditQuantity + sub_sections: + sub_section_1: + section: + m_annotations: + eln: + dict() + sub_sections: + my_s_1: + repeats: true + section: + base_section: S1 + quantities: + s3_1: + type: str + m_annotations: + eln: + component: StringEditQuantity + tabular: + name: Substrate/Size + my_s_2: + repeats: true + section: S2 + data: + m_def: My_schema + data_file: ELAB-255.xlsx + '''), id='row_mode') +]) +def test_xlsx_tabular_row_mode(raw_files, monkeypatch, schema): + _, schema_file = get_files(schema) + excel_file = os.path.join(os.path.dirname(__file__), '../../tests/data/parsers/tabular/Test.xlsx') + + class MyContext(ClientContext): + def raw_file(self, path, *args, **kwargs): + return open(excel_file, *args, **kwargs) + context = MyContext(local_dir='') + + main_archive, _ = get_archives(context, schema_file, None) + ArchiveParser().parse(schema_file, main_archive) + run_normalize(main_archive) + + assert main_archive.data is not None + 
assert main_archive.data['sub_section_1'] is not None + for x in ['my_s_1', 'my_s_1']: + assert len(main_archive.data.sub_section_1[x]) is 2 # number of rows in Substrate sheet of the excel file + @pytest.mark.parametrize('schema,content,missing', [ pytest.param( -- GitLab From ecf2628243cf087b2a855751f196a4c8418b25b8 Mon Sep 17 00:00:00 2001 From: Amir Golparvar Date: Fri, 23 Sep 2022 16:19:56 +0200 Subject: [PATCH 06/24] get_sub_section returns either the subsection or key/index error. updated tabular parser. --- nomad/metainfo/metainfo.py | 5 +--- nomad/parsing/tabular.py | 47 +++++++++++++++++++++++++++-------- tests/parsing/test_tabular.py | 2 ++ 3 files changed, 39 insertions(+), 15 deletions(-) diff --git a/nomad/metainfo/metainfo.py b/nomad/metainfo/metainfo.py index a467e7540..02c94fec4 100644 --- a/nomad/metainfo/metainfo.py +++ b/nomad/metainfo/metainfo.py @@ -1844,10 +1844,7 @@ class MSection(metaclass=MObjectMeta): # TODO find a way to make this a subclas ''' Retrieves a single sub section of the given sub section definition. 
''' if sub_section_def.repeats: if isinstance(index, int): - if self.__dict__[sub_section_def.name]: - return self.__dict__[sub_section_def.name][index] - else: - return self.__dict__[sub_section_def.name] + return self.__dict__[sub_section_def.name][index] elif isinstance(index, str): try: sub_sections: List['MSection'] = [section for section in self.__dict__[sub_section_def.name] if index == section.name] diff --git a/nomad/parsing/tabular.py b/nomad/parsing/tabular.py index eeedd83c0..201a588d7 100644 --- a/nomad/parsing/tabular.py +++ b/nomad/parsing/tabular.py @@ -37,6 +37,15 @@ from nomad.parsing.parser import MatchingParser m_package = Package() +def to_camel_case(snake_str: str): + """Take as input a snake case variable and return a camel case one + +""" + components = snake_str.split('_') + + return ''.join(f'{x[0].upper()}{x[1:].lower().capitalize()}' for x in components) + + class TableRow(EntryData): ''' Represents the data in one row of a table. ''' table_ref = Quantity( @@ -78,10 +87,9 @@ class TableData(ArchiveSection): parse_columns(data, self) elif kwargs.get('mode') == 'row': - section_names: List[str] = kwargs.get('ref_to_sub_section') - - sections = parse_table(data, self.m_def, logger=logger) + sections = parse_table(data, self, logger=logger) + section_names: List[str] = kwargs.get('ref_to_sub_section') top_level_section_list: List[str] = [] for section_name in section_names: section_name_str = section_name.split('/')[0] @@ -103,6 +111,22 @@ class TableData(ArchiveSection): self_updated = self_updated[section_path] section_updated = section_updated[section_path] self_updated.append(section_updated[0]) + # section_names: List[str] = kwargs.get('ref_to_sub_section') + # visited_top_level_section: List[str] = [] + # + # for section_name in section_names: + # section_name_str = section_name.split('/')[0] + # if visited_top_level_section.count(section_name_str): + # continue + # else: + # visited_top_level_section.append(section_name_str) + # 
sections = parse_table( + # data, + # self.__getattribute__(to_camel_case(section_name_str)), + # logger=logger + # ) + # + # self.__setattr__(section_name_str, sections[0][section_name_str]) m_package.__init_metainfo__() @@ -136,10 +160,11 @@ def _create_column_to_quantity_mapping(section_def: Section): def set_value(section: MSection, value, path=path, quantity=quantity, tabular_annotation=tabular_annotation): import numpy as np for sub_section, section_def in path: - # when the subsection has repeats set to true, it needs to be initialized this way. - if sub_section.repeats: - section[sub_section.name] - next_section = section.m_get_sub_section(sub_section, -1) + next_section = None + try: + next_section = section.m_get_sub_section(sub_section, -1) + except (KeyError, IndexError): + pass if not next_section: next_section = section_def.section_cls() section.m_add_sub_section(sub_section, next_section, -1) @@ -174,7 +199,6 @@ def _create_column_to_quantity_mapping(section_def: Section): mapping[col_name] = set_value for sub_section in section_def.all_sub_sections.values(): - # if sub_section in properties or sub_section.repeats: if sub_section in properties: continue next_base_section = sub_section.sub_section @@ -212,12 +236,13 @@ def parse_columns(pd_dataframe, section: MSection): mapping[column](section, df.loc[:, column]) -def parse_table(pd_dataframe, section_def: Section, logger): +def parse_table(pd_dataframe, section: MSection, logger): ''' Parses the given pandas dataframe and creates a section based on the given section_def for each row. The sections are filled with the cells from their respective row. 
''' + section_def = section.m_def import pandas as pd data: pd.DataFrame = pd_dataframe sections: List[MSection] = [] @@ -263,13 +288,14 @@ def parse_table(pd_dataframe, section_def: Section, logger): def read_table_data(path, file_or_path=None, **kwargs): import pandas as pd + df = pd.DataFrame() if file_or_path is None: file_or_path = path + if path.endswith('.xls') or path.endswith('.xlsx'): excel_file: pd.ExcelFile = pd.ExcelFile( file_or_path if isinstance(file_or_path, str) else file_or_path.name) - df = pd.DataFrame() for sheet_name in excel_file.sheet_names: df.loc[0, sheet_name] = [ pd.read_excel(excel_file, sheet_name=sheet_name, @@ -277,7 +303,6 @@ def read_table_data(path, file_or_path=None, **kwargs): skiprows=kwargs.get('skiprows')) .to_dict()] else: - df = pd.DataFrame() df.loc[0, 0] = [ pd.read_csv(file_or_path, engine='python', comment=kwargs.get('comment'), sep=kwargs.get('sep')).to_dict() ] diff --git a/tests/parsing/test_tabular.py b/tests/parsing/test_tabular.py index ef6c2e394..29187aa69 100644 --- a/tests/parsing/test_tabular.py +++ b/tests/parsing/test_tabular.py @@ -254,6 +254,7 @@ def test_xlsx_tabular(raw_files, monkeypatch, schema): if 'pyrotemperature' in main_archive.data.process: assert len(main_archive.data.process['pyrotemperature']) == 6 + @pytest.mark.parametrize('schema', [ pytest.param( strip(''' @@ -323,6 +324,7 @@ def test_xlsx_tabular_col_mode(raw_files, monkeypatch, schema): for x in ['s1_1', 's1_2']: assert main_archive.data.my_s1[x] is not None + @pytest.mark.parametrize('schema', [ pytest.param( strip(''' -- GitLab From a8a1257ce71711b39fd98cd6cb656ef821144a74 Mon Sep 17 00:00:00 2001 From: Amir Golparvar Date: Mon, 26 Sep 2022 12:51:09 +0200 Subject: [PATCH 07/24] fixing linting and python tests --- nomad/parsing/tabular.py | 39 +++++++++++++++------------------------ 1 file changed, 15 insertions(+), 24 deletions(-) diff --git a/nomad/parsing/tabular.py b/nomad/parsing/tabular.py index 201a588d7..e7579e50e 100644 --- 
a/nomad/parsing/tabular.py +++ b/nomad/parsing/tabular.py @@ -87,8 +87,11 @@ class TableData(ArchiveSection): parse_columns(data, self) elif kwargs.get('mode') == 'row': - sections = parse_table(data, self, logger=logger) + # Returning one section for each row in the given sheet_name/csv_file + sections = parse_table(data, self.m_def, logger=logger) + # The ref_to_sub_section contains the ref to the location of which the sections are to be appended. + # Calling setattr will populate the non-repeating middle sections. section_names: List[str] = kwargs.get('ref_to_sub_section') top_level_section_list: List[str] = [] for section_name in section_names: @@ -97,10 +100,14 @@ class TableData(ArchiveSection): continue else: top_level_section_list.append(section_name_str) - self.__setattr__(section_name_str, sections[0][section_name_str]) - - sections.pop(0) - + if self.__getattr__(section_name_str) is None: + self.__setattr__(section_name_str, sections[0][section_name_str]) + sections.pop(0) + else: + continue + + # For each returned section, navigating to the target (repeating) section in self and appending the section + # data to self. 
for section in sections: for section_name in section_names: section_name_list = section_name.split('/') @@ -111,22 +118,6 @@ class TableData(ArchiveSection): self_updated = self_updated[section_path] section_updated = section_updated[section_path] self_updated.append(section_updated[0]) - # section_names: List[str] = kwargs.get('ref_to_sub_section') - # visited_top_level_section: List[str] = [] - # - # for section_name in section_names: - # section_name_str = section_name.split('/')[0] - # if visited_top_level_section.count(section_name_str): - # continue - # else: - # visited_top_level_section.append(section_name_str) - # sections = parse_table( - # data, - # self.__getattribute__(to_camel_case(section_name_str)), - # logger=logger - # ) - # - # self.__setattr__(section_name_str, sections[0][section_name_str]) m_package.__init_metainfo__() @@ -236,13 +227,13 @@ def parse_columns(pd_dataframe, section: MSection): mapping[column](section, df.loc[:, column]) -def parse_table(pd_dataframe, section: MSection, logger): +def parse_table(pd_dataframe, section_def: Section, logger): ''' Parses the given pandas dataframe and creates a section based on the given section_def for each row. The sections are filled with the cells from their respective row. 
''' - section_def = section.m_def + # section_def = section.m_def import pandas as pd data: pd.DataFrame = pd_dataframe sections: List[MSection] = [] @@ -257,7 +248,7 @@ def parse_table(pd_dataframe, section: MSection, logger): for column in mapping: if column == 'data_file': continue - sheet_name = {column.split('/')[0]} if '/' in column else {0} + sheet_name: str = {column.split('/')[0]} if '/' in column else {0} main_sheet = main_sheet.union(sheet_name) if main_sheet.isdisjoint(sheet_name): raise Exception('The columns for each quantity should be coming from one single sheet') -- GitLab From 43eeb17fb512ea22f35ff64f7ad94a602559ba41 Mon Sep 17 00:00:00 2001 From: Amir Golparvar Date: Mon, 26 Sep 2022 14:19:31 +0200 Subject: [PATCH 08/24] fixing linting --- nomad/parsing/tabular.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/nomad/parsing/tabular.py b/nomad/parsing/tabular.py index e7579e50e..6b8448022 100644 --- a/nomad/parsing/tabular.py +++ b/nomad/parsing/tabular.py @@ -237,7 +237,7 @@ def parse_table(pd_dataframe, section_def: Section, logger): import pandas as pd data: pd.DataFrame = pd_dataframe sections: List[MSection] = [] - main_sheet: Set[str] = set() + main_sheet: Set[Any] = set() mapping = _create_column_to_quantity_mapping(section_def) # type: ignore @@ -248,7 +248,7 @@ def parse_table(pd_dataframe, section_def: Section, logger): for column in mapping: if column == 'data_file': continue - sheet_name: str = {column.split('/')[0]} if '/' in column else {0} + sheet_name = {column.split('/')[0]} if '/' in column else {0} main_sheet = main_sheet.union(sheet_name) if main_sheet.isdisjoint(sheet_name): raise Exception('The columns for each quantity should be coming from one single sheet') -- GitLab From a53fa71d57f43d7cebe74c259dc5fe58acac6ced Mon Sep 17 00:00:00 2001 From: Amir Golparvar Date: Mon, 26 Sep 2022 17:12:04 +0200 Subject: [PATCH 09/24] rebasing --- nomad/parsing/tabular.py | 3 +-- 1 file changed, 1 insertion(+), 2 
deletions(-) diff --git a/nomad/parsing/tabular.py b/nomad/parsing/tabular.py index 6b8448022..578ef114c 100644 --- a/nomad/parsing/tabular.py +++ b/nomad/parsing/tabular.py @@ -291,8 +291,7 @@ def read_table_data(path, file_or_path=None, **kwargs): df.loc[0, sheet_name] = [ pd.read_excel(excel_file, sheet_name=sheet_name, comment=kwargs.get('comment'), - skiprows=kwargs.get('skiprows')) - .to_dict()] + skiprows=kwargs.get('skiprows')).to_dict()] else: df.loc[0, 0] = [ pd.read_csv(file_or_path, engine='python', comment=kwargs.get('comment'), sep=kwargs.get('sep')).to_dict() -- GitLab From fa1318cd47c08e6c71ef5a4810a51b5df81e5dc3 Mon Sep 17 00:00:00 2001 From: Amir Golparvar Date: Tue, 27 Sep 2022 10:03:25 +0200 Subject: [PATCH 10/24] fixing pytest --- examples/data/eln/schema.archive.yaml | 1 + .../metainfo/eln/solar_cells/solar_cell_eln.schema.archive.yaml | 1 + 2 files changed, 2 insertions(+) diff --git a/examples/data/eln/schema.archive.yaml b/examples/data/eln/schema.archive.yaml index 7e36c0cd2..daf0ca55d 100644 --- a/examples/data/eln/schema.archive.yaml +++ b/examples/data/eln/schema.archive.yaml @@ -156,6 +156,7 @@ definitions: tabular_parser: sep: '\t' comment: '#' + mode: column browser: adaptor: RawFileAdaptor # Allows to navigate to files in the data browser eln: diff --git a/tests/data/datamodel/metainfo/eln/solar_cells/solar_cell_eln.schema.archive.yaml b/tests/data/datamodel/metainfo/eln/solar_cells/solar_cell_eln.schema.archive.yaml index 12fd3dc17..0ae61ab64 100644 --- a/tests/data/datamodel/metainfo/eln/solar_cells/solar_cell_eln.schema.archive.yaml +++ b/tests/data/datamodel/metainfo/eln/solar_cells/solar_cell_eln.schema.archive.yaml @@ -64,6 +64,7 @@ definitions: tabular_parser: # sep: '\t' comment: '#' + mode: column browser: adaptor: RawFileAdaptor # Allows to navigate to files in the data browser eln: -- GitLab From 153be93896702cdad69d1ae3b9769f530fae3c00 Mon Sep 17 00:00:00 2001 From: Amir Golparvar Date: Fri, 30 Sep 2022 16:51:28 +0200 
Subject: [PATCH 11/24] improving python tests --- gui/src/utils.js | 2 +- nomad/parsing/tabular.py | 11 +- tests/parsing/test_tabular.py | 193 +++++++++++++++++++--------------- 3 files changed, 119 insertions(+), 87 deletions(-) diff --git a/gui/src/utils.js b/gui/src/utils.js index 7baecb23a..87952d70a 100644 --- a/gui/src/utils.js +++ b/gui/src/utils.js @@ -375,7 +375,7 @@ export function formatInteger(value) { * @return {str} The timestamp with new formatting */ export function formatTimestamp(value) { - if (value.search(/(\+|Z)/) === -1) { // search for timezone information + if (value.search(/([+-][0-9]{2}:[0-9]{2}|Z)\b/) === -1) { // search for timezone information try { // assume UTC timestamp from server and attempt to manually add UTC timezone, // new Date will wrongly assume local timezone. diff --git a/nomad/parsing/tabular.py b/nomad/parsing/tabular.py index 578ef114c..9e90eb756 100644 --- a/nomad/parsing/tabular.py +++ b/nomad/parsing/tabular.py @@ -38,9 +38,8 @@ m_package = Package() def to_camel_case(snake_str: str): - """Take as input a snake case variable and return a camel case one + '''Take as input a snake case variable and return a camel case one''' -""" components = snake_str.split('_') return ''.join(f'{x[0].upper()}{x[1:].lower().capitalize()}' for x in components) @@ -83,10 +82,11 @@ class TableData(ArchiveSection): with archive.m_context.raw_file(self.data_file) as f: data = read_table_data(self.data_file, f, **kwargs) - if kwargs.get('mode') == 'column': + tabular_parser_mode = 'column' if kwargs.get('mode') is None else kwargs.get('mode') + if tabular_parser_mode == 'column': parse_columns(data, self) - elif kwargs.get('mode') == 'row': + elif tabular_parser_mode == 'row': # Returning one section for each row in the given sheet_name/csv_file sections = parse_table(data, self.m_def, logger=logger) @@ -119,6 +119,9 @@ class TableData(ArchiveSection): section_updated = section_updated[section_path] self_updated.append(section_updated[0]) + 
else: + raise MetainfoError(f'The provided mode {tabular_parser_mode} should be either "column" or "row".') + m_package.__init_metainfo__() diff --git a/tests/parsing/test_tabular.py b/tests/parsing/test_tabular.py index 29187aa69..8da409428 100644 --- a/tests/parsing/test_tabular.py +++ b/tests/parsing/test_tabular.py @@ -20,6 +20,7 @@ import pytest import os import os.path import pandas as pd +import re from nomad import config from nomad.datamodel.datamodel import EntryArchive, EntryMetadata @@ -138,17 +139,11 @@ def test_tabular(raw_files, monkeypatch, schema, content): tabular_parser: comment: '#' mode: column - browser: - adaptor: RawFileAdaptor - eln: - component: FileEditQuantity experiment_identifier: type: str m_annotations: tabular: name: Experiment Identifier - eln: - component: StringEditQuantity data: m_def: My_schema process: @@ -174,17 +169,11 @@ def test_tabular(raw_files, monkeypatch, schema, content): tabular_parser: comment: '#' mode: column - browser: - adaptor: RawFileAdaptor - eln: - component: FileEditQuantity experiment_identifier: type: str m_annotations: tabular: name: Overview/Experiment Identifier - eln: - component: StringEditQuantity data: m_def: My_schema process: @@ -210,17 +199,11 @@ def test_tabular(raw_files, monkeypatch, schema, content): tabular_parser: comment: '#' mode: column - browser: - adaptor: RawFileAdaptor - eln: - component: FileEditQuantity experiment_identifier: type: str m_annotations: tabular: name: Overview/Experiment Identifier - eln: - component: StringEditQuantity pyrotemperature: type: np.float64 shape: ['*'] @@ -255,35 +238,102 @@ def test_xlsx_tabular(raw_files, monkeypatch, schema): assert len(main_archive.data.process['pyrotemperature']) == 6 +quantityParams = { + 'quantity_0': '''quantity_0: + type: str + shape: ['*'] + m_annotations: + tabular: + name: header_0''', + 'quantity_1': '''quantity_1: + type: str + shape: ['*'] + m_annotations: + tabular: + name: header_1''', + 'quantity_2': '''quantity_2: + 
type: str + m_annotations: + tabular: + name: header_1''', +} + + +testParamsColMode = { + 'test_1': ['', '', quantityParams['quantity_0'], 'header_0,header_1\n0_0,0_1\n1_0,1_1'], + 'test_2': [f'''Mysection: + quantities: + {quantityParams['quantity_0']} + ''', '''sub_sections: + my_substance: + section: Mysection''', '', 'header_0,header_1\n0_0,0_1\n1_0,1_1'] +} + + +@pytest.mark.parametrize('test_case', testParamsColMode.keys()) +def test_normalize_column_mode(raw_files, monkeypatch, test_case): + base_schema = '''definitions: + name: 'Eln' + sections: + + My_schema: + base_sections: + - nomad.parsing.tabular.TableData + quantities: + data_file: + type: str + m_annotations: + tabular_parser: + comment: '#' + + +data: + m_def: My_schema + data_file: test.my_schema.archive.csv''' + + test_params = testParamsColMode[test_case] + schema = base_schema.replace('', test_params[0])\ + .replace('', test_params[1])\ + .replace('', test_params[2]) + schema = re.sub(r'\n\s*\n', '\n', schema) + csv_file, schema_file = get_files(schema, test_params[3]) + + class MyContext(ClientContext): + def raw_file(self, path, *args, **kwargs): + return open(csv_file, *args, **kwargs) + context = MyContext(local_dir='') + + main_archive, _ = get_archives(context, schema_file, None) + ArchiveParser().parse(schema_file, main_archive) + run_normalize(main_archive) + + assert main_archive.data is not None + if 'Mysection' in schema: + assert True + + @pytest.mark.parametrize('schema', [ pytest.param( strip(''' definitions: name: 'Eln' sections: - S1: - base_sections: - - nomad.datamodel.data.EntryData + MySection: quantities: - s1_1: - type: str - shape: ['*'] - m_annotations: - eln: - component: StringEditQuantity - tabular: - name: Substrate/Off-cut - s1_2: - type: str - shape: ['*'] - m_annotations: - eln: - component: StringEditQuantity - tabular: - name: Substrate/Charge + quantity_1: + type: str + shape: ['*'] + m_annotations: + tabular: + name: Substrate/Off-cut + quantity_2: + type: 
str + shape: ['*'] + m_annotations: + tabular: + name: Substrate/Charge My_schema: base_sections: - - nomad.datamodel.data.EntryData - nomad.parsing.tabular.TableData quantities: data_file: @@ -294,16 +344,12 @@ def test_xlsx_tabular(raw_files, monkeypatch, schema): tabular_parser: comment: '#' mode: column - browser: - adaptor: RawFileAdaptor - eln: - component: FileEditQuantity sub_sections: - my_s1: - section: S1 + MySubsection: + section: MySection data: m_def: My_schema - data_file: ELAB-255.xlsx + data_file: Test.xlsx '''), id='column_mode') ]) def test_xlsx_tabular_col_mode(raw_files, monkeypatch, schema): @@ -320,9 +366,9 @@ def test_xlsx_tabular_col_mode(raw_files, monkeypatch, schema): run_normalize(main_archive) assert main_archive.data is not None - assert main_archive.data.my_s1 is not None - for x in ['s1_1', 's1_2']: - assert main_archive.data.my_s1[x] is not None + assert main_archive.data.MySubsection is not None + for x in ['quantity_1', 'quantity_2']: + assert main_archive.data.MySubsection[x] is not None @pytest.mark.parametrize('schema', [ @@ -331,38 +377,27 @@ def test_xlsx_tabular_col_mode(raw_files, monkeypatch, schema): definitions: name: 'Eln' sections: - S1: - base_sections: - - nomad.datamodel.data.EntryData + Mysection1: quantities: - s1_1: + quantity_1: type: str m_annotations: - eln: - component: StringEditQuantity tabular: name: Substrate/Off-cut - S2: - base_sections: - - nomad.datamodel.data.EntryData + Mysection2: quantities: - s2_1: + quantity_2: type: str m_annotations: - eln: - component: StringEditQuantity tabular: name: Substrate/Orientation - s2_2: + quantity_3: type: str m_annotations: - eln: - component: StringEditQuantity tabular: name: Substrate/Charge My_schema: base_sections: - - nomad.datamodel.data.EntryData - nomad.parsing.tabular.TableData quantities: data_file: @@ -374,37 +409,31 @@ def test_xlsx_tabular_col_mode(raw_files, monkeypatch, schema): comment: '#' mode: row ref_to_sub_section: - - sub_section_1/my_s_1 - 
- sub_section_1/my_s_2 - browser: - adaptor: RawFileAdaptor - eln: - component: FileEditQuantity + - FirstLevelSubsection/SecondLevelSubsection_1 + - FirstLevelSubsection/SecondLevelSubsection_2 sub_sections: - sub_section_1: + FirstLevelSubsection: section: m_annotations: eln: dict() sub_sections: - my_s_1: + SecondLevelSubsection_1: repeats: true section: - base_section: S1 + base_section: Mysection1 quantities: - s3_1: + quantity_4: type: str m_annotations: - eln: - component: StringEditQuantity tabular: name: Substrate/Size - my_s_2: + SecondLevelSubsection_2: repeats: true - section: S2 + section: Mysection2 data: m_def: My_schema - data_file: ELAB-255.xlsx + data_file: Test.xlsx '''), id='row_mode') ]) def test_xlsx_tabular_row_mode(raw_files, monkeypatch, schema): @@ -421,9 +450,9 @@ def test_xlsx_tabular_row_mode(raw_files, monkeypatch, schema): run_normalize(main_archive) assert main_archive.data is not None - assert main_archive.data['sub_section_1'] is not None - for x in ['my_s_1', 'my_s_1']: - assert len(main_archive.data.sub_section_1[x]) is 2 # number of rows in Substrate sheet of the excel file + assert main_archive.data['FirstLevelSubsection'] is not None + for x in ['SecondLevelSubsection_1', 'SecondLevelSubsection_2']: + assert len(main_archive.data.FirstLevelSubsection[x]) is 2 # number of rows in Substrate sheet of the excel file @pytest.mark.parametrize('schema,content,missing', [ -- GitLab From eadd8cfb2d1cba102e41b1808cb96b38df1ea58a Mon Sep 17 00:00:00 2001 From: Amir Golparvar Date: Tue, 4 Oct 2022 14:52:05 +0200 Subject: [PATCH 12/24] updated python tests --- tests/parsing/test_tabular.py | 280 ++++++++++++++-------------------- 1 file changed, 112 insertions(+), 168 deletions(-) diff --git a/tests/parsing/test_tabular.py b/tests/parsing/test_tabular.py index 8da409428..e3c0cc86d 100644 --- a/tests/parsing/test_tabular.py +++ b/tests/parsing/test_tabular.py @@ -31,6 +31,55 @@ from nomad.parsing.parser import ArchiveParser from 
tests.normalizing.conftest import run_normalize +def quantity_generator(quantity_name, header_name, shape='shape: [\'*\']'): + base_case = f'''{quantity_name}: + type: str + {shape} + m_annotations: + tabular: + name: {header_name}''' + return re.sub(r'\n\s*\n', '\n', base_case) + + +testParamsColMode = { + 'test_1': ['', '', quantity_generator('quantity_0', 'header_0'), 'header_0,header_1\n0_0,0_1\n1_0,1_1'], + 'test_2': [f'''Mysection: + quantities: + {quantity_generator('quantity_0', 'header_0')} + ''', '''sub_sections: + my_substance: + section: Mysection''', '', 'header_0,header_1\n0_0,0_1\n1_0,1_1'] +} + + +testParamsRowMode = { + 'test_1': ['', '- my_substance1', '''my_substance1: + repeats: true + section: Substance1''', 'header_0,header_1\n0_0,0_1\n1_0,1_1'], + 'test_2': [f'''Substance2: + quantities: + {quantity_generator('quantity_2', 'header_2', shape='')} + ''', '''- my_substance1 + - my_substance2''', '''my_substance1: + repeats: true + section: Substance1 + my_substance2: + repeats: true + section: Substance2''', 'header_0,header_1,header_2\n0_0,0_1,0_2\n1_0,1_1,1_2'], + 'test_3': ['', '- subsection_1/my_substance1', f'''subsection_1: + section: + m_annotations: + eln: + dict() + sub_sections: + my_substance1: + repeats: true + section: + base_section: Substance1''', + 'header_0,header_1,header_2\n0_0,0_1,0_2\n1_0,1_1,1_2'], +} + + @pytest.mark.parametrize('schema,content', [ pytest.param( strip(''' @@ -238,40 +287,8 @@ def test_xlsx_tabular(raw_files, monkeypatch, schema): assert len(main_archive.data.process['pyrotemperature']) == 6 -quantityParams = { - 'quantity_0': '''quantity_0: - type: str - shape: ['*'] - m_annotations: - tabular: - name: header_0''', - 'quantity_1': '''quantity_1: - type: str - shape: ['*'] - m_annotations: - tabular: - name: header_1''', - 'quantity_2': '''quantity_2: - type: str - m_annotations: - tabular: - name: header_1''', -} - - -testParamsColMode = { - 'test_1': ['', '', quantityParams['quantity_0'], 
'header_0,header_1\n0_0,0_1\n1_0,1_1'], - 'test_2': [f'''Mysection: - quantities: - {quantityParams['quantity_0']} - ''', '''sub_sections: - my_substance: - section: Mysection''', '', 'header_0,header_1\n0_0,0_1\n1_0,1_1'] -} - - @pytest.mark.parametrize('test_case', testParamsColMode.keys()) -def test_normalize_column_mode(raw_files, monkeypatch, test_case): +def test_tabular_normalize_column_mode(raw_files, monkeypatch, test_case): base_schema = '''definitions: name: 'Eln' sections: @@ -282,6 +299,7 @@ def test_normalize_column_mode(raw_files, monkeypatch, test_case): quantities: data_file: type: str + description: m_annotations: tabular_parser: comment: '#' @@ -294,7 +312,8 @@ data: test_params = testParamsColMode[test_case] schema = base_schema.replace('', test_params[0])\ .replace('', test_params[1])\ - .replace('', test_params[2]) + .replace('', test_params[2])\ + .replace('', test_case) schema = re.sub(r'\n\s*\n', '\n', schema) csv_file, schema_file = get_files(schema, test_params[3]) @@ -308,141 +327,51 @@ data: run_normalize(main_archive) assert main_archive.data is not None - if 'Mysection' in schema: - assert True - - -@pytest.mark.parametrize('schema', [ - pytest.param( - strip(''' - definitions: - name: 'Eln' - sections: - MySection: - quantities: - quantity_1: - type: str - shape: ['*'] - m_annotations: - tabular: - name: Substrate/Off-cut - quantity_2: - type: str - shape: ['*'] - m_annotations: - tabular: - name: Substrate/Charge - My_schema: - base_sections: - - nomad.parsing.tabular.TableData - quantities: - data_file: - type: str - description: | - A reference to an uploaded .xlsx - m_annotations: - tabular_parser: - comment: '#' - mode: column - sub_sections: - MySubsection: - section: MySection - data: - m_def: My_schema - data_file: Test.xlsx - '''), id='column_mode') -]) -def test_xlsx_tabular_col_mode(raw_files, monkeypatch, schema): - _, schema_file = get_files(schema) - excel_file = os.path.join(os.path.dirname(__file__), 
'../../tests/data/parsers/tabular/Test.xlsx') - - class MyContext(ClientContext): - def raw_file(self, path, *args, **kwargs): - return open(excel_file, *args, **kwargs) - context = MyContext(local_dir='') - - main_archive, _ = get_archives(context, schema_file, None) - ArchiveParser().parse(schema_file, main_archive) - run_normalize(main_archive) - - assert main_archive.data is not None - assert main_archive.data.MySubsection is not None - for x in ['quantity_1', 'quantity_2']: - assert main_archive.data.MySubsection[x] is not None - + if 'test_1' in schema: + assert main_archive.data.quantity_0 == ['0_0', '1_0'] + elif 'test_2' in schema: + assert main_archive.data.my_substance.quantity_0 == ['0_0', '1_0'] + + +@pytest.mark.parametrize('test_case', testParamsRowMode.keys()) +def test_tabular_normalize_row_mode(raw_files, monkeypatch, test_case): + base_schema = f'''definitions: + name: 'Eln' + sections: + Substance1: + quantities: + {quantity_generator('quantity_4', 'header_0', shape='')} + + My_schema: + base_sections: + - nomad.parsing.tabular.TableData + quantities: + data_file: + type: str + description: + m_annotations: + tabular_parser: + comment: '#' + mode: row + ref_to_sub_section: + + sub_sections: + +data: + m_def: My_schema + data_file: test.my_schema.archive.csv''' -@pytest.mark.parametrize('schema', [ - pytest.param( - strip(''' - definitions: - name: 'Eln' - sections: - Mysection1: - quantities: - quantity_1: - type: str - m_annotations: - tabular: - name: Substrate/Off-cut - Mysection2: - quantities: - quantity_2: - type: str - m_annotations: - tabular: - name: Substrate/Orientation - quantity_3: - type: str - m_annotations: - tabular: - name: Substrate/Charge - My_schema: - base_sections: - - nomad.parsing.tabular.TableData - quantities: - data_file: - type: str - description: | - A reference to an uploaded .xlsx - m_annotations: - tabular_parser: - comment: '#' - mode: row - ref_to_sub_section: - - FirstLevelSubsection/SecondLevelSubsection_1 - 
- FirstLevelSubsection/SecondLevelSubsection_2 - sub_sections: - FirstLevelSubsection: - section: - m_annotations: - eln: - dict() - sub_sections: - SecondLevelSubsection_1: - repeats: true - section: - base_section: Mysection1 - quantities: - quantity_4: - type: str - m_annotations: - tabular: - name: Substrate/Size - SecondLevelSubsection_2: - repeats: true - section: Mysection2 - data: - m_def: My_schema - data_file: Test.xlsx - '''), id='row_mode') -]) -def test_xlsx_tabular_row_mode(raw_files, monkeypatch, schema): - _, schema_file = get_files(schema) - excel_file = os.path.join(os.path.dirname(__file__), '../../tests/data/parsers/tabular/Test.xlsx') + test_params = testParamsRowMode[test_case] + schema = base_schema.replace('', test_params[0]) \ + .replace('', test_params[1]) \ + .replace('', test_params[2]) \ + .replace('', test_case) + schema = re.sub(r'\n\s*\n', '\n', schema) + csv_file, schema_file = get_files(schema, test_params[3]) class MyContext(ClientContext): def raw_file(self, path, *args, **kwargs): - return open(excel_file, *args, **kwargs) + return open(csv_file, *args, **kwargs) context = MyContext(local_dir='') main_archive, _ = get_archives(context, schema_file, None) @@ -450,9 +379,24 @@ def test_xlsx_tabular_row_mode(raw_files, monkeypatch, schema): run_normalize(main_archive) assert main_archive.data is not None - assert main_archive.data['FirstLevelSubsection'] is not None - for x in ['SecondLevelSubsection_1', 'SecondLevelSubsection_2']: - assert len(main_archive.data.FirstLevelSubsection[x]) is 2 # number of rows in Substrate sheet of the excel file + if 'test_1' in schema: + assert len(main_archive.data.my_substance1) == 2 + ii = 0 + for item in main_archive.data.my_substance1: + assert item.quantity_4 == f'{ii}_0' + ii += 1 + elif 'test_2' in schema: + assert len(main_archive.data.my_substance2) == 2 + ii = 0 + for item in main_archive.data.my_substance2: + assert item.quantity_2 == f'{ii}_2' + ii += 1 + elif 'test_3' in schema: + 
assert len(main_archive.data.subsection_1.my_substance1) == 2 + ii = 0 + for item in main_archive.data.subsection_1.my_substance1: + assert item.quantity_4 == f'{ii}_0' + ii += 1 @pytest.mark.parametrize('schema,content,missing', [ -- GitLab From d24d309e77d359bef0a8609b788ff434902548bf Mon Sep 17 00:00:00 2001 From: Amir Golparvar Date: Tue, 4 Oct 2022 14:56:13 +0200 Subject: [PATCH 13/24] rebasing for build failure --- requirements.txt | 3 +++ 1 file changed, 3 insertions(+) diff --git a/requirements.txt b/requirements.txt index b763636cd..2c83a7d68 100644 --- a/requirements.txt +++ b/requirements.txt @@ -48,6 +48,8 @@ xrdtools==0.1.1 openpyxl==3.0.9 # [infrastructure] +importlib-metadata==4.13.0 +pyOpenSSL==21.0.0 optimade[mongo]==0.18.0 structlog==20.1.0 elasticsearch==7.17.1 @@ -91,6 +93,7 @@ oauthenticator==14.2.0 validators==0.18.2 aiofiles==0.8.0 joblib==1.1.0 +toposort==1.7 # [dev] markupsafe==2.0.1 -- GitLab From be554e18d463093f43d3df6d165f1b3c62a11daa Mon Sep 17 00:00:00 2001 From: Amir Golparvar Date: Wed, 5 Oct 2022 12:02:19 +0200 Subject: [PATCH 14/24] updating python tests. added minor changes to the docs --- docs/schema/elns.md | 2 +- docs/schema/suggestions.yaml | 2 + examples/data/eln/schema.archive.yaml | 1 - .../solar_cell_eln.schema.archive.yaml | 1 - tests/data/parsers/tabular/Test.xlsx | Bin 24878 -> 11090 bytes tests/parsing/test_tabular.py | 36 ++++++++---------- 6 files changed, 19 insertions(+), 23 deletions(-) diff --git a/docs/schema/elns.md b/docs/schema/elns.md index afeb1296f..7f0d7e534 100644 --- a/docs/schema/elns.md +++ b/docs/schema/elns.md @@ -64,7 +64,7 @@ Plot annotation is a wrapper for [plotly](https://plotly.com) library. One can u which can be customized by using plotly commands. See [plot examples]({{ nomad_url() }}/../gui/dev/plot). -## Build-in base sections for ELNs +## Built-in base sections for ELNs Coming soon ... 
diff --git a/docs/schema/suggestions.yaml b/docs/schema/suggestions.yaml index 4e084654d..d445d4dae 100644 --- a/docs/schema/suggestions.yaml +++ b/docs/schema/suggestions.yaml @@ -7,6 +7,8 @@ tabular: name: "Either < column name > in csv and xls or in the format of < sheet name >/< column name > only for excel files" unit: "The unit to display the data" comment: "A character denoting the commented lines in excel or csv files" + mode: "Either 'column' or 'row' only when using TableData as a base-section" + ref_to_sub_section: "List of paths to the targeted repeating subsection < section >/< sub-sections >/ ... /< subsections >" eln: component: "The name of ELN edit component" diff --git a/examples/data/eln/schema.archive.yaml b/examples/data/eln/schema.archive.yaml index daf0ca55d..7e36c0cd2 100644 --- a/examples/data/eln/schema.archive.yaml +++ b/examples/data/eln/schema.archive.yaml @@ -156,7 +156,6 @@ definitions: tabular_parser: sep: '\t' comment: '#' - mode: column browser: adaptor: RawFileAdaptor # Allows to navigate to files in the data browser eln: diff --git a/tests/data/datamodel/metainfo/eln/solar_cells/solar_cell_eln.schema.archive.yaml b/tests/data/datamodel/metainfo/eln/solar_cells/solar_cell_eln.schema.archive.yaml index 0ae61ab64..12fd3dc17 100644 --- a/tests/data/datamodel/metainfo/eln/solar_cells/solar_cell_eln.schema.archive.yaml +++ b/tests/data/datamodel/metainfo/eln/solar_cells/solar_cell_eln.schema.archive.yaml @@ -64,7 +64,6 @@ definitions: tabular_parser: # sep: '\t' comment: '#' - mode: column browser: adaptor: RawFileAdaptor # Allows to navigate to files in the data browser eln: diff --git a/tests/data/parsers/tabular/Test.xlsx b/tests/data/parsers/tabular/Test.xlsx index 832c17dabe7a76b2e5e5c44f1724aab5e02e8ba0..2716823d96e5fea4e35daa4894530291194c46e3 100644 GIT binary patch literal 11090 zcmeHtWmp|cw)Vye?gV!a?j9^SY&=MUySux)yC!%D&c-FU1$Xy^;O_G6WM=L;Cv)!o z|2;GPqxRmutC}=DIEC3z=0FVI&;!qfjApn4vFaQ8103K3X%+|)q 
z*v3g$)!ok6@g0+!wH4_{Xh_;T03^8oe~N}SA3N361!ER*fV`)4Y zD@+JO1OR&ntfCoQ+W z1-oSnUfGC;6lAH-^3(Rf0_FNraq}0b0VX`lw~nVAcYcJ1K^>R}juvH}{=uXP{$m9wd=7{TLTq*N zvw6Nh+GP190@;uQ+51;r*k6#yv;`X8C=&Mg;tgB~2u4skZm&xbAi5pJ3WgIqR;WQg zipC%3*Ft&CtU3-jRYnVg9qaiS3IO~|bb{1aC@#P)a^T=Y1V^W?gRzw(Gt7Nti6}wrGLXM>#LIRro2Qt#}Y;pnLyQr_5q-snMhnYo;~N|%&zr*&zI zr7vqN`5-&EM)79qRJ1;mV~%3*m(RRee4UgB7$ z?_5gJ4`ksmUb%zGbnF2~1Ji{HuK_Ev%UgUk6*FG*DuWycK5}Y|hYJb-*nk57 zP{AYPX2tAk>tJbMYis#4eHEx_+h($0`Q+9;`?(eCK|%wk=%B~xK`O~6hgYh+diZ62 z@vO)*)6eaOn3E>yMI~b4%>Dgqx1+sRx2f&)Ak!}Gp}q8&(O2&%ockQKghwxq%cf79 z%ds0r?FTK`rgWvse4B0_es~SvOl4HlAOrSXifn_-M5nB3kxT77`|y>!Wjfu0VdQS} z7r3(4TUXG?KTXy#4zn}JS$e;QzomQmnbz!$L5eeWX)mBlIvi%)mb=bevW}h~rlRYi z)G{hc4f*wFbLrkl$QcklA~T6)-)S>+gXgw|U}!F!uaSshY>M8G0aXe1Q7!c_c0Vq- zNy#UQVews3=tp}b1u+LXg`Co_8#&n5LIXofhVHg{19!F@(1nE-R6*8h=;|IkY5n3-XfoE#I?o+o$zOrBFg_ecxR z6H%6xlc?@)$FvLewxh$Pvujg)rrJA0!!4)k;m77oTx^7xYZD-mtK{*1WV=fxK(Xlh zrE*Sx`jHmf&TA*L?^xK$>W+uOndmydCx6veMI&x=B@%kaGsD19sDWl*(kd{|fSS5m53B-!zp*l8ga=#~MRT+4ny8A3#iL}k6Il$SBf5+3$52s4&M z#p?#!ck!Grth%L@EO=Xf}sC~t}u<*RmibOwPb z7Q{nbE7I3p251Leu6Bh%!*8)KtL}9p8cryaw#9cYj>LO=8}?@|!<#h*_cnwC)!coE zde5gWzYp&`e5b!<&0u-P2Vy!r32ms1UE#Ew5?m3=BL!j1g;B>?7D#7w*}v_HTxZ;% z3S5!AW%`kkxDgHr?MGNncOciv_K#Hb=Ro^cT7m#i7Qun~pMA6@j9B!rAO#%< zw+BwN*(W>!hxk0{g4^ns;8?0LLglRtONHIybS-+ax^ImKcQ8=&C z(bf<`H!o(6f)L(cD{5SmhJo0duTik0q^io9n7PL0Efo}vT3(Zv@+j-2cLye^Y7=r8 z3+2hC_VtT6;u|sqR-iUZXnwoRj_Tmd%P^0~Rzh?AhTr3ZG zlOyx53+qpo1~)xF|2k! zzY!jK19=Zm-2x%>m8`;or1W(FXAzCkXKL89Ic!^yA*K^THtC-E+Gq(DBhboyWJFdn zLO?|0y5$XhP5Fl-ikKIdF$g~B%Z@ZtZ5=?2Ec#Mob1yxVyDa8mUD#p2am*>>$3!zb zLfWgZhp;iK6HCfngW(MLgWqf^wX+7STBa?=A_*T+LM>s@m&gd=8m{21XJJB#>ebvM zl6jkZ%ist<=+zz}U1Zk=3T1kRSdnOsl<1QN#9zzGoJxhSv~cixK3E}ZUNe`6HYf{e z)_D2_la}#gFqHZ7Z7fai!P0D;H_2{Ru;1bCV1K5(U-XK*M17xzkkIK9(Y>==#QI8T zpqr-8HQ9e=J{Ch((HL2W#Uo%TA0CCWpNZHbhw@^JR+=5@`Fi&~0bN#?-*J4EBN4`? 
zU(W&urIku(&~99OZo$SwhLHe(j8AWOi$_OC8XMsUG#Q`Ho~bkjGuu+RMuZNMR2WKA z+Q&2$p0MX+IxpB3IdD5$Dd#W*`~*a*PZAYR(<~v<+U6=G3!VfG2rl;kDzO?wL()E} z$zv3FAPk#f0Cj+5M3Wy_q_!^*G$zL}(J+4tpSwUtptxcT5^-I80#Bj;>0v>$mKI&F zv0&EArEtby=9e=nmAH2#F8GJ7SJs!zAG#LFWO>HK`reJ`pk9+wcIr2*5a(aow;^4v zb>u&Gon}pJm_9m>=L)qp)gp1d5#}Vp9To8vX6*O()Pg0js4+0y<0yB5sP}mgJJz0vmUUPVu<3qOItv1fglC4>8J>K z^dvCMhhf^+o#aXMTfpNKLl7F7Xc#(15Ehx(06Ibt#s|?7bo?Nk4`RFEz8@E^O-C{i zKWhHpy)CE6M1`lNdR3reT;apD+P+<@9rhme*_TLeXs7=3+xxST#n7PWt$Q zuh!bra|;&ti46OoLX?@uN7F2DpfirrNEcJF8T5@Pl1X9T&v1<=8&JBEDnTo~RDcw5 z3%3YK$L}KJCQw*_lSU0lE}~ZYObUCkPgLnNOudW7Co=s6j4r{#27Y$$9O;cGR~%ok z(3X*yg_-v5WBstI*~uL6+$g`*zGn23>vsBXz7S4l*be8yPWg?;{HT8t;ow2R;&*8A z7gW38ljix+_%X-oL8eAp(j--O1VunMZ%zL^iqv2!D4(TzWab=>@jLH7*v=Yfmnf%A zcjE;@isSdZmCqa^kNcAQUp_vW$+x69*mg3YVTuxuAiOE}7J%Fs zAl8Ja&1E&46@F*26j~>edfmKc;pXt>xu7-6df)>~Y|TxlV{M>V@FP~}RJGa+l21yp zX;;sk*-J&Y8l01a= zE-fh+mfN1BWJo%Kq6mZ)?)G9tacI(7fiv}6Rpc15g z!Y@&TjAp>>43AbJUW{dcNQhcyzeq~fZs*LR^A$vrH>Fp*it0Mllbx71MXDkklfdDq z-VA3=6y%9$EOymHoTGJ^99W}o8KjN4FeIDVur>b#8BwLP1WkpP3l z{9*d8Y|2Ko1|NAZt8pM`h^?o%DNuJPNpo(~bc>AFnNh`#IP zI8qPDX<`pBi;y8}4&*{SS6AbHtYj0c^~7*_r?FI;cl?FO^E`&ZwaOaYW0c=a+%7h| z1^7(FW4~Qb9ZuoODNWQA>PqY<0O|fox|TcFzxDNurF=&AHS9Lg09>3RsTKuWSW1|B zm;!n>8K0z+|1QP^_D4%`?l7w_BWLEBRiRcr#5H)H10e@wnKhwSUx{mYJqKV9$>g*Q zr}1lNU)J~4As1P*SO=OD$SvtD!cK9e)pyp(GaSxNd=5|UgZo7zmL0R{MWP)a7 zo}zq*GSf;%%LO@7@~N%3_&UeO{70e1G@DfeE&Sut+>^P6cTCPC3>7*_>lO*x9QlWpSkk%4zJA6hhy)rN6a$93P; zHW7V_4W7$W^{omaL26!?K&%v@6VmAp%{D1E-Ji?D($(&slRbP7Y`Cxlic32*hA;(# zEL@uwv5mTRV=z~d0!7*q=*E=dkjoB~Rpj21bMI(H9kB@k{2-Mnr8ftAkrc<_YZ@bI zdDnZEsHWgCY(p@p^%_;_kY&_vDuw)?iYOxLS=b7a-|2^4S)A4XS zoZ9hx%6D0{xs1)+?tOOlBN2Da`|`VDEYac;AyL{1S$OqH#fITAb)>#O%Ka&XaCQ%i zXivgx^j(*dqu~L__u8a;UCvd9T}y;-`(RoHIxH8ONlu$6W6~?%RKqMAM!CFnEtk#Kf?zVp)L#g>;UPEma?JsX4^-;V3E z4}Wi(&>+`KCOVM$%rG#jcA2FIVVv;Ea+b~^6`sNP`ZUS9%GB0Azzv5s9DS|e7GHct z<{i&Yma0c~42t|ZC*%&vcH|V@9Lz3DLQ9AAel)zMX{ra#k=Hvr1|Qqnem&h)!NYcF 
zJTD78(yNB65i}*s00I3FcmyQem4GnpCmr$ZGA*m4q|EtDE>tM2H|lMeiQCb=W8pP4 z53WPRt`Lr}FtOUF_ep%2w8r1X{cB%blf1I@Fw0r~VwdpQan@bU@%_xr^Zh52srN#a z#-o<$=URQ2SC=9d>8)P=L)6D-yE%%j)jRBDNk_vly)B}y-q5WRQE)6JOG?c&O%@QN zXcwsfB6S7GuYew8V^8zGOwnU8=gVQ|B2LggP@|=Z=@(E1bK%D2v z`-Swk6Had@fP1MQwBFe7Ah1|tW4rBKtxL4+go-SRgqRQ}eK*7|u1jx?sz4Y`f8&wJ zUYZU8G+<^YjdfC4XtsB^a5n$cCQ-mh5#>}H%`(XnfV=UEec9;``2xUhwuUGXq>qxL;(T!`=w1e`XbS z{LZcGrrwa*Tx-N6Z&C~eUnJ1wW13M;7iW0I*9k=il9!4L8J!>YnFP{4L0z>eu5gpg z3^~G@e={xWrQe%&O8gPZzl~gWCN^C|Uug5F!01q6$<%~`G2<@Nx-sn?3DwEMnERDY zUb9u9JEFE$X{nC=6+DvSG}n!_^BW$)(+V2lf$XnZ&aYqX%Vgi&4K26Mn83OE3wYT1 z@)RrY0!3|8DQNE|2?mn5^dgrNHKXCzT6t+0pMB`8;8tYj8I$n!JJ3`El|P`@mj%1L zR3Cn<(%EySM_t#8SJFY5+P{O)(I1y^F|X{OV>;T}VH^oYkBpSl{fZ0PxZ7;kS@5zy zMkmd_2My(ETDw>Tn}7sqq1zPl#}8)tg5Q^GA0vCLR80^HoU~COqb$zY0Qb2OKn@Ms z@D^0YKKYs@aLF?KB@G0Rd5@$FhI%`6-bU?!Ym1ip=fB+m|2}Te69Qg#sA2#B$p1*c zj!y1Y#*RPN8eiWo*=%v4c{7juqFo&An$Q*Ld+IZgRef=(eyyBWStGt6PB)mh`+D`` z^D5rNT-sNY#8*JUdCE{nw{Kl!g@eTv7^9epjJA8F1j{!S-FvNcZfzOOzCx3o1I}8> zGZNGp8S*PcmMe<2YrdIv7ButS8LT@1@fC#?kaA}AchGHynx^NJ)m!9*ns~LMJO+*T zE%C1yv{XL@;okXds^L|>i)N`foAXmmfADO<+m;6nV(B_`&Q$a}WYd%QzMqNyngUtk z(#bNM%_L>C>vUEnCF}uG=yhDElP|Ra%vla4d_TxwIt@$Rp`m>3L!gZ2>EovFNwXHl zaY5<1`BKQ0tF!^TTy=kYq+VTxn-7Io>#WJ|B~A7IZb^YWgwL6+mz&OUo)0TG>j+4` zy)7fEWHo2TN}21DikC1=e(`uzh1yTkZ|eLF$7Foj&U7JG`eBnX_SDp*+)3f~CRd; z6;sG9*YSC7tuS=VO$fvp+jwqRW1MQ`!PH?AV6C77~uONnmQTLs8 z(4UDG73vqlR&-9(-mxw#?Y@>0UU8m{-d^hzmwWGIO5#^hhlx_|!-Fv^tNglmMkoV7 zss(w<@ZvyrG}d7QFSv5aVvzE1EEe@#BJKiPUd0{6{Z(}pt$?s5OxiI=2UvTY7JEK9 zY;h|Np?2BfgPV1Uub-w< zal|4?Edn#Uce!8%WRp@A+Y-X18W^S#SPQXBA_6&Kr{t-SNo0O=S)JRJk z+FE-8JSvK{qQET^B0_H2T|4wBa!jacmi^_BprSV&=ok{fo2b(d9P;uB9H3i9>Gf|m zBDMBn$V+^UlVi7w#MawZk;}5~dXc4qo`)%m>=P!XD|580_sHdBJ=@%(R{KD{+{CAx z8-cB6u#igubahbeANlc^?UwLK1Bj*LNK#)&Jpu~CXjU(TSwaOXW)MCO>2V=JoM=I3Q*o52-9WH<_6R(vE198{^+ zMuMJhCwo`PaFV+p9j3W})l-PPX9Eb02R$Ae7%%F$`)+sNd7-Snq9aX7l7`0-RI^-1 z8vV_&tuG|w1E{dwlmy6cq_I%V2!mV)?d(aWZlXLN`cProaiQ-Lp%P;IBQ)er?(7

9bq`_TAIOt&!@pG#wUJeV7;#Ipny#@1AurL)!@BO$4>P74^YFnzZAbSg; z;AyyG)7@@sJItU?sXAIR%RgTQMd`BQ^-wh{VnMQ@eE~tPB?S|&lfY*+k7*E>Me_)q zErY1cLoakUPglTliHrN!-5=+9}H zPo-t`LG!$UB75>_&UT!q=(`^RUw|k{Cft71v5GsCO{_2JxBLE0y?b^-4-$Fu9!+s1 zC}~N8I3Y)`^;aT$>Qt@yYu4x|WEj%uWMWnjhyPMT# zRSyrwp2Hbh)6R6CYT6goWt(&co9}qPI=J|rn9Dh-XgpJ>f5a*)zoFDfJrm7Bi}jPHS5(rj_Zb+L$Xa{ggfZ!-m`UZzm%pneQTx!GBu>xE1Zseywk3_g}XL z56nN0thhhwHfXOo@uV;_Cgrwdb*OowR`RD0$+h@px~88zkuYKGw@vOrBpvk&QDV#r zaSo)PcWNe;0Uj2RE+DTJVJ4e`fnNPB57CkxXib}gonUuqEV5*PCJUq^V{Bm$NO^%o z->%63Pt%+Zxxy}kPr{l&U^&s31Q6Lg=~1Zn$`6?%k_T!hy-_InS(M-`_vuVuSc1!H zgsV@;P2)zU1yM+;s6H%oLX0`58WsM{;}I)=Xhon=bVfcu{>SccS;vc>o1Q7#1|}#( zUk7U9(Mr&;T45#VY4PkG``-8PD=@wNgZv;M8Np=#_jQN=8kB!s|E3BNDEn6fe=Uvr z7x3pb2mHuC6-xaM{JmV{4`?^+|LBwdDjxYA{8u6T4=4aYjqnTnzsTah+xcA<{lk(T z(*L}~zlo&3TlrmY_``}kSZ@6rsr8ro@Vk}YcklkNB7yVE%I|x5zZ>{{{qcu^9-?0c z{#cFt4*h*5@CVeH{2$QY=LNr8_-h{h0}lYWQvd+}mQ{a;|CO%(3|FH56a1g#1(byW U`^e8)JVZbrSWtAK{dxEQ07hVC&j0`b literal 24878 zcmbTdbyQs2mn{sz-QC?OoWk8LxVt+9g1Z*M9Rh)1MR1n@K?`>Y8YH;81pm0VM}OVp zz5aFI*Y(G#S+n+@XVt2;$J}S1ecmgpfr8XelzUT`I+e<6 zNKv^cpcL4Fjb@RpKA*lNVd5PW)cb##CXBZh zK288{*AI>suC9*kKF&_@X@;&j9JnFtcPy-3rA8@JJji8SGvCsin;hD1m%6EZc=_s8 zQe2jU*L!Hm99!%#LQXuMz6dpp2GR}TH~v-}-~nDSL1&lcj;bhkiB`hnD8^ZH*-9#a z4p0dF1YS8-n4WW)Z0>#><_CbZD7=^|(F8d`snOwJMOxhO&zA_1IbCgm zr3xeUdtj3SFJ%;ZRI{%o%d6mZ$_7F#oUoK!eMI)BY4sh9i283aHs`e>KHIh2T{|c9 zST*Up>*rNFd;?Ti#fQs4akj&TboZJ*5=sfnw@nK8wz-Aw9Qei~`! 
zJIi97Vpa_5N{*qOl%f_*U9W`U2ef|MbQ+7$62T!;|JPd6uv%3Ajj_AuuRFbKL0t2J z7^%#-)$1x18)Z>0qO0FDX6(pkDfZL}=~5XnxcS9b6f9w{U;P=Ex9bD>dt)~o@G#}Gxk4$;l@@mBQ2Pig( zrGd84>ihJ6aM_xC;zN%5rI}D06RL9z_8*c3$z^MP^C(+bqnj346&pO#Mv}i=*SgxM zY1_G@U)Qn(hagh&zvl(Jb+M8<;#KV|Ve>*Wtj7A~c8ACHJVcIf(lDgl82M$wSdJu{if8B8^RxR~&nM8xe z^Z3=BPnA#0{%Z=OnMWY~bBz)17Gc3o$!=NvrxL|vobB09B8=}TZ-~a_r&uU`tsPD_ z?Q4dLR85P^CtI{yen|}G3C_;fbh{KluEjrprGWLt;nDicMP9v13(fd>6UX}Q*}8oo z9@zVXgN-5GU&=4$SJm{vf-cB>!ygh`qDiYD+8AZifm-!`H+%lluR`xgM2&RNX=Qn1 ziSrTFvH1j8WlHkwCNHSEGVeDT_0!rQNBKDDXdwg7psCrrNW~FHt|goEo_!uX2|h3y zF(BXPHkmauP{L?6%$C^x&1pPex9E^j?9z zeMb)YQQD@aEIFEzHf9_l-pt>6q%0yjeo+d-VCtRT&oy-o*!9`t$7Lj~QsZ{foDvD-k zp*M83#NA`0Sxlk4M}4%lQQh0lDQcB~y;KGm5_vP;kZcAyLdSP6h!6+i`h9r|w0u1` zkceGTB`0)dH}OOQZEZ`m1&nXLuw^ZP=Jv9oddaw^i7{@7b%`ejx`D3NQM9J>$je^If#LlY8&y*L)r=$iso)s0@n@gd z!CgHl%DRx%2LncjJ?8Nl%o>^sEOT90dXo^Op`&>MNn_4V$QGSh0Md_gIdKz745R+t zkEt}ZvFG*M0O>X8Y)mS8)LLRCzqZRC@pjfH#tAKDB!-j^n~6B@70~ z)qxhs@m;#)x2uDJv5oh_ zy=RwKb(f5fHVP@r>JH#yuXjzM#a_L$iw$KWzrJ**xa*YIjB=g~cW-kaHwTGPulJ`X zO`qgQKTXCSKUXh~!J=@777%ewo!U8v%1AVf?K)SFP!$D#SPIe^>L|E& z&pg6nmi7GjWUNup&auPcos;#OrmPL^Ja?7JV1o3v+5Ts4Kx$bZXZubma+AfE=MiPz0Z&0e3q(iC_#PJYIe9~U8s`6gyq1?Ip>R% z=OVdhdk5(Z@x6dB1_+}gmwgqZdM4OWzpHvDXD{Iu;V)CF^Az}D01pKf>iyrD65)T$ zl059Jtvx*ef4_15wVbEVbk`uOV5HM38*hSQ7dLJ1N&%&R+hY$A5O+unfS5vPQhE=})ma0C&H z=x%XPC*=I{Jg@Mpn*T;l3V5%HVp=3tC*dvRs`TLLFPs!oB1^p~6ciNU# zP#%duO~5x_;S$WpgY@1!8)|<9SCZ+hQ$1@X41GJFv?_-qr`+h1d{{5H!c5CQktFE% z8+pR(maYnM4lQ2@O=U(Vu83mr=XO3W2?QzsX`1q-PmFj*Og%alGImcgt@c{^dA#eg za^r(6vy=`x7Cc;EUWf;yC=~t~q^WT~(2Kdd{x}39{Ggh=0+^z>hqH8q6ApyKmy+?v5 z4waiX&&Yj@+%I*QWVQU9;sUp6noht#8M9%*7q>?=P3<5}Kc8QMqLV4wfG_N+?a;VO zxk9ejS+tRAS;YAq7VN_657bGu1~|Nm1C;e7f);C5oZmk69mEvZT@&wIUF*=j0Ll=$ zbkdXv*(vrSiRjV#SibCp?roIr)r%x5Rp@0{XGMWT+eaL^=&84!>h-7lYkfFBq?P1@ zee+UrF7DA3q^$>xi(xABWTu?2atDS@la1GrEY-Wl_HPNf{e~o!SQ_t)GGR4ZVkX0{ zVxxy&{Ut8NY+MT^saMLf{i-Y+sAckxG$xG>u`>-CEDZ9`W`>N2xibAqf?zUjc4yMQ 
zS8C6E-0(1-=g$0?tyD47Cc#FMJsLG;T~xSAS*No*(`IUslRXL^vwo&OnH@1|wqiHL z+k$VJbGSEnpPJaP)Db#s0i9AM-r-nf7{_B8)`;Tx{u!TUoPg@2m$|sbWrN2wiswKO zqPs$(Ou%Le;yF;9>rhd-Dv?jEmKj&130yA4n7)3VHCJsRo7SaMBo8S&y7o2nVG$XX zAT4_ekM_rgBBr8fJ!ddU*@Uol3G80aRW(&GV3~t`hVXu(UQ_N%afFy5_8^9F!r~x; zF~=ygq-sOVkV4UVnBn}O=P{iWzm8Psq^Wj!A6(O77HJ8hrLpVcmh`pk@3mb2W`jYD z!i=j`-}<#r$TH^956g16_lZe|X9ADAvUUQ`qtY&q1z&GkP?wcXAAx%yLtflk$8TPm z4(S`jAUeL()J}M7E#8(kG;3!=qMzb}!n7~bx6CtWk*P^KnIj2~2s79o!Z0LQO$ExY zKW(s)_L~HF+LM04w7L2kTPV?fmC`LMR?i5*Hzuj|h(q~exz_G6MvHk=G%-vkvse68 z-=v{C;%G0W(1Vgtk|RYA&cidnnCm-xwj!+=c(U3s+ccB4TFAg8b8&%=1?S^e^iQpP z<_Mn&1Z#uN&?^~28%KJ=KkgTxoBI4WX)6Eg>%}?1T=j+eNu1(UgVKV$R86`gLdGdV zHhkr(NH#nxB=8MEVBS!_RLlTW#RB7O+IkOKEB-K#nTe_uF$ZTU7ux=7wld76aHhR@ z9sw6x0R0FL=VD}Mc{JhdRG?-=|1@MBs<1rTy+p!emmCD<6q`at z7V4Lb9FoVT*l(>c+ zq&!BlC^d|96etdph=xnT>eCQvvH&%TbOvYUT@&1qYIt;al^aBVL zlZcv2(kgBUHkpZ$^dms6=b_#MNw20Yt)v;uqr);Jny&N{SBPu+YTK3h>A+0HEQ zkNRx+EkaekQ0|OhfuFY6P&NgPa#%JeU{A)&z^Umyo_TKq_8EI1TIZR9*;VqTXo9fl zl+~4n9GX3ZdeBnXbA74viL6!Zj-2K8l+k57t$Elf=Im}?;yCP6ziQ?NYu)qI(YGi> zw#j{=rI{(3`#lg3OCK%tr=t@u;Uj1gKlP{6^6us%bk#GDh81vij?2Yw6%E1N=(2uX zfwo_?bK=R^l5yhU!yN_jIYpV*>2Hve?sN=`sz|NQ4am zPH_%SycG#C!%|=W_EF~f7qMwYd~5f#nK&cmJOzP!jz{MQ>DaTK1Wa&fSY=1nG2!Ss ziR|~KF{R>YSt9!H+4)R<-H*5VTDN0%&mG$KR{$(LDZ!Qgw58Wqq-zNmhh zAwpLMd!3dDnu!Iy>D={!r}O}s3%cYd*`G}D5n_pC;p6Upl6vbZMf}zE=Z(v*9sR0yvp$))td{#Mt8r!R+!&qHWn+HS-R_Vt=&1PH;ppwksSE;oHD zFvcFF)%=XdcQ=jUhV#OZ{l~Q4b^>$Kt7M?rb8wSMutuSR&z?!yAj75bDyPvgwV7G* zPP@?4AvK|wb}lM*(siavr42c?)5KY1uWA1$zKC{GUV>?;qEQZnIrXI?>)J83bENp0 zIY!2&fnDWeZU)0`wS&?(@=~l=o7pf?`$)Zs^aF=GyQ+^~^uvkBK^7I*=%~~Y-QPtz zcQSifmDwd8fZU}6GRlK8CF5{xL-Ry~UNm5vhW>_e(K)JIjCR!C~c zs-KeBcD{u+3J_1YKfUBezQewz8l{4yFCyV#&P5wBQR-0CYls(PF5tobW*<1PtY(S( z8QIEKELZqEPrKa9+j8`oA$d{U06W?v3HL&>0AGognNou(0vV)*tSbd#NbgmIPF6HG zmPlIfmv3@&#*mi|{Vcs#rQjLi_`-xuBCoXRmOw}6QV=4qpr0Vs>pt`oXvK@JuZxMB z+ifHSgUx2BQusJe+t%BZcQ;es=D8!6z^XSCR>F#C-ILC$HyqaVSRt3-%`9iDmZa-$ z>qSuZ)Lq8$?eW)bG3$gjp)PYEo~(g}5}xexw&dGkr&^29KF5%WL(ic4NKi}!RfB+E 
zR3^9TaSx2HTm5l?EF)u=j$GKW4szO|GwaWv)O;xu?+!~3Glt)pSu{_j*`k8+j70G} zsgrF1>aMNWN}$jx6pcWX{{Bkw8^`af!5&q!mGgTiA-!_2+gEgpR1V!OdMTqE56@Y- z+Y5%DBOovxUl2xJ5BIrT73NW%?MlWLkbNN3X#dEdV8xw+T1PkIbs&9*g;{Bh@7dg1 zVJZI+@QpRNvi`jp-0Db}zj4qAhC!6FnYkg~Ia9UJpt+ z6Ct=~p6)aQCJAC}26S%9ZnJ*~uaYyjc~7;a)4O2@xo8t(y8a2Owuailz8-;_M-wb? zuL5;z>7s}k`t;plxu;R-#$LO1RZ9H$kbE2A8;>N&fUvt?XuQD}oaaov-jH|3XoFhD5IkHDcnFU5o^;sSqBD z;o<^ldpl1uwggvHaiR~TjDc4A=kt`odhtx*k&VZU;hBW_U5k`)zo3S5y49DGoBv(O znISP}Lf&xwaQG@&x54i+{N-3De&WK`gqEm%T&6)|B^l(cpE%waZAqh&@e6C7d}@aJ zUV$6G#3O}M!!yByLZK&R+^zfDa#()hCKYsFnO#e^U^R)`4R6u+)=S^P{X8zj2cIsS zFTz1*)QlftYhd%Nv(d*^G)Cs6WLFY|w++a5id&U}Qp$5QZ&sx5lpC3HnzaL0Zp-M2 zb=?gSk2nKII*;YOP?Fyv$h-^vG=J7lTMEZg7!i78-+2GR8QXP&J?|yj)Cv+*89-B* z6uQ z(s&%qlTZQAgsZSe%+3;@-umN|IDDlUTI}&}hlRg1{X_cZun>y>g~M|Ho5K!&C=$e7 z?W*~@*g_;Mk$F;`uvZ}TynIlwmJmX!2@S!i=MD&T>?Sm2sv|gX!Vd6k-8;-1>tf|Jb-6wRbZ^C5;l*kU0+(W1J0F;zDnT3Ax*qr50H$!w?9`qgHC<4h{fd9 z3#r`CF;u)t%+QW7OoNw!*<-Y%AyJ+Qsn6e@VEzymCjTpqKErW^j0J6TEDcLEMag1g zoF07|F}fYsS___v5s6W$YjMEW9dt`*j0>Rdfyp_f$~X8#BcRdmVW8N-gO0?09>ph_ zn^9KQaLx8=m1pmxU|}zQ>V1!Z5425fk5A1KjUgSEP=kGyjZ?{eFWtETy01q{c~@5_ z+$!ZL9lULkySCwq3Pd#w&(mHL-k0HIp=w#Bw43D@6QT$f;wwcQm?RXzZE`GH+>Az2 za?6D6r^T}il$Pvsl={qI)r(p(y50L-)v`7a*yO|L{p^JmqmImqV(O>rsFG8Y>Y=Wr?f&Zg990Wov$e*}jn}*~lZQW}7oiZ~S9V-Dx_$A|>u>ZAW zw2kBAxLJ|y%?sF z8gZ&ARjb{fquFunV?CK+)-@M58jYkIIOjn5%0I9?M#+n3Q{?~`wjOP%_zyevIXuN` z{pa&+^|-3hQ7d4hFbO5hP(zVm!I)VRX6T`Cuo>8HEN0p;tU8nB-%cH6ocTIhs>@g| z?dBbpPeTgH8MW1a`U~G(=OstY2$#h1rp+^17ZxauL$SG3(dCKN)iC9Mv( z5aWtn6qClarPz$2KL2r)^i9FiwBNoX`@+@%d0YZ&*?Fa7Jh-*llareGdyyGzC0Ita zI8_D!9!bMZ^5f`BqQ}f-iSmb=2#FR`GRB`H5#5OIED@C{IU>7J-+3UiQr5Dv%!`Qb z9S$kD3!Qb02y!L+83+D(pcGJPA2x%s2R&W5^KoXdD7q?XPBu$#s8iZE+ zlAQq4ob^2chCCI83_+M7ldkl(yol0tM37=Vv6FeIDJ(!UDHw0NJ**OMTUH{7aYzmB zzJ#*tXh^})d;NP;dwtW9zY#&>jJB?C;6=}s+oHBfP5XJ1Y6*}}Puh|UK~&0<^egXA z_wGs?x>9YLrjYrkcN3DOaQMshD5k$1yJMb5t~LQHXZFVkLfz@5i)V`=+}B#K5@@o_ z{dGOET=VW(6~4v=8kXNd;kXxK@>v 
zIB?m_L&_4n$o21&O2R4+L9E}jANSKzfkg}7f$zDR88;ZrWwa~Wx6Bcw=lB1NBq%(^ zaTC)@rKw9hP)y%~rhr-qtiGOCbd=tlII56j(CU6hH0=&=|C@FF$&Pq(2!8s3SIYL} zz2YuBujkUR)#equ+R~GC7+rZdWS2?en?zHeke~eYm2FZQf)ou@6b$$d2-e9zR0)r6 zrI!@x#aJ50w_b1pSGLHH(K9FPJ=i25(Mc%lA1Yw31y2;3u4L(yYRXE7Ela!yrC5P9 zgC=y(i*k>PYm0<;U8ipA3gu`azH#X5{Z`uag;27%u6&h;L|-7qKt=8IkGS!7gEw6J zzZp}a^(qw{7LGM!)7(B96IhD9tuFWcZC!t4NO?cLE#2CY{V%MG=Wpv;)z@6v?~J^7 zykcTJB}vL|PdPUDEH`=eh2-rXrse&j>db*!*ADdHJk1B7HWGik`33dx15$&^%jm;- zesEqXgXwJlhm>*AJYrjdQig+LDiSrDe6c*QjpQijo=c6{V24f+ok*Zyt7&y%IE z_vdVePd@?_XMZYmhAv%h9n3oLzFs{A+@Gh3CT7?!34hr!^&P{XEYf~C*u1*`&?zss zHO?=hHpxOjz`r+HG<@vY9w|oPx+Hu12k0w*a{PPLL(tHfzh1tyNIU4yXz6gkeSK5# zfa~&_59dRM*j(n808rZhG5z^Aj>YNB_p){I=kGvz`{%JP)YL!3ufg^9b)NMSdUYh;Ry##ZW%=I!`6 z#>J>-W5CiT{WX57yKr`iZ<|5DQsnq?PP)qTlhHQ8!f%2o`=A+S2U&7dUrht_4+ z54;Z*>8i8Q+Jd6XDkq~3pU+Bep6_d%+SYr4fuw=i)$@Z%X8^;FZPP2M7au{x{2BYs zai`9;=ycW6qi8?B%UTnXZK2lhf+F|)qj`oHMfO33E;SgURW6kmB z?~nqfPpxcMaL7tcXYF6ma%cp{E=8Oue&0_+2RtO@&+Y66FO+%uA zVD|0`%Fk>MG|4!JX#g4QM&VW_qrpw^=(+$8yCu3 zSy>fzed9iQdAz8p5rrY&62ll||Plg!iCqdF5UlOW~znZ2k?IseYU+tru+%xQ<~gFfmt)vdpn zt8h?a;vo{7)E7yAZlwOQ5qI;1*h9@if&Up1#err{+{+VT zk2s47rc##5`iDdhPnbRIEFu_7nI=n?Bi@YopAo3DgkVl(%d7#8cyr=Do+$f&Mj$EU zWYKe|n0@Z%3AKMW3k&{dgpRpEq&*&3JL|sy_iCIRRlSYKv&_mB{}t$Igr6l=cI2q~ zufRaCX}CQam^W+Q%;5h9(e`BEk6H8P2LA%Q6fr3uaa8p+qR$Hc9{_q5UYUf$xx4XS zf^*Ye^W_kG7%*BEpV@x~IAE15KJ(=W`~L={ZlQ5F_ckKU(ktiuC(zRfH;b=q$l?6o zg8pp0tY)+2Fnbg*+rIp{{{(g^v6WXiockKlX8HaNz|5j63vrxuH~!DS ztqm1SmgQx(9BPm77cgIrv?l-?|1%)Kg*eNu+|F^*+lVqtuI$fo($k3WPeCqBq&)^$ zBq2U)i#)GYM>6JTgI7YsST1WRS1n+rz#zkwvn zKXe{{NFji12!N;`klha;8USPu0Eh+w*@FP0Awc#JfM^(yJq#ck0c4K=h(-a~qX42Y zK=v4bXdI9|4j`HUWKRHyCIQ)#0HUeAh)!T3U}8U3>R~gj69@)O9K?n{Y!-I{%K;OI zv3(Dljh(=1z{F83%;P4c6Ic(JIF6Ni+??wKHUlP3V#6Odw>yDtfQi%CzQ@h$PGBcs z;w%>CX%n^!*bA6AkCl4b#O(qO046SC!=E;3yMQBriObl&r%j$N-~?deDi-E>Q@RT{ z1DLpum3rRP>H^LKCT?QGpEs?#fXjf1+t|M6P0udi8erls7UpF$q6@eQnD`Sb^|G1P z1>6Np+{cE$Y!-I`4*?Sov3)O_ja|S~z{F!L%89h$wYnATA+#=$I?e79!%u2p~A< 
zIx0#XSV%&M9yX@oI|~)@VW8iqpwvNtoQCL;W3Iqh1d1PHfZ*QMNl@w_LeN9?s4)#;E#k$Gu|P8K z>J%t-kRa@#di0npuohp$k8wbe@9NYkb&w&dp?b`ihHw^D;>UQP{&#iylsYI7mry-+ z%oR9`AL7RZAUK#hb4ndlNJ6L{7p5V+#klw}5l9B6&Yn^S4N@Jd$A`HBZ?P(VOah98 zsdJ~)L5EC)>Iq^RB3PV=ACrOlVd{J-bub{Op?ad2D+m_P;>Q#qIM}*SN*zoHdYGOh zrXivQn#3^`NCvhphEfL$!XBn4i@AbmK_+ob1B!&LOQzJphNy<=DPkHTS+GeQ(}DV7 z>#`|za3C&Wda9T!NERXz#|$7ixVi#L9b8C4n4SivA+m*v#4!^{2ClA*QU?!G9j2#) zxq@tAEOE>NiiE4HqtwBNOoZtfU>c%WI7=L}f%@U{CDu1GQr%C1^ zx~(5R;ST?F)SbBTHx*dOp%LhCV~%Y3z}=#RzP^R=f6gxh0^Pz|N}nE@G=r`hG+mz7 zO9LJ!QeDNi_*ayMfDig?c~>bi{E@smZ*iV!+h(#e3)cyi#3H-a?Pahtcl77!wtYMA zILKscV||iER3yL)wtD+7V=aabe={ArQQN z)9A9c_^`jBEQ?Q*liW9Cz0G+^1Q#bMi&d1R{sEUP|1`hT`sNI{>f>RXn%qBRv(52= z$Tu!P7P=@&9l~gnMp)BneDi=dzJ5>E%OboEVwL5iiB29JvfSoICX9@umgOwkW)u`% zKYOty-b&dc`L8uzjQf4=#3~7O|7!(^&rJ>1K>MpeZ#RnfzX;=nsrS#pI(5+K{lJmQ zzi88$!XNaY@zRp(+F{2-q#x%bi%yf7JUC>F%YjTJ6K5eCb8$FowpCga9{eW5!v3#h zqIiNHs4xGL!RP)LG8LmEYNp078IS$vJ9IR$$>T%j+gz7~_i^uJsfs}AV~k}T50~|q zr{2=#=Qsa#gY%o6`=0S{yRpMg+BvQ+bH0~p3G?2B#;0mKdlW56tn^i$uVF7thRQGF zHWkxzinbM=?ryE-wBL7oK@+ zI(kKD-X}xfqp0<)qH3RozovMW7K$;+Gn+pj^?wp@FTFDv7Ju z?TxLcz7zbGih}6ytT^%hz2fv`^uwk+apc_< zF6_!DcrLoo{n}Md>p0F`hnNGLA>yzGhM#tHB3<}>@T4??K z!rrjZ$7H4;MEElEtnSCz1lYT>UI7xJexwieNh#;B8@NvXaq;C=R^nY`&Vqstf*)< z8=UOU8=f87BLznZTEk9G9+j-4WYic26O=?oybQ!@U10!@o~ZyQ`RO{o6;uAY#^Wm< z^AMAVu{vRMN#B(lE>!d$k2yW~l}h`?QI`h8{XMU;+1e_C7oXMF0_3%di9kcw`RU2x z_?w)H;&XjHe-l?~F;WB;Ucltq?ncGl?V9)cMNz+~ujq&HY~;(*9Dd#L<9>eKiYDS- zpJm5_*8tg}=71Mru6+FnM4d+Zg zU1)qTyx0!*U&Jm@#Lil56O$(`W*)5O zJ7WGdIgBphpE;2V<=gwVW>uQQ6;kNSc~R3GPAwE{g#qOOvj_{kUJTf}n5S1)fc;Z`bD z4z@cgbyh`RrK2>|^cR3G9{o`XrV*|4@c;vQ}HP1!I zGPCM}+eV|Yo_P!5ed!GoiVmnlwdZ#BFhKGPSB>Tely9?tB5r6dw-PzB1W%$k#0n;! 
z{?N3-BoVJn>Pg*W$-6~U+VjV$ed87*M?4KN<2T5ko_q?SY^4bk)E${jq7$P>eEE7# z6it;)flKTC9&^5wkol@8Y&!wD*JVfzSOLlqMotyBQnOWX%{jnR^fNz`SsT|T>{yul zSvXXo2Czxs&8tq8*VK3jM=321+AluzH!==5@s)Sb28e#UJ{fQ@wYTYU zCeE5)iZ$q8xZZa>wVNvK5kZ6J@%h@51fR%5IBljJj)Vs0$(lA(4QC(*^0?X84h?YV zN2>7N4Z=~fm_T9Uhj0wNlgDcCFo4##2tF@286n&Q*%M?7B^=;qi}vYReP8v6W8{KK zNS4_RwUqCQBDKWNmP@GnGzPK+`B^3$;ASHYCb{Q*KqcBE>T4VZP6|ZDUE#b@Ucjih zhEYzdT!JOR<2;H^)h(m*k#CDEMZZ{JIuvm#ZjyD0Gzkgnn@@SNLD8^>L1a>e;6Phi?9T7YoghJd&YQ|uZlUN65zLX$1b$2zDgI4Dp~2UCTH_h zxqUuJvYgSiq7|TB1%tGc)HQ0dJ`9;3Rg)fgu+fiyUoe9w9t(%(o--pKZRed~w5jY- z=E<`6^|09)GAaTypbbo${ko)Ug_j~0o-vaS=R~9D$(k{f4HrPmv)h>JF6rqVS%3}1n5y=HYn)wTHFEq2 zdT*)^VofuIFC=glX{LO)T8H@~!ko$vtxs}$ThNT^gY`wX)cQ-kaDTygPib-jw}^qCl>|>Jia83F8Qg=K zEYVLG)$F6Syy?!w$wUV(fmEhtd5D0c?BjBadRm*K%x~%X+VDu`W%~9XBiU^Uzbm(% ze27XOjW8kDE0YjADMWQ2dzm@fO_*M|tqU*@wLxTx`W%J%`ETvnRvnya!U#U!xBoQ@ ziLyQ&yFtTZ;^XHLFsIL3y?F6j9hs!Ctu4JjA@F3Ii_fj-v_ZXyH&R}=!Np22Qn}+Q zuVnEPl05MIN(EEk5}VHl4HfvStX5*iIb=j+oK#TEtKm4>gVL0jz?Ui|#-v0}3rH z!NtuE<6{Q#x*=2dGl6i6zQXftKg1({P$@;`f-49ko{9)v#{JBNDX;fwY-ue#f_Uri z6SR5ak1ep!nEzbCm2Orm;K0-BeV2M9BUU)ZILiF~T52TMA^)dbmtUI{$d55Cn(L~z z4>UB_n>}Qf`w4ZR^_vu$z^A%oXR)8jB#JlzR^OUosRWSYtr*|e=Vo7pLunkMek+ps zevkd4r{t?XTJ9}?2s0T^R$kKcg@oXIAh!EMV=;I9jEjf|@o*cUZ-|60Rehowvwqp$xwuxT$rYQ?n+}h#gnYtv&IY~`@ zJ2Ey%1C>s%F9>&!N+T|scq%EB5aT;o6C>GNSQ>PZ6gnjZ?=ZEhA~=;x2JNP9$x22x z#3oK)+|gPjqxGXirt#vL8gJhbYlJsk&3So@L>4-KxiQ;YfRaXhPF87Yal#4rg#BXmgk^uF+&#rvEIoxpsWGL1kWB#j$2KFbX7uMP7-)w>)bhU)F8HE15o>5B z{nOtJwgoBbGz@Euk8szSrU3&UzMY|Um11r@v89*MJN@&05aG1>oE5YAnDi$cO z?nZG7esQ*$(-#fdr<`UDmnDkn!hzZr)!BDRY>S6m-{K#Bi$5N9VE^xu!Fr3Eh6p)= z)s~*L4^iJ_MmaqDiUvycFb@1z^zU94M`B5wq<;!I!8Y{6-)^JWq{icVrpB9jEy+uG zFCp46Abza3%8ai~Pq`N*vF{<77S%=g<~8{~H%TzZ`LB`1`B$~*wT)UEB`6a{a3&uR z;nm@0Hy9(Jee_!07q6aL@}^;T=}6Xg?MNOQAn}`dWX}G=@8d8Q%bZ7i5M5p$;5|&n zG1q&VnKyQ~HpF-8y~QknLiDLFsbETy%x*~B?rro&qDr%|%F&~S;b&xW|21+K$|42B z-v(ASWSrlwpriGk{zV|(fekk<((_IQ9t-77m`Ga~Z|06(B&MBVKhH%R6j5J7iz|n8 
z;TQ}?SCuRl^b|!vg#iC9Q1>JvGEYn@w_4n64#X`pdx3@zRuf)iq;rJQatI$#reqNM zWe`|iV=#w-^8D*o@24gHWG<%M?_estp17zf=3*RD%6to#6v#U3NG9tbwLa_)Hti9o zq@<1ud8^YSUStp^jhTJJ;LcDnX?6z9-|D2-vxJgw>LnB8B);y6@s)mw@ng4(xj_uu zj^s;&-!f$`uP3Xh4x~OL!l6O=g;*gD)6mk$4*KM=oBj2#H2V%A}Ov=E97C+93a9z5;g)hK}uvttAx1D z4+(Jt;4r>wJzT1CswnN9-W1eV1o@~WVM(qqE7W1w6ee13r%lqY4DzJ%M)LTVoBWgY zhpu_nBn;o)-Ha)rY74>QOlD{%6ts*~zMW80ECzqFn(F^C@$wJS6?Hlz<5#rzDcRL>xv;;iScp z2Pciy#(0F}QMyyQQ|S-It~U*pcR406d9$rH%$KOQ!&PyYzhExgNgUOK9F2ccOc1>@ zWMFTuK1^gojj2i{Fx0(2lOkRe+_E?-4-&G-S`@r`L!AkHGZjB>4_X>Kcc%|1J#yp* z4)o&Z$YcZKPC1Fqu0y6Vi{#>D3FA3GoALD9MNN|yF~pI^8O7oITIFR(Tpin1-u?C? zP`MjEadiN660EoEx#e!2l^QloK`x565}gr#Kj_xRc8phb#Xg-MmwsP=;xGVQ#Mt;1 zW!Dh|nO1z_A4W4Wh*$Me$~<<*Jx-PL3ZO`Fl_;LyI5^APUgNk}&%kWPWV3H9amqgY zI4Oa7p~joY-|D-%aJ#mK@#O{}cJFqOwAMrSokHouR?ZV2<5K=$d;XlGJJu{mSs-4= z*)M`%er*wL#R$ir)7v(Z3pat7vIQ*(TC@CXmxEp26Hnghy0Vv_|>ax1(fs{1y7IE8a$yvcO7oC@6e~|HTzA z-``ifhAUn<^7w)IHB&RQV^TDql%VzN{gGB(-37f;vV-~xt?I(W(H0Y~ZZwVHN)u?w zovf=oi^u0hOI4j!)1^PYPMZ36UG)|&qJ-zLCvUq%!{tofgq7x;wnPt8*tFmSwzjq+ZBFWfV~=whI|orW5UJptcm}Ij_%l4*2mA>~q4={wF0!t) zR!`dA@~r%%G`Hjz)LZy*|I&9!Flg*gQ}`9+RR8iM$yklN7eLAgxN~{@vDw!WxUfa1 zT>9hfS!ES(anU6YNljJR`_sCG*4s9mv)%=y$@3-eO$`p1(&WABju3aAm^_qo$3KFo z&qM&YJDum=^|*yfSn4zTK@X!kXAK4Vi7k< z8^^jcSiTCe%$y2Mp>9pA*d$`EE~!mg)QsI8D`hjXgn~n{2+@8cl2^1_)COw?B3Vws zm8pX@)0P(qGP-?7gyf*ub^*;<@gkL!6;(Wg0n+EA4Eso_nIiU;B&i+-s}U|c%tJJ#`TAB3un}07 zp`{lyCyKd|I}`~54Pecxg?>b5 z)bIBVtzc$ZsIE_;E1WM^)_QM~T*w&04FMXYa`yuLQn?3!jlXc#CeK?((S(5TIgeLoe{_gk#D1cybYi(xO|6pEE<5zQcn6LkzAA96M3-Mx24$ z02>e|4u({~x#vw`V_86h9GUccH?TWdi51`N6?rQ}&r#bAyUvZle!(jxqr@i9BSl`l z`-G=MnaCI%2hgO?WSSjKTe2Q`;<9$nuJ2IRHwIVo_J}s|7MN2!FW*i#%vJh1Yz(w~ zZ1o8Fzcv>}bR)lWe15xUlE-q@zL2$R|I^zR0_8j{EB^`kVF{DQ-YkZ!fVOrZK^?n8 zUmbg&T{8_=-g#=udMMHQ-Qdd3anvd||v+54H6k z?dKXRXa;AYiGi|W`1L_kgxQNOXX@bkn1}oz*PKH_=Jfpw48qsilKrGu^u$7BXb&NW zGVV>mqBeB5mDZAptfM%`@HD#e`4_1C7Thmx(+X??N-1i(F3TF8BQm9O>Z>W;Dq@aI zJX?|tMM+?V!-L~pHkmym&mdg7Hdun`3`rFBMj*2KpAx$z4cp<58BGzyyOBjkO}NJH 
z(?8Yc<|bmQ<8jz}IvkWZ>N21zEpzq;gI_s_gDJWbzG`S{XshV5#S(+jm&a4skr=wALlEf{kP?uRa0nGd z8bLyG2?W0h{Xb7_uT9lP@t!-fCUsLtx^;GRlx?K= zi$VnlsXdMU3YZ!yRkU(@*o~y|SS=K{JreVAs#8*m(RT-Nav* zt=oIi?_px&36c;KjZat#If9Oi$>krM5XVoM(u9l$YG<=Q6Wq(j2#HQAu+KF@FUWJ7 z0nPU4)Qm{%<_>JIbR~OmFVH^591q=?5sBM8ihW4gyan9A3qB7sHzV4hYo-HkPzT?k zqhJ70Fz`??fGHRxDv9WkPYLbYu)y=WHF1@bufS1x!~CC{?x5_-xOV}*p{wSTFl4=Lc_Sq z*B+|h!{j3ChadyX`steeIx8 z{i)K^ve&v=R?h<*sl##niw&}f0wmlB`hw(sPQz2OD|-8-Y*=5seY>zEJwNZ@?ovOg z#Ilvg^`818CZQPl)NcHq$kWSJj~n+d)P4fxl@`5lG`@L)C9#4|Emv4ZoDX8CMQTE3 zsFzP$owDgRVu$i4rSmKELp+D{hK9SN=;GD9b{@VvT-Hms!dR`J_UI|4VT~bpRWSQ@_+YG;u-r9q_|bmqI|zY6#d`D6RmH4axfwlV-QXT>#w94o4_+MY8maOu4&b2tHDXFn&S2FBHTpK8}U zre&9*WdyaM{Ca|Cuq{y--;nDbqJJxj3oBT8)k9M}WNhKW0_}sfmgGX$llu8Za~d3Y zk|jgqJ!9ppIAP~hSO`a^+l2a5MEPd%+;oTZ>{*|9^?pb%Bc z)-qzt6#g%w{MKZxx{>_r6wCblg;5q@&+{({#`b0n_=&KKqLn53FL{Lq`z6QdSD}lJ zLL1+|49xPsVuncniy5-DaDiIsxVgX_ZGO{++Ntf}lfrwa=a&T}x?1E>x&nP_p<-4c z_kfV}#wM*M4XRNd_ak;K7IxNYvzq$s0W%N5S5`gh?j|l|U|zAKsH<_z4byRtO|sc=xBs?+jbZ)SLVWKfkcxU8zP|M83iO39$*NZ)z=( z@h@&;yH81X;29HJc3)!8>g8kts}$|dLkn`q%F)*~`f3{*Up&%Ki}e)+aZkVD2c!XE z;f-QYpAHick~$x5(($GwuLqQlI3hdK_2WoVOy@b{=-JoC0gu#~5{Kz*p=+mMZOqq_ z$}i_RF|Rs9abG?QZ^r{Ht5Xwir0a@=2m*Xes?7S=n_gzGCESh~*Jjp6cFIZ=j-tRU z42x$!Bz9+ zte!^l?zRHP(GOBvvQMLK-_zflP-WpEcb2ORtvLnJqM5bf^-w{*ZvqUAcQbH3nPi&} zHeCpsK7q{bTsP~&n3eNMEVwgVRb*mYs=4Mc%rRL;1wJziI9RnvrTBo??Ikv=i3GIK z{kaC0bV~IwTG&ebaP_~G0l5_4lK&_mQi7I&=a@`gO+bnL7>>3qQeFy7{lSw*I}Z)%H%NTMunU3i1O=$usn z&FMqo-0may_(zF^Rt=#8nK+!Acb%A*CkEVn8tgxPvKGur2+U(LHQ8Y(e`#`W9mYji zp<#O#mo;x#;s37v^4~cO0_8n5^}Nc|EumI z5ZzNiEll}X8%XHkvU?_8+$%R#sEK{O9)Dhy}G(-JKV{q+5dcZ$lcxa{#f#6K{U6DSM?_nDLu!uBEa4b3(6)|rD zcZb~z2W3Pw08CR2L7t$V+d)xi1hju0PoEE9B-olGoTOtocfG!%#*?+EKlnf-8Hw7Y z0Fq3eRX`y^7tFjn?NAd0^l9}v$?kq1mu|6xGEHh_TvoQ1u&it}uInbyZaY4lndSzQ zGgdSPZ#S|I74YaanhaXK?Vg1dIZ?5n2uffOlIijmgv%omv2Oz{hZk}d355(GXV1fg z(eQG^9*@6^ODz=KsK2iw_JB$-PyxFad2q6nUs|GYmi{_KIe zu>`m=@>Vhz{*%%JIRh~GBWkuH$6bie4!)@DDvmJHqZI@Tofp`u%s?l@_Sf$LRxy-u 
zd|%_ij6{Vz`FXnZ`6*eP327jwkQDg)R~nU$O;hzCmYs9pLY9tDiay<|v=IJf7GB%1 zW+>Fi8AjUblZfUGBgK1e8sy5e5SZJ+7PkWa;WMN5aUaAnLzxx5E`JMlj74Wxhe5_@ zi=7@gh7Q5dONlaS4@U!WKhfjfW$QwDEg~iW$i#ODL6T?PMXA!SN*6LGPSq-2l$CK| zEzMLbIM*{Wr7~SJPLJxm`0V6a?_2LnVIoZ4p^_X)EG26L|Cy7B>W-u;7sHFpR9z2E z_27f_IY14JG@dPC(JS^~o}NZTU}eo&tc%WA5zi%j?DI> ziPsbfX)NR%%KiAff-@o&>@{w%y8?iU3-;48+vmM_Yzsk>*uPwn2I8BQ>(Yu}*esTs zbxh~|lOJM-o#C($Zvc;y0D2#lGffcR2V{(-jQic<#s`Sub7q#EImVrJiUR9%DhbYl zw}lxq&^}OUIod zmleQeEmYAU02A_9q`!U-hL=UiFRf?4g5G;`idWTfeqshu(%77hT}W~woulI!oo47* zwzS_8f3b%aowRySa~-Cg6aY+Yd|xGN2O4{!uCf}pD@Zu~|44i7r(w*bxxYS{&yJwCva&OtqB65!z<`CZSoe@txYi(yqp$klZ zqlz`qnxKX zAYv~Tn147MN;j@jN>GzRG{W)x1UYm=Gx#7}!>bt8L8b zTgEEhNEC%+YDvx&ZJ$()(I_~{cV8H43nO-y-F0;%ii?u?vuri0&D=j}%3u%1Xf z;IO+}r}9v`0kOhc`bc|o3*C1t!{AkI1-tv$5J?4%#lv*snPH$-sz62UdLagu4sM0m ztQa5cS^5rX^p|OG*SxUY$-^R|h>1Z;Y~-rRyobPG+q!l=pQ>H(z5vzf;il;RoyAV| zRHA9m(=WRtI8?SnT{67bR7qzI-x`A0WEZl{E+>b>~ zx2Y8#?Jn+fWE8)>jf8zz2XJufyTBdmj<(0V<7V(aPJkl!H&0hFmCoD-d*xzB+DPNA zK~safJgFA2xA7h26GX_LM$_4NgM~elnlnSYDx)1MULV}$byUk~!Y7PAaw-WG(~57z zRK{x8(+9H8Ls{&jfvFcVCO_kqPeN;fGnCPHyYM}w?Ufuq4fw{~ZryghExQFaXB5q| zOEdvi;Ez(#=D$V6;bP#E@h-RCE2ljybMO3xc9oj{)Tr$>yae--iT~SO4bf$4ZUwj0 zbb&j$@>;@OpjTz&Fl{w>3*W8AZ3EqRsRxz}q|))oHke`}fPXWE1+%&amPCPj`a=b8 z6B&-kIG^r$OY18nLhkJZSl2D?WZsfv25>7EA9$$h_ZSK@2R(Sh(yxMV>4K^N8vmw9 z0aC{6uN5-a3c_oM_fSpaAHPE-UrZv#R=@5YcDPuGz);AZ;J$0(x5GWa9EFJhEG94& z)mYeyD!5c7<-Ha1&A5lDt=W|ecplTynC5RsWIyL9VVlMY&E2t@Ekv-0{rFm}{!mY^ zLYcaFTe6UMzerB-@oTqpJyb>Td!d$4j2H$q_@}-lBkL@Q`#d(xDtXK}`NFEX!?H)^-I@!*@=&KC#*6cJ2zJGC1lFp#$4Oc3t#={jP87e47Gqxz!s9 zB@mPtWVlAS2B1@8xcKbR-K(r^oew@2n`cnXhfdkR+D<}BiHt)EtHVM{;QSwAUzC_W zs@pt-Z!_%N*GWC1AX|0{pWNvWvNpTwwAPK7AP*D(VB=po!ocgDX5r*?)n{38y^mY@ zfT7z@aG6|cmg>u?gUjMHeQ*(2_YSOeVOg(N36%)?CJoeW6I?M+pIaY$cy>J=<`4@e z9DIS<`-~WBUQaf-L47d(aS1OetSj2Qo62?o2i52+*4|t>8a+!W7m=#%z#aLzHsKvK zTkU;EQXE?zZ$1Lllxx~n_*z? 
zA#$=HL~PA2zz`|7b|`c@l04u_h;O6Es86=OYPYcU#8%# zmqBR~ya?I!_a=g2C_gsa*YG(Ur z8h*#uA>ZF-^7d7`=0*X5sh3B+d-?tMb{Ri_JGwy~-OO~oouIC!SMBQ67t^>rC8kd~ z87tVFjdNt!U|4iax_Mu^+EUwu{JT6{_Py zo>^pe*Gag!n3OyezkEe%=!i<7n!~~At@BvTn~?n3lC8@-T}I_(E|$^cs)~8}%1+U+oYb?=i}7FRw-4auT>4Eq#5&tfRAwjJw2~Qnbgycq-T&0IE8jU zq7H6<<_v5e;a@|xvHl8@=n&s^o*<#Ox!g>X&?g4$X~hL!&%dr~>EncK(O z&f}Ru7?EvP96f1HMR0F7elq{T(o-qpQHeYWGTO_5rg@tg_ds*g4O+IS&@(1v){e#h z<^og*zv?=vHV;Ri%hS;h`>N|uP)PxQ*y-1C;aBYRzwH`H{ifq}tn(F3{Ex9-?&=0x z{HF7D{N@$;`H$73{dWGtfxhVqxW)#%;&c8nHQfK*@Bg87-c-5HKlxi_{c;}q@6lgZ z`ArqOsd9ZO`rj&Ql>eymo1Ah}<@)Qzf2&|K{G-b6MeH|Ku0OK?*Fv?etj^%n(F=--0+(Fe Date: Thu, 6 Oct 2022 11:15:29 +0200 Subject: [PATCH 15/24] docs added. updated python tests --- docs/schema/elns.md | 3 + docs/schema/suggestions.yaml | 2 +- examples/data/eln/README.md | 14 ++- .../TabularParserRowMode.schema.archive.yaml | 73 +++++++++++ examples/data/eln/Tabular_parser.archive.json | 6 + examples/data/eln/Tabular_parser.data.xlsx | Bin 0 -> 10485 bytes examples/data/eln/schema.archive.yaml | 9 +- tests/parsing/test_tabular.py | 115 +++++++++--------- 8 files changed, 155 insertions(+), 67 deletions(-) create mode 100644 examples/data/eln/TabularParserRowMode.schema.archive.yaml create mode 100644 examples/data/eln/Tabular_parser.archive.json create mode 100644 examples/data/eln/Tabular_parser.data.xlsx diff --git a/docs/schema/elns.md b/docs/schema/elns.md index 7f0d7e534..2cc28c8b2 100644 --- a/docs/schema/elns.md +++ b/docs/schema/elns.md @@ -38,6 +38,9 @@ NOMAD's upload page: --8<-- "examples/data/eln/schema.archive.yaml" ``` +```yaml +--8<-- "examples/data/eln/TabularParserRowMode.schema.archive.yaml" +``` ## ELN Annotations The `eln` annotations can contain the following keys: diff --git a/docs/schema/suggestions.yaml b/docs/schema/suggestions.yaml index d445d4dae..23167580a 100644 --- a/docs/schema/suggestions.yaml +++ b/docs/schema/suggestions.yaml @@ -7,7 +7,7 @@ tabular: name: "Either < column name > in csv and xls or in 
the format of < sheet name >/< column name > only for excel files" unit: "The unit to display the data" comment: "A character denoting the commented lines in excel or csv files" - mode: "Either 'column' or 'row' only when using TableData as a base-section" + mode: "Either 'column' or 'row' only when using TableData as a base-section". Defaulted to column ref_to_sub_section: "List of paths to the targeted repeating subsection < section >/< sub-sections >/ ... /< subsections >" eln: diff --git a/examples/data/eln/README.md b/examples/data/eln/README.md index a9a993434..3c149fb1c 100644 --- a/examples/data/eln/README.md +++ b/examples/data/eln/README.md @@ -1,6 +1,6 @@ -This is a simple example for a basic ELN. It demonstrates the use of a NOMAD schema -to define different types of entries. Based on this schema the ELN allows you to create -Samples, Chemicals, and Instruments. The Sample entry type also allows to define +This is a simple example for a basic ELN. It demonstrates the use of two separate NOMAD schemas +to define different types of entries. Based on these schemas the ELN allows you to create +Samples, Chemicals, Instruments and TabularParser. The Sample entry type also allows to define processes. The schema is meant as a starting point. You can download the schema file and @@ -12,11 +12,15 @@ Consult our [documentation on the NOMAD Archive and Metainfo](https://nomad-lab. 
This example uploads contains the following entries - A schema in NOMAD's *archive.yaml* format: *schema.archive.yaml* that defines Three types of ELN entries: sample, instrument, and chemical +- Another schema in NOMAD's *archive.yaml* format: *TabularParserRowMode.schema.archive.yaml* +that defines a tabular parser entry - Three chemicals (as defined in the schema): *Copper_II_Selenide.archive.json*, *Tin_II_Selenide.archive.json*, *Zinc_Selenide.archive.json* - An instrument *PVD-P*.archive.json - A sample (*sample.archive.json*) with two processes (PVD evaporation, hotplate annealing) as sub-sections, and references to instrument and chemicals. -- A *.csv* file. This is not directly parser by NOMAD, but the sample ELN uses it to -parse data for the PVD evaporation process. \ No newline at end of file +- A tabular parser sample file (*Tabular_parser.archive.json*) +- A *.csv* file. This is not directly parsed by NOMAD, but the sample ELN uses it to +parse data for the PVD evaporation process. +- An *.xlsx* file consisting dummy data for the tabular parser schema. \ No newline at end of file diff --git a/examples/data/eln/TabularParserRowMode.schema.archive.yaml b/examples/data/eln/TabularParserRowMode.schema.archive.yaml new file mode 100644 index 000000000..4685fdc69 --- /dev/null +++ b/examples/data/eln/TabularParserRowMode.schema.archive.yaml @@ -0,0 +1,73 @@ +# This schema is specially made for demonstration of implementing a tabular parser with +# row mode. +definitions: + name: 'Tabular Parser example schema' + sections: + # Here we define a sample section which later on, it will be inherited in the + # Tabular Parser section. + Substance_1: + base_sections: + - nomad.datamodel.data.EntryData # Declares this as a top-level entry section. 
+ quantities: # Here we define quantities that belongs only to the Substance_1 section + quantity_1: # Name of the quantity + type: str + m_annotations: + # The eln annotation allows add the quantity to a ELN + eln: + component: StringEditQuantity + tabular: + # The tabular annotation defines a mapping to column headers used in tabular data files + name: Sheet_1/Column_1 # Here you can define where the data for the given quantity is to be taken from + # The convention for selecting the name is if the data is to be taken from an excel file, + # you can specify the sheet_name followed by a forward slash and the column_name to target the desired quantity. + # If only a column name is provided, then the first sheet in the excel file (or the .csv file) + # is assumed to contain the targeted data + Tabular_Parser: # The main section that contains the quantities to be read from an excel file + base_sections: + - nomad.datamodel.data.EntryData + - nomad.parsing.tabular.TableData # Here we specify that we need to acquire the data from a .xlsx or a .csv file + quantities: + data_file: + type: str + description: | + A reference to an uploaded .xlsx + m_annotations: + # The tabular_parser annotation, will treat the values of this + # quantity as files. It will try to interpret the files and fill + # quantities in this section (and sub_sections) with the column + # data of .csv or .xlsx files. + tabular_parser: + comment: '#' # Skipping lines in csv or excel file that start with the sign `#` + mode: row # Setting mode to row signals that for each row in the sheet_name (provided in quantity), + # one instance of the corresponding (sub-)section (in this example, my_substance_1 sub-section as + # as it has the repeats option set to true), will be appended. Please bear in mind + # that if this mode is selected, then all the specified quantities should exist in the same + # sheet_name. 
+ ref_to_sub_section: # This is the reference to where the targeted (sub-)section lies within this example schema file + - subsection_1/my_substance_1 + browser: + adaptor: RawFileAdaptor + eln: + component: FileEditQuantity + sub_sections: + subsection_1: + section: + m_annotations: + eln: + dict() + sub_sections: + # The repeats option set tot rue means there can be multiple instances of this + # section + my_substance_1: + repeats: true + section: + base_section: Substance_1 # This section inherits from the Substance_1 that is defined on top + quantities: + quantity_2: + type: str + m_annotations: + eln: + component: StringEditQuantity + tabular: + name: Sheet_1/Column_2 + diff --git a/examples/data/eln/Tabular_parser.archive.json b/examples/data/eln/Tabular_parser.archive.json new file mode 100644 index 000000000..ba018b9ac --- /dev/null +++ b/examples/data/eln/Tabular_parser.archive.json @@ -0,0 +1,6 @@ +{ + "data": { + "m_def": "../upload/raw/TabularParserRowMode.schema.archive.yaml#Tabular_Parser", + "data_file": "Tabular_parser.data.xlsx" + } +} \ No newline at end of file diff --git a/examples/data/eln/Tabular_parser.data.xlsx b/examples/data/eln/Tabular_parser.data.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..52721d866c999c187841467053ba9bfffda0e168 GIT binary patch literal 10485 zcmeHN1y`I)(jFkV4GsYU!6mr66Wj^z?(XgoAP^+DTW}p*f_rdxCpaOveRE|u_h#?+ z3wGZ#r~Az5shYQ|>nUvoX{Z;N02lxq004Lm7>GroHGu#CaG?PJ3;-OYmWZ9Lvx%*< zo{ERPiIWb!yNxw*?h8n&Yyc$q_y2qTizU#LuqM~Vh}m~4>m63m5Q^2v*vHs-HdYW9 zj;b)nGCK~E9fR07=A+e*O34oBNn^9}YCoMjwI-CEjUIgmoN!kQ7h1)^Yn-&&+6{5f z;JdOF4b4w~H_JoS1EV0*mxPnIKmjn-KCS|?DK;je55IgF(uBJ1UhutIv8U33K8Oa# z62)=;RBw2a2l6eSC0mcXGisuy=FqEI-Zt_GeMMuprRZ|C*D7$c3Gb6tD$<{JCzrUk zwNtM2Z6I}-LSr*fj)%rzTAIy1P`pH5P!aP~LN*!zC7$xrueR!Khlz1h`KYF#*D)j^ zQ^p#FXS2=%rssL1J@TTKoZN1m&ROsL35@Q5#{)q>ttuM z{l2wH^N0m8BL}hcue!3p6G^oN8QsVe^!VWo{NUw#Pv*3}E`g8eeiFkM@ye+}^~Iy` z$J4x8DDRn7rvc~6XaTTdJwHPM6#j-zkSZhT5Acc%7<`Cebm}>pSUWM$|2F;)mH)+1 
z{L9wM;$-E!8IeLyB_6^Cu4h(afx^;mLJ}>nReb#=mr+5{d1Ux29h3w>m6t(KZ~fYQ zABL7zc%u&nUSDmpmPMjt@{rcMm4_zXIk>>mP&j@PcPQKFL35qCoViMQE9F7u+7?4o z+W0v~dT@;PgY~q@TLM{Mp*f{JhZBTBl{qJEX{B3 zQ{gvcfp9LFqsbJk0VhMVg$nNh>(`gJFIAPzxh$#-Gab1}JoJpMzg&o>bznSt)5)a| zD3P$D-!PB8?I+K^^wX?kJs-~S=;46xE*&@<42&fDyb5+y{|ORtdfpO#C;;FT763p6 zOUB)r!OhOm%Fxcv>Noe5uWDnL#t3}kw|Is)&j^GE2ByjZ1xu&pR8v7qMMDr%>TI}p zQwp+=r#vDOX(p+;DCF$vV`F^Rn-{l96~>D!XvjM=+HcK1ReYq`;wpc&cXzF7dDA>A zp}muQ{(j^VXL4VUp5Kpq&GEc$_dycMza-@lpZf3%Ynru))N=ZHs!ElTy`mv}*5-t@ zrI7uqHQYyX7MtPkBj{67L_kOf1Q-T{)Q`z#Hj*@4{%~DYFYZP|XGg=|nHGIXRObbO zKAzXWgtM8bwExI%erMXbW7G?+%VOUmQZWmE<8k&D8|lg%{?iAUL{^N}jSyZab0=b- zZGjq@U=-nT!v1wuvVP3%loEmy4fxZbvR1xN*D#@F&*CBqnm*+(fUT&xO-WBp2@pgj&f^{y_N-(AMJfM%Zx<|ve;^th`_QTr< z2p+*<@f#j2ORA|EaDDn~5;>jsCf?96cIPa~u#^`%F1nEI&u7DKg>vR$#b|OQ!}jAN z9Hw%H_;TnW8(nERtTno?S-TLKKWH^AH03kyHA6kbe5!EZs34{_w|FvrP6Bx$u{|?I zSyc|9y11WGEwI~tEtM~CDN_B~&_>H}y4uiolez1d*Tr;BsDp2;2k)~CP$L>$SCMGd zExF*F(?QG)V3EukM5r`~ckF%Vy>|yO=DwcNW%PN&zv%XZ`5Ldky?~W!)|u9f!Ye=v zPDwTtJQ@}0Ci7{$R~ZV8m&?Army4EIAHk?`vWWuFQmN5A^bLMpPSuM~WE3)nu@s?s zt{>%e*=n*t?-cBr93KZ#M~G*rrBrw6NZ<4J+iy8<$|<%=EUTF4vW;gezk83GiMV#; zhVe!V0{P|px@VN>~c~z9kJWgI60d4y3tkUg%^R5RgLD_-Z`(Sy#C9RuGoB7wNxNzhZXtD%z67j4d z-PPeZwGF{6C@nGHNB1>jH@fy2WCu(tas9s=QuT`?z&*ubHZUZrWC#w5Bsk+480eis zF|b`>S$uH2?6m^}=|7<+Ra|Ir2VQv#hTBU390V9@f4HQ-0`6b#2?886f&u&A{j|o7 zTJ|s^1)qkr2Tim>ktaeGmRwAC-cGFGF9$yyr~3wYe8CY-QW}>el9M-X5hN+*RMJoB z4oXnbB49P)&z4T=>lbo*X+#@Tf!Zvlv3r{l)xnmXYVkfp5$Lx2vd0%`@Yc>|ja4T) zrrj5R{2UK)Faj|?`vYA~pv8kOwgx1$M`n(8{6=>_l!hq31A!V@k#y4wR1G&x%OhGlHN*QJ zRUj*5HFHM5=MZE&d%9MRB6TX$+jQxl!^BlmW|M@A|IR@&x>9rXONNq#BwhLe9eeq1 z-M(N=p8+smU1=ga)1bJ>{&Njxl%I+{x6-y`z4tu3)RZi8Qw&+*2#@7W@L9BY%E?_~9HtCyp41@}%L$)ezur1@;Zcus zh95d|MD=b=7u%T(R{JQrq@4Cf${SBo4NnrMN@<4c+<}QTeNIz_2(3U3!DUAW-n`2a z;J883&{XTl1Z+(!IaAl6Du@$ z%P)wH-A7Z>h+>32w%uAo`|aS9**kb_8ek2)mD-nM=ucWCpL5#+lbrRIcvPR!L&^nH zZZzV0EZF78-LFfzy*CZDG|Wa&G5W_;kd3Q6l^TgiM~L}cFUK*&a)`nPc}dS&`>*ad$7a3n5E7^jWvpaST>p1S3vUkjHn>TTI 
zkx?F`KvG+IZm{7(9Xv6G)++3b9~|<$vaNX{J`B&V4}7Bz%!Q+Q^+p3cVe_h26N^3v zuptU#cE(lC0~xi5^=9VD6ehBTgY2N}+TUI5+DsCtu-ZX7=~;PP&|9F;q`Cz(RbIAw zWU<1|l>2LC)-s4jy$K(Re+oN7Xw>0CRA6IP6KminIC|ZJuwh>O60ah6@4S?FWo3dz zAAP^s3w~1P930a8xkKj!g9Y5BP8&TJxOIJXT92%|7Bu!sXInEpp4 z|7qtQ#{_EQHS;)U3hmWmppL43aOm0b(7DKA$uhlC5LXS&+S7plQxx+s$O+`c&cN&sVy{x!V}$rotT#RoKB9v&;*eemF$ARlib|>oT2f>oeuo)a z^mz4mkp7ym^r1l))PRr01pfbpIrA?tk01W9M2s1Fop^^5?h_T^q?WFjW;wl}qFR)p zf#FS%=kE}pT{zR~g({aGkwBj-#IBZg`W&!zyV7Z11GSe}?lcsifd09FPNnVZx5}*R zHh6OW@MhrRZ0r=y*$>kFcqO^7n?cH*$EvIq3&?3GXul0Qj$oWN}ATk6e~a|qK*j|V!_ zeE!kCH@X@v5sk1&GK%vJ$y9HQ6?oJG`^E&Ec+s9)F46DJgEKO;uXxn1ZFc?K^8J;p zUDP!n%ib&GFw2K-v2H!SKF0Q&e^gUAq*=H8BtxpB8BJ9dp?E&m6NMR8ze2>cK%9aU z7BpcTMbLEFnJTxA=@%usW)onk@S?O_biLrKMiunISiGHY^ry5# zefWl98R`zxChHL)i0W8)W7&VV(FsD6nq z(k$$L0>YrxWNg|3rE*--xEtL-Lj^A%=GUZ0$ca*ZUd65Q8%!UCk)x>A)Jm$!bE>zx zYlOo?_LWWuUoU<)$YkPqwdpl^O@)r`Yko`qxQElgfIab!LUad4L&P@wnYl?u-&=jc z>HD`%%SmFmAWg3VJ*dyx{g;Q2njQ6%MG)q!fr*9n^W2Atwy_4t3U@O(Eo!H#!`Q3i zdOEdDge5T{bJ;3>RiQ*k&Ff-_l|t0~+TCFprbT9lbJ>`BTHSNf$GQp)Kdcl)B^?_> z=|e!4ZcU3=#$Ed#FjkR*gxcb$#}s3cOOKS4Wzpe^4Q*h{Zq39I)jVjd0QmQwV`~gper0?q)ne!8M^dr>qm9E?kdj@6dA84~@ zIEWcC`MjRazFmZM@cI4NzwOqmX#&wm+rpA7E1%57=siC^;;ia*Je?h!b9|eoJn!o1 zc(@%->UciqzO33@#$ssq`TqS|JkFZW<(_;D;o=ejVe;AQi0ZS74Wm(?!vm1ITEqukE>*`}O9bkD(5<{3RtwEU=S^fEQYyu(p_h%KTyfpF z*Gp1|&(7>KWme)Znj%^v#y?t}&`Xs8uJ>57)i#wDqN_}Hk>8_76moV>DtAXR$c*NGU04|kam2QPjfUA3v~{%keF=WR@C0Dccf z-|*s{zF~8E#UmvyQ6@IR_DJJxW=Fryw-gl&WTKw$sW0w~-ULhUa)({M6zV`g+CsU1 zrPIS6C+)aD*%XP(@{!MVK3aj-X;CI_pvRX2<+a#Hw;)Lz@0bN1Q9cm@g&L;i2Kf4} zM^lnU=xxi?1j_I`ldpu;tx+j$L^#fYRgor(y!G>-(FhhE?u+8Y#THy;=cqnhuLgc6 z^>G81;k~8_H4^c+7xZ z9k;*0^R~nzzG}D{1u9wv@*0G~At2$b1cqZhX^Uo*YFeKpq|K+Xqe5YdziY#Y-;VAb zi>RS|a2tB%2H^w)9iw%ApTM0)WwIw4Pz!%e^v255JahSreO#H-tcR+T?##{eeTnIm zE`O!TSF4l@&A!X4OCigYR_}lziqr4=newgGJ1nmgPKKd-TZG+wUbId`!7_ecQf#Jd zvV{0ba+AuI{AM|Uly1v`LV%KaF=iY|Q^aKDOZt16OBHt(ifp6&Fc(^^tnu=DxeNE0 z=qWj~rn2VLVnpb;TX$;hXptnT!hKD?)7A3nL*IQBH;p71s)Z?B0~u3%MHgH}JrrR~ 
zU9(`_g%tG(XY~n%gQOfyafclQMjI?F_noVCv9_Hsp+%ukQ-Xv&Bdnsjl-8&Ugs&;$ zp7AWjDG&;V3@pSk&dLkT4jz^+7A0+B`E;aF&b85ulZ=5l8*g|~oD3AB>*xrIH;TmX2S*>m{_Hn!C++mtXr?qf$l^b30Br|Pq z+zdY9;nakQCU9%V%MHOWeN6UB=%Lf2^Gif#ogm1yp;hw-@@sCT3epm#IRKq`2HcrD zNi9NGa1}G#$wm3u$Sgw+ByDP7|%9x>35Oo8kz#zM>#sj3M=|16!aMnsn(5YpGc@qM!KvK zW?79^x$gJ1wTer1EN|eD z`*9X2?JEe|C6Q9yP2vwEuqGIe+dp=~fO=4w&dKSzJEwL>==f)*Jmqc?&B+PK?n*Iw{; zI7K7QxCaeoYuLD2hM0o*s9vu{u2fAB@Se4iBBLzM*eV=yAb=bj zwBRhsO?>k-KEozT@qBK;f6RU)rZv*vq4qIu2V7gW)Ia}Y$+J9BNM#!wW-B5C0N~GW z*|d|hhqZ~*?*&h_m6$wspkL)QxZwHKP(pu;YC9oTjQg9W>SFn{8mx@rjG%EkR#VBd zml-k9QD6Jv@vT9%8E?n!2Q!?kW-0DUZBTA828TOwMfYJVwP>64vVb15-7a^zsY|D^ z@Tk-heDw~s5 z6SE|r2p#<%3j@SMJW~4b?5`s8=)>#op-h;Jt@S#D|5Cj|dFNuRrCHIMwsV?3xn}uW zpn8a05xtb>K}M$3c8Nurk^rD$EumoGwykNph=15l!vBFOpZL4vP2aVg576R(_KAkk z=fqKw9b}u)$~F5k&V%9&&av8FqGa}qiVp4VXzPSL*QK6O=1B&!xw6e}P=aK&YHml1 zSxE>c>ZZiGV*l+_p!n+@Rip5i0ae<9maJ+nFtB^Aeq4ma)NCpffT^|nug4jNMZ{6! zkiM1nP4ny}{w0L>+_9e!2c?liX{wjD)Hq;oT5Re&NIbsSBVSI`3dTQ|1%8at=&M zz1Pt>m#!9E49lyvOSnOxH=NMtyno@mvp{Zdh*`|x(Cm4n&qaQ6I{%2yy=x7s^123j6(7x)=8t8@vlqxF$HF#|Q&j^Rd8sn6joJO^z~4a5$OH^>tih zday5!A7PGpJ*s51OqC(C$v;y#Qcx@@pSrk--G64S zF>YO-6D*5rc~`BzR2$8{bU3ODTc5sII(JgshHjI&XcSH0v^=gXCF_tkT}V0nj&`ww z{Pd7mmKyY9nTC8?{ajtyH07ITVqqVG(9+25U=3SBc+(*@Xi%Rn)!0ydcb}KPVrm{2 zO4wS5IYq&;!aadWi!s1tSD#~%%akwN^a_>>qniQxUBv=Xe%d3Ru}pnV)Pkk3^%D|7 zyZ}_{U{(Oj%~pq3%!99WP*{Z9oiri_u6ITrdc^(M)ZIth%$o7EkC!89a_-v`=56P> z7R{^H%u>Q}Pdur9euj6){VvH`f!|sa8A(eow!D4gvh3=52nj>?>a$3`9918)Mh(&+ zgeS>Ltjij)UznbgeXpT{<2ZMI#59>0xY`d=W&o)7Pe-;TZQ2GQOj;{ z3ieweGIlKb;Vo6?JCuM)So1OQ`K5Vc=@0Kvj+fMIql1hvzZK&7v~>Hw-*RIZ#6oh_ zOk`c?P4F*qnJRXY6cAX|)uk4qRUyrPr!Yw*=(#;651mW?K~;Tnc5&ki?kwF8NHmli;b2*b{1os;!Wn3CBX> zzBo4y1f-^K(6z1lQ->*y7=`Ju1fs{^TZrLy5fa`ScdxJqSk`;xdadX0%$L%f?|*B1 zQWH#6VUy74RV6s%a8mO|Uyrw`z{-NTVf|tT!H;<6;Xmf%{c_*#obPe8y`iJmZ^Y|9 z&BM;G_Vn29oaprWisPd6DfzzQ0q38@`Yi%BBL^&2DzI45z}0SJJ0k^0J9{SvBRfZv z-z+V-2m8MwH(1|-<2vQG7=fXuw8uQ6?L{A^umfgbBE-@KH6tqCH{y9MzC>TDDGENB z+Tye@SCjC%7uNcdbHK+Yvd 
zJU25&g!rwo?v=0C;g`J7Sg|;G?Pl_Y*W7Xzu{`FY$)X_v| zUG*wHqWz$_*Km)G~Q1p1M?PkFwhHL!=D52l)! z53a6V(<;vz|2saT*8?K|iU2mx(%SmCW~H!by4c4YVu?z^zOf@BPH9mWI!OMWwH0#( zj8+DU2TPka`}|DN^q-9SK$-FlS0kKbMGMydsoU$L-0JZwUQ zA+s74PWcZ9UlkDi;JDZ2)GC`3O;idSDNLBpyAm7u4^?>a08 zdl6Uv_9+mMbYTDS&sW3#C6s^7|8P-ELHh3u{(gJqUxdHSnc%94`XPtsnZKS+NKHGbvr zcR%`*9snSC4FLRySN)az@8;^S&S1{r4Rvq;A>NO6u*7@Kh)l0 A`v3p{ literal 0 HcmV?d00001 diff --git a/examples/data/eln/schema.archive.yaml b/examples/data/eln/schema.archive.yaml index 7e36c0cd2..09514f70a 100644 --- a/examples/data/eln/schema.archive.yaml +++ b/examples/data/eln/schema.archive.yaml @@ -1,11 +1,11 @@ -# Schemas can be defines as yaml files like this. The archive.yaml format will be +# Schemas can be defined as yaml files like this. The archive.yaml format will be # interpreted by nomad as a nomad archive. Therefore, all definitions have to be # put in a top-level section called "definitions" definitions: # The "definitions" section is interpreted as a nomad schema package # Schema packages can have a name: name: 'Electronic Lab Notebook example schema' - # Schema packages contain section definitions. This is wear the interesting schema + # Schema packages contain section definitions. This is where the interesting schema # information begins. sections: # Here we define a section called "Chemical": @@ -122,7 +122,7 @@ definitions: section: # The sub-section's section, is itself a section definition m_annotations: - eln: # ads the sub-section to the eln and allows users to create new instances of this sub-section + eln: # adds the sub-section to the eln and allows users to create new instances of this sub-section # We can also nest sub_sections. It goes aribitrarely deep. sub_sections: pvd_evaporation: @@ -152,7 +152,7 @@ definitions: # The tabular_parser annotation, will treat the values of this # quantity as files. 
It will try to interpret the files and fill # quantities in this section (and sub_section) with the column - # data of .csv or .xlsx files. + # data of .csv or .xlsx files. There is also a mode option that by default, is set to column. tabular_parser: sep: '\t' comment: '#' @@ -213,4 +213,3 @@ definitions: eln: component: NumberEditQuantity - diff --git a/tests/parsing/test_tabular.py b/tests/parsing/test_tabular.py index 2a9a75829..f09cb82c4 100644 --- a/tests/parsing/test_tabular.py +++ b/tests/parsing/test_tabular.py @@ -41,45 +41,6 @@ def quantity_generator(quantity_name, header_name, shape='shape: [\'*\']'): return re.sub(r'\n\s*\n', '\n', base_case) -testParamsColMode = { - 'test_1': ['', '', quantity_generator('quantity_0', 'header_0'), 'header_0,header_1\n0_0,0_1\n1_0,1_1'], - 'test_2': [f'''Mysection: - quantities: - {quantity_generator('quantity_0', 'header_0')} - ''', '''sub_sections: - my_substance: - section: Mysection''', '', 'header_0,header_1\n0_0,0_1\n1_0,1_1'] -} - - -testParamsRowMode = { - 'test_1': ['', '- my_substance1', '''my_substance1: - repeats: true - section: Substance1''', 'header_0,header_1\n0_0,0_1\n1_0,1_1'], - 'test_2': [f'''Substance2: - quantities: - {quantity_generator('quantity_2', 'header_2', shape='')} - ''', '''- my_substance1 - - my_substance2''', '''my_substance1: - repeats: true - section: Substance1 - my_substance2: - repeats: true - section: Substance2''', 'header_0,header_1,header_2\n0_0,0_1,0_2\n1_0,1_1,1_2'], - 'test_3': ['', '- subsection_1/my_substance1', f'''subsection_1: - section: - m_annotations: - eln: - dict() - sub_sections: - my_substance1: - repeats: true - section: - base_section: Substance1''', - 'header_0,header_1,header_2\n0_0,0_1,0_2\n1_0,1_1,1_2'], -} - - @pytest.mark.parametrize('schema,content', [ pytest.param( strip(''' @@ -189,7 +150,7 @@ def test_tabular(raw_files, monkeypatch, schema, content): type: str m_annotations: tabular: - name: sheet_1/column_1 + name: column_1 data: m_def: My_schema 
process: @@ -259,7 +220,10 @@ def test_tabular(raw_files, monkeypatch, schema, content): data_file: Test.xlsx '''), id='w_sheetName_colMode') ]) -def test_xlsx_tabular(raw_files, monkeypatch, schema): +def test_tabular_entry_mode(raw_files, monkeypatch, schema): + ''' + Testing TabularParser parser. This feature creates an entry out of each row from the given excel/csv file + ''' _, schema_file = get_files(schema) excel_file = os.path.join(os.path.dirname(__file__), '../../tests/data/parsers/tabular/Test.xlsx') @@ -279,8 +243,23 @@ def test_xlsx_tabular(raw_files, monkeypatch, schema): assert len(main_archive.data.process['quantity_2']) == 6 -@pytest.mark.parametrize('test_case', testParamsColMode.keys()) -def test_tabular_normalize_column_mode(raw_files, monkeypatch, test_case): +@pytest.mark.parametrize('test_case,section_placeholder,sub_sections_placeholder,quantity_placeholder,csv_content', [ + pytest.param('test_1', '', '', quantity_generator('quantity_0', 'header_0'), + 'header_0,header_1\n0_0,0_1\n1_0,1_1', id='simple'), + pytest.param('test_2', f'''Mysection: + quantities: + {quantity_generator('quantity_0', 'header_0')} + ''', '''sub_sections: + my_substance: + section: Mysection''', '', 'header_0,header_1\n0_0,0_1\n1_0,1_1', + id='nested'), +]) +def test_tabular_column_mode(raw_files, monkeypatch, test_case, section_placeholder, quantity_placeholder, + sub_sections_placeholder, csv_content): + ''' + Testing the TableData normalizer using default mode (column mode). This feature creates a list of values + out of the given column in the excel/csv file for the given quantity. 
+ ''' base_schema = '''definitions: name: 'Eln' sections: @@ -301,13 +280,12 @@ data: m_def: My_schema data_file: test.my_schema.archive.csv''' - test_params = testParamsColMode[test_case] - schema = base_schema.replace('', test_params[0])\ - .replace('', test_params[1])\ - .replace('', test_params[2])\ + schema = base_schema.replace('', section_placeholder)\ + .replace('', sub_sections_placeholder)\ + .replace('', quantity_placeholder)\ .replace('', test_case) schema = re.sub(r'\n\s*\n', '\n', schema) - csv_file, schema_file = get_files(schema, test_params[3]) + csv_file, schema_file = get_files(schema, csv_content) class MyContext(ClientContext): def raw_file(self, path, *args, **kwargs): @@ -325,10 +303,36 @@ data: assert main_archive.data.my_substance.quantity_0 == ['0_0', '1_0'] -@pytest.mark.parametrize('test_case', testParamsRowMode.keys()) -def test_tabular_normalize_row_mode(raw_files, monkeypatch, test_case): +@pytest.mark.parametrize('test_case,section_placeholder,ref_to_sub_section_placeholder,sub_sections_placeholder,csv_content', [ + pytest.param('test_1', '', '- my_substance1', '''my_substance1: + repeats: true + section: Substance1''', 'header_0,header_1\n0_0,0_1\n1_0,1_1', id='simple_1_section'), + pytest.param('test_2', f'''Substance2: + quantities: + {quantity_generator('quantity_2', 'header_2', shape='')} + ''', '''- my_substance1 + - my_substance2''', '''my_substance1: + repeats: true + section: Substance1 + my_substance2: + repeats: true + section: Substance2''', 'header_0,header_1,header_2\n0_0,0_1,0_2\n1_0,1_1,1_2', id='simple_2_sections'), + pytest.param('test_3', '', '- subsection_1/my_substance1', f'''subsection_1: + section: + m_annotations: + eln: + dict() + sub_sections: + my_substance1: + repeats: true + section: + base_section: Substance1''', + 'header_0,header_1,header_2\n0_0,0_1,0_2\n1_0,1_1,1_2', id='nested') +]) +def test_tabular_row_mode(raw_files, monkeypatch, test_case, section_placeholder, ref_to_sub_section_placeholder, + 
sub_sections_placeholder, csv_content): ''' - Testing the TableData normalizer with mode set to row. This feature is to create a section out of each row in a + Testing the TableData normalizer with mode set to row. This feature is used to create a section out of each row in a given sheet_name of an excel file or a csv file, and append it to the repeating (sub)section(s). ''' base_schema = f'''definitions: @@ -357,13 +361,12 @@ data: m_def: My_schema data_file: test.my_schema.archive.csv''' - test_params = testParamsRowMode[test_case] - schema = base_schema.replace('', test_params[0]) \ - .replace('', test_params[1]) \ - .replace('', test_params[2]) \ + schema = base_schema.replace('', section_placeholder) \ + .replace('', ref_to_sub_section_placeholder) \ + .replace('', sub_sections_placeholder) \ .replace('', test_case) schema = re.sub(r'\n\s*\n', '\n', schema) - csv_file, schema_file = get_files(schema, test_params[3]) + csv_file, schema_file = get_files(schema, csv_content) class MyContext(ClientContext): def raw_file(self, path, *args, **kwargs): -- GitLab From 1d93a9c18dc2d914d7385ece4a66a4dbc5529e7e Mon Sep 17 00:00:00 2001 From: Amir Golparvar Date: Thu, 6 Oct 2022 12:36:34 +0200 Subject: [PATCH 16/24] fixing pylint --- tests/parsing/test_tabular.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/parsing/test_tabular.py b/tests/parsing/test_tabular.py index f09cb82c4..0bf1ad893 100644 --- a/tests/parsing/test_tabular.py +++ b/tests/parsing/test_tabular.py @@ -303,7 +303,8 @@ data: assert main_archive.data.my_substance.quantity_0 == ['0_0', '1_0'] -@pytest.mark.parametrize('test_case,section_placeholder,ref_to_sub_section_placeholder,sub_sections_placeholder,csv_content', [ +@pytest.mark.parametrize('test_case,section_placeholder,ref_to_sub_section_placeholder,' + 'sub_sections_placeholder,csv_content', [ pytest.param('test_1', '', '- my_substance1', '''my_substance1: repeats: true section: Substance1''', 
'header_0,header_1\n0_0,0_1\n1_0,1_1', id='simple_1_section'), @@ -326,8 +327,7 @@ data: my_substance1: repeats: true section: - base_section: Substance1''', - 'header_0,header_1,header_2\n0_0,0_1,0_2\n1_0,1_1,1_2', id='nested') + base_section: Substance1''', 'header_0,header_1,header_2\n0_0,0_1,0_2\n1_0,1_1,1_2', id='nested') ]) def test_tabular_row_mode(raw_files, monkeypatch, test_case, section_placeholder, ref_to_sub_section_placeholder, sub_sections_placeholder, csv_content): -- GitLab From f3da5a6ae533ae59c0fc225850ff90196343aa38 Mon Sep 17 00:00:00 2001 From: Amir Golparvar Date: Thu, 6 Oct 2022 13:44:49 +0200 Subject: [PATCH 17/24] fix linting --- tests/parsing/test_tabular.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/tests/parsing/test_tabular.py b/tests/parsing/test_tabular.py index 0bf1ad893..9a0e1a09c 100644 --- a/tests/parsing/test_tabular.py +++ b/tests/parsing/test_tabular.py @@ -305,10 +305,10 @@ data: @pytest.mark.parametrize('test_case,section_placeholder,ref_to_sub_section_placeholder,' 'sub_sections_placeholder,csv_content', [ - pytest.param('test_1', '', '- my_substance1', '''my_substance1: + pytest.param('test_1', '', '- my_substance1', '''my_substance1: repeats: true section: Substance1''', 'header_0,header_1\n0_0,0_1\n1_0,1_1', id='simple_1_section'), - pytest.param('test_2', f'''Substance2: + pytest.param('test_2', f'''Substance2: quantities: {quantity_generator('quantity_2', 'header_2', shape='')} ''', '''- my_substance1 @@ -318,7 +318,7 @@ data: my_substance2: repeats: true section: Substance2''', 'header_0,header_1,header_2\n0_0,0_1,0_2\n1_0,1_1,1_2', id='simple_2_sections'), - pytest.param('test_3', '', '- subsection_1/my_substance1', f'''subsection_1: + pytest.param('test_3', '', '- subsection_1/my_substance1', f'''subsection_1: section: m_annotations: eln: @@ -327,8 +327,7 @@ data: my_substance1: repeats: true section: - base_section: Substance1''', 
'header_0,header_1,header_2\n0_0,0_1,0_2\n1_0,1_1,1_2', id='nested') -]) + base_section: Substance1''', 'header_0,header_1,header_2\n0_0,0_1,0_2\n1_0,1_1,1_2', id='nested')]) def test_tabular_row_mode(raw_files, monkeypatch, test_case, section_placeholder, ref_to_sub_section_placeholder, sub_sections_placeholder, csv_content): ''' -- GitLab From 459b701a2d9fad0ee881172fdffed26b79d1b80e Mon Sep 17 00:00:00 2001 From: Amir Golparvar Date: Thu, 6 Oct 2022 14:29:51 +0200 Subject: [PATCH 18/24] fix linting --- tests/parsing/test_tabular.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/tests/parsing/test_tabular.py b/tests/parsing/test_tabular.py index 9a0e1a09c..8fd559f3e 100644 --- a/tests/parsing/test_tabular.py +++ b/tests/parsing/test_tabular.py @@ -303,12 +303,11 @@ data: assert main_archive.data.my_substance.quantity_0 == ['0_0', '1_0'] -@pytest.mark.parametrize('test_case,section_placeholder,ref_to_sub_section_placeholder,' - 'sub_sections_placeholder,csv_content', [ - pytest.param('test_1', '', '- my_substance1', '''my_substance1: +@pytest.mark.parametrize('test_case,section_placeholder,ref_to_sub_section_placeholder,sub_sections_placeholder,csv_content', [ + pytest.param('test_1', '', '- my_substance1', '''my_substance1: repeats: true section: Substance1''', 'header_0,header_1\n0_0,0_1\n1_0,1_1', id='simple_1_section'), - pytest.param('test_2', f'''Substance2: + pytest.param('test_2', f'''Substance2: quantities: {quantity_generator('quantity_2', 'header_2', shape='')} ''', '''- my_substance1 @@ -318,7 +317,7 @@ data: my_substance2: repeats: true section: Substance2''', 'header_0,header_1,header_2\n0_0,0_1,0_2\n1_0,1_1,1_2', id='simple_2_sections'), - pytest.param('test_3', '', '- subsection_1/my_substance1', f'''subsection_1: + pytest.param('test_3', '', '- subsection_1/my_substance1', f'''subsection_1: section: m_annotations: eln: -- GitLab From 0a1b48cf8aa94e6259e2c3b530556daea8812074 Mon Sep 17 00:00:00 2001 From: Amir Golparvar 
Date: Fri, 7 Oct 2022 17:18:25 +0200 Subject: [PATCH 19/24] updated docs --- docs/schema/elns.md | 15 ++++-- .../docs/tabular-parser-col-mode.archive.yaml | 33 +++++++++++++ .../tabular-parser-entry-mode.archive.xlsx} | Bin 10485 -> 10407 bytes .../tabular-parser-entry-mode.archive.yaml | 18 +++++++ .../tabular-parser-row-mode.archive.yaml} | 45 +++++------------- examples/data/eln/README.md | 14 ++---- examples/data/eln/Tabular_parser.archive.json | 6 --- tests/data/test_examples.py | 12 +++++ 8 files changed, 92 insertions(+), 51 deletions(-) create mode 100644 examples/data/custom-schema/docs/tabular-parser-col-mode.archive.yaml rename examples/data/{eln/Tabular_parser.data.xlsx => custom-schema/docs/tabular-parser-entry-mode.archive.xlsx} (54%) create mode 100644 examples/data/custom-schema/docs/tabular-parser-entry-mode.archive.yaml rename examples/data/{eln/TabularParserRowMode.schema.archive.yaml => custom-schema/docs/tabular-parser-row-mode.archive.yaml} (61%) delete mode 100644 examples/data/eln/Tabular_parser.archive.json diff --git a/docs/schema/elns.md b/docs/schema/elns.md index 2cc28c8b2..923c12945 100644 --- a/docs/schema/elns.md +++ b/docs/schema/elns.md @@ -37,11 +37,20 @@ NOMAD's upload page: ```yaml --8<-- "examples/data/eln/schema.archive.yaml" ``` - +The following three schemas contain commented examples of how to create entries based on +data from an excel or a csv file. 
+- The following sample schema creates separate instances off of each row from an excel file: ```yaml ---8<-- "examples/data/eln/TabularParserRowMode.schema.archive.yaml" +--8<-- "examples/data/custom-schema/tabular-parser-row-mode.archive.yaml" +``` +- The following sample schema creates one quantity off the entire column of an excel file: +```yaml +--8<-- "examples/data/custom-schema/tabular-parser-col-mode.archive.yaml" +``` +- The following sample schema creates one entry for each row of an excel file: +```yaml +--8<-- "examples/data/custom-schema/tabular-parser-entry-mode.archive.yaml" ``` - ## ELN Annotations The `eln` annotations can contain the following keys: diff --git a/examples/data/custom-schema/docs/tabular-parser-col-mode.archive.yaml b/examples/data/custom-schema/docs/tabular-parser-col-mode.archive.yaml new file mode 100644 index 000000000..2496ef580 --- /dev/null +++ b/examples/data/custom-schema/docs/tabular-parser-col-mode.archive.yaml @@ -0,0 +1,33 @@ +# This schema is specially made for demonstration of implementing a tabular parser with +# column mode. +definitions: + name: 'Tabular Parser example schema' + sections: + Substance_1: + quantities: + quantity_1: + type: str + shape: ['*'] # Stating that this quantity takes the shape of an array + m_annotations: + tabular: + name: Sheet_2/Column_2 + Tabular_Parser: + base_sections: + - nomad.datamodel.data.EntryData + - nomad.parsing.tabular.TableData + quantities: + data_file: + type: str + description: | + A reference to an uploaded .xlsx + m_annotations: + tabular_parser: + comment: '#' + mode: column # Here the mode can be set. 
If removed, by default, + # the parser assumes mode to be column + sub_sections: + my_substance_1: + section: Substance_1 +data: + m_def: Tabular_Parser + data_file: tabular-parser-entry-mode.archive.xlsx \ No newline at end of file diff --git a/examples/data/eln/Tabular_parser.data.xlsx b/examples/data/custom-schema/docs/tabular-parser-entry-mode.archive.xlsx similarity index 54% rename from examples/data/eln/Tabular_parser.data.xlsx rename to examples/data/custom-schema/docs/tabular-parser-entry-mode.archive.xlsx index 52721d866c999c187841467053ba9bfffda0e168..67381a3364711b193aa9cddee89538882ad5aa3e 100644 GIT binary patch delta 3254 zcmZ9Oc{mjQ7RIe%FvwW57lx26JELUJz9h0I#DpkP%wRBf8rhp6Tg;4v>`U23WEuOC zrR=gzvTxV#-sibJ_nyDL&vV}M*ZI8f_w1S#o0StU!J-{2^pz;d$hK+7$e75;$b!6Y z1o}Mna`Ex;k_v))7g*W*WJ`lWFjeR96XP#b;9Ewbmo>4&#uizhrYsiWo;dFKc+dAm zr~CF=df9GS?|{Mz5BM?1P{&tm22a#1CZx@JnKdd?Zei~(;DU4Ve6Yg|$waXEA|fuf zZ-5>O8z}Bc;+Lwvt$~jWl7;6!e&msF$!lwkf_ms@$(aoStO0_C@8&EtY>gzvzGgEP zmM)~ZPBmLyw$2L^OfB|BzH{PA&>l^zGN+6w$N@{YOqADiO?0n=>R)#iS1ZVwjcR^; zlv9$9264HuJE&XVetnasMs=O*Eds*?^KP4i#b-IK>REl272`I0FPB?=DL?6iP#sCS zMO#15nbWyo`&vWpZCjJoAgtKx4BZ10Zrg)ZI)m}e*0*VAR3{pnpuqY@FlTX~)+%>F zSxd6Hd+gN+l#Z~ng*+zD{{UncERYOU_u-1RBG9kXRY>hGo>*}!OX~HE001uvqWfUm zrzP{A4@KK##vR|t&z*`lUy7_*;2MrV9hRwlfJ1@~lubnF1E0JsqG2fdRfOi6K)d%X za~_9bbYiy7!Uyyq->5uC9xcCK$`RyPhuGE&PS=KSl)D{b&oFzNGP@@P>9W(o(3Pbo z*MO-;l@E}FO|(_r_+D9KVpXAWJqNYU?)ftf|rdGrUWcV~=Gl}7I@`GHl!jRA>;#jweWWw z7Hy7$8ECrrfV$uLHa%)@mVTw}&lUPp@PY)I0_NsbTr-m|dccw_QiKg88pG@rOEC%7 zvc7(s)c!BuRib|m2atxNaK~k<9jooC1d8;93(CYF))0 zB^6$N$#i0p)$b5X=hK_H>S;64;c6wE9NJ**&ph~S(Ki!OPcUNP)kkBlICsj0` zFrs8Yph`7>3I!ln(-aVOVK8|*6r4c}mO7P~EzKOYb$ypNd5bc`LJw3ixhcLjV`)`W zY)8m&qIP|(n(d8ken|i(>zM`SYty*`&-eSKDdVyuJQ_ud)Ge)27Zy*6N` zjDF0zVk~2>lHw5#W~kI;o_TlGxvb2&tMq5;v|&9f?xrr%g;6HCrR+s9O;+yM9F^IW zy<(nu0NiFLD%+h#8=booZFHUOzM|lfN|?ZF>AIJ_I}7W)A9$khlhO=4j8<;)@TyWd02Y;bJwwCVNgMPHBeM~ zc%CGxnrs9c1(IyL@g3@LX)4DJ>Ctr4Hq1D$*WBI|XePo@O9XOnzXaAR%!yKVWQxIm 
zUx!&cd=;IW9dQ7$3_=>Ui|ArYF^(3sZ_ zEslbB@pspsbaJ43J=$dZNlW>G&qEs9gHvo{xBu+JS0CtqbiBi$qoNw}6B!!~5Ahep zD!7kS)p$+H-=t3Wg;Vpq*%ogbvfc@@Z#Sd!kyV365we964T7?Nc>W6d4)U{XJ!UYO z`aBP1(G`BM%yAk+Bf4SZK>T_P2~k>GJ64<}LzOXo07M#l!HO=BxA_NoE)<-Z1!kCZ zK|NvX%@MD}Y=mDIYb37J61!PgdZecMh}PQsE_C@L{%AUoL0`c<7|9++U|QP8I7fA~ z5j`qsmft`D-Kf;d9tDzCm0~MNTS*aI!WzgT(4>Otfcy3z=fkt5E{<4qYi=;|7xov{Y`2Of)knykS{ zN=b^&Ew7Oyx!$gqzB;uVWxRS`ws&xEtbf!+n7ceYtMbILdL_KzKh)EE8W6-p`r0Qu zeU|XrZ1BDhHd!EZS7x>d;_ff95E+ntZqKRV0^Ad4-)6h{%1-aqD`!TSqw~ZSXB53e zs8srv<^6${)m84?ML#{7Re9jOF-ZM`RKRtfN+Dorc<9_=tbcb}Tv=GCxY7Yu`Op=A zD58yH<9Ft89L2cOtPMR`pZxS`YUK<;EC4UuiikheqRLXoY0m$`$|#&|8FLqdd!*{e zcKjKV>@hIE0))YOfDP>aso|SE9eZ?qv1eyAtr~CWr)ZO0g=e3KBrLY~?1q_!Bb zo=mo%np`=gRrs*e#F6KD+cQKvd9z&;RQ+q4;d6=i^45H&>XmaJ+2SG%#$`VTKU&Pj zFHTIcd@+(8_GYqND~UkkOTrUAr4e?!5D>erGJRq!>{pH8)4(#@O*Q03oN2%qtYSGT z@aoD)NTmU53`ORbR%LD$cs{Fxy5ZYgV?%suZ%Q#fUQN)_o}G2riC^ zzh7Yrsz=o*wjOKhu!m?jFemoM9~M#OpG@+R6PDUk@>^@4(JUnaEq|%Po#f|X+N96% zYbmcgft=#q7%*4)Igb;BdfyT-Ysj)Nv-ZWw9>8AURP3Fy({r+wT93OjP%pJ6G^Fd# z@hbC2&w&Fy+C2#ULy;>K^`nL>^vVwQ#oP7Jcu1RDe&>Su>k%Ki-%7qUu=4N^}1M|nmVf};|3rm+kI(&IditMHpnY`o$a=K zpHyZBn*=2`N8x$X!$2(dB;JjsaHPVIJx=2@ zT*P}hy1NYccsX+$agDkGg92$>9V~rKyDThSn}n)?n;WJ!(kz7vRj`zdApT?zT!UNl zaC>}lS#S|KC}tO|BK6RhR1|wNK2h(LuWslceGE2UtML$V9(z{#1U;F1Mh&a2Yc|SX z>fCb>bK1R)1u!a@-Z)rJG4|UzU;+VG#d5;|0>sFp8Jmsz49D>4IaNde40dlF3JZfS zhbJPGkjOm2QYW=RcOVnbHNELBq~f0jlS_XVp9`E|uHJ{=8}hjm@(RtD>B4=zPAPz1 z<*H-pmk#^I(6e9rYq2A4W@#@3fB*ItUK%+m5bu}`O^Y~gGSPa++i;W!j%`%hh#_bT z_w6H9HQ!oW2;uBGMJ+8-FISHW;d>j3IiBL>YB-VaSYL4uV0{=tS>*^dONDEjmHe zM3jl>Wv+7`?mc%u{QF_;{jk2h*82Bfs(++kJ5GYs5SA}jBmw~TNdW+A008jHRm30e zj_;6X0?}fuPV#2uQi&E5spy0z|(+(CrAd$V;C?kK_^w54zn5|PXzM@2d^vD>x5IO zj zvcGtkeo`WmIb+>0IjCB16%GFsqTJ~F)qtd;S*|I`-TK$(GYyywpU3BQa`J$1WE6Te zKy1M%Gly*O`Lbqy?}dF3F$o+emQKoZXyt27*o8m%>|X+bMU~M&I6ixfVWk`(FPI^; z5vzS!`Qr7~_r?1t9wk_`&9xOh+-fJfe3Mq~DOC*?fH_?|;NLbOC?)WsaV|B()V{Cw zdFLeIUn=yfI!cYcv~&uH?0;)PcAmc5^eXHV#j#p3+t~nv7Xc>hBP)iQ80+#w=@~1E 
zf)FvS+)v~+wrH;b()GGNgPW|YyF>6Fv=#O((91{drZ)d#lf=^0Vg z6iUlmDOn2l=T@u_!TKyOU6ge{o?V3NDJ)o=zH^(spUFtZRZXMIGNXwWkLa(9zT40e zE=JYy=J&|9dE#o{wutUW`x-{n?-#s78sucx$@DEEsfIbqpB|_9FxlGL>=TJd9N%$D z@IM-WqjNd$G)R)m5|Zz#pEuXtxJ-u}9Z1QCG_-)vKcs)1+lU99tN0Cjw|*~qQB}Gp)E~38NvcZ7aJ)vze|u;Y?T}T- zzP#_q^gFItSH)%OfJMPG%zb))xr$1AUuU502$e4?Q(o!zmXX=mQ!Bp4{98|sfs>XH zGW(~HWYuQlwsQTTEvU%UU_9@gZCRLS?cbS@thUBM`F|Bg{ z3%|h>*N_*S&bGO-2J2eQL9>$Dle)Lt-aIJ5$~m8gAH-{9?4743)9VNnYVQz2CG7ZzrYSZcbVQN!DIwSfk$?3`Gzg5S{?oq9fd z)F*k)1I*CzAis<@YDczk7XWG^Qf4hqb~3=B!ezntA&;mFX5>{vA+$dM z*dUnYxpt!uN`>{v00{|IU$@>vQqXJ>fqs%f0ecdSjdb$4lKF!PajPR{>A7wc+uP~>lp zo{)Nt`DJYFD!Lbr?;Pe-g+VzQYH+rzzI@^1lQbArALj4~{L;Glo(A$WQ1LlP&=gca z#?K~bcC#_ao7REV@F5Zus`S0dHM5|H7GRsH*$2?^Jk>Bb5xmRzaMNvq#ZVkh=xyT? za476V#h>XP(pGbX3CI;EovK1vylW6qixZ3lp@2#}GEr|)WchW!gM1ZM zc8{sLU<+3PURoNb5PMF$-HrfGSm8*~!PGTWgEYj&^*40wfdBv_04>jR5BU%8e@Zs` zbHW0cKJp~>oGLmvF2>s=TR+SB>ztucX^t6P5KAG-GsLoF3WK1AWXGfk7pO>@XQ~D#CDc?Ep0d`S{(jrZ}@2)@# z$xn}P=`oHxq)pkNzc?qnmuXdUp@gT(2~=lb9`TkzJ0__i%GWK&)M@b~ggX2IX_OxO zawXbnt0pBb`AeWh~fvc~n`Bh~sH2;xaO8GDaUa^C6Bts(FcBbPk+B85K5e7st|e)SC%ep-0B4ExSEvwz5l$3y>YT zX8&lu@vnCdM_b*9928!|(Wb0}yc7`~oBu1COtEPoy7RRZgP*SHk;)j;)`D)msQbO4 zjX9A{iNlL5Y!#HlGxvI9Wj77zLL%}_#manj1<_QNR3YJoB7Ma8)fHq&$*-7B+-nz# zHwsDZ?Ic_;1$fi|h!c~Rk=Gf*38X$93@ z-}R&I)91m_A93T_?H)=Ex9MUif!+C?d|U(oz%DwMofSFa+<(J#KcXuvaWww&32(14 zRmcP>Y*b@*VHW%}!I)}y!Nfg2>?Qq=5~kpeepKw5zsRRMlz!%^;&THjsB+)QGH)#< zrEk{Od@2HloJGda33lbc^-(?I0vK(ZF#&ZD$33iwVI}JHsXW@X7APJwB1zwTO*zuM+TJvcD8LW!n$p2F&xx zos3qxl5A2IwcXzxV6si}lqYumOOheZ&4>cTO3}t_1wU^4N9VPPa;l+(rrCfI%Yl@) zNf7M{+O7IdF3G8Qk0XI1#X$gSG&tzqHXNsL#Xi#2(mjC0A}+E5;K+vkT{w(L@|#FR$ef5ezfzx;dR;)bp8Kw&+m1=S4>Iu5Z^ze+qX$FjKZod0!R6 zC8G?UnHpuM_|e+P_8PI%Qy7`3o(!~X=bF1O3vo)6gQ=yfMZ^rmAI0}Do(my;Rl8z@ z#_D;6VMM$s$|`ct_cS<|615XFTTXlldA0H*NhnE%x#M0t(FDCI^$59nXf{NZ(7@SA`SY8=3uTfRYR!_- zc?kxUih6ed7ao?!?6`B5&lv^U#jTls8tAfsn*E755oQzE`BAel#acF8Ab1$Mv9S@W zd*<`y>G9%-mtXf`nV$G#6Fg@!X`?LI3Y-fC2Os4-d;fs=4tQ c?+s4#e>ls1v@)*(!5{Q%UQQAw?!U4B07Q2p(EtDd 
diff --git a/examples/data/custom-schema/docs/tabular-parser-entry-mode.archive.yaml b/examples/data/custom-schema/docs/tabular-parser-entry-mode.archive.yaml new file mode 100644 index 000000000..ba2cce4a6 --- /dev/null +++ b/examples/data/custom-schema/docs/tabular-parser-entry-mode.archive.yaml @@ -0,0 +1,18 @@ +# This schema is specially made for demonstration of implementing a tabular parser with +# entry mode. +definitions: + name: 'Tabular Parser example schema' + sections: + Tabular_Parser: + base_sections: + - nomad.datamodel.metainfo.eln.ElnBaseSection + - nomad.parsing.tabular.TableRow # To create entries from each row in the excel file + # the base section should inherit from `nomad.parsing.tabular.TableRow`. For this specific case, + # the datafile should be accompanied + quantities: + quantity_1: + type: str + m_annotations: + tabular: + name: Column_1 # For entry mode, the convention for reading data from csv/excel file + # is to provide only the column name and the data are assumed to exist in the first sheet \ No newline at end of file diff --git a/examples/data/eln/TabularParserRowMode.schema.archive.yaml b/examples/data/custom-schema/docs/tabular-parser-row-mode.archive.yaml similarity index 61% rename from examples/data/eln/TabularParserRowMode.schema.archive.yaml rename to examples/data/custom-schema/docs/tabular-parser-row-mode.archive.yaml index 4685fdc69..e19094f3d 100644 --- a/examples/data/eln/TabularParserRowMode.schema.archive.yaml +++ b/examples/data/custom-schema/docs/tabular-parser-row-mode.archive.yaml @@ -3,25 +3,6 @@ definitions: name: 'Tabular Parser example schema' sections: - # Here we define a sample section which later on, it will be inherited in the - # Tabular Parser section. - Substance_1: - base_sections: - - nomad.datamodel.data.EntryData # Declares this as a top-level entry section. 
- quantities: # Here we define quantities that belongs only to the Substance_1 section - quantity_1: # Name of the quantity - type: str - m_annotations: - # The eln annotation allows add the quantity to a ELN - eln: - component: StringEditQuantity - tabular: - # The tabular annotation defines a mapping to column headers used in tabular data files - name: Sheet_1/Column_1 # Here you can define where the data for the given quantity is to be taken from - # The convention for selecting the name is if the data is to be taken from an excel file, - # you can specify the sheet_name followed by a forward slash and the column_name to target the desired quantity. - # If only a column name is provided, then the first sheet in the excel file (or the .csv file) - # is assumed to contain the targeted data Tabular_Parser: # The main section that contains the quantities to be read from an excel file base_sections: - nomad.datamodel.data.EntryData @@ -45,10 +26,6 @@ definitions: # sheet_name. ref_to_sub_section: # This is the reference to where the targeted (sub-)section lies within this example schema file - subsection_1/my_substance_1 - browser: - adaptor: RawFileAdaptor - eln: - component: FileEditQuantity sub_sections: subsection_1: section: @@ -56,18 +33,20 @@ definitions: eln: dict() sub_sections: - # The repeats option set tot rue means there can be multiple instances of this - # section my_substance_1: - repeats: true + repeats: true # The repeats option set to true means there can be multiple instances of this + # section section: - base_section: Substance_1 # This section inherits from the Substance_1 that is defined on top quantities: - quantity_2: + quantity_1: type: str m_annotations: - eln: - component: StringEditQuantity - tabular: - name: Sheet_1/Column_2 - + tabular: # The tabular annotation defines a mapping to column headers used in tabular data files + name: Sheet_1/Column_2 # Here you can define where the data for the given quantity is to be taken from + # The 
convention for selecting the name is if the data is to be taken from an excel file, + # you can specify the sheet_name followed by a forward slash and the column_name to target the desired quantity. + # If only a column name is provided, then the first sheet in the excel file (or the .csv file) + # is assumed to contain the targeted data. +data: + m_def: Tabular_Parser # this is a reference to the section definition above + data_file: tabular-parser-entry-mode.archive.xlsx # name of the excel/csv file to be uploaded along with this schema yaml file \ No newline at end of file diff --git a/examples/data/eln/README.md b/examples/data/eln/README.md index 3c149fb1c..a9a993434 100644 --- a/examples/data/eln/README.md +++ b/examples/data/eln/README.md @@ -1,6 +1,6 @@ -This is a simple example for a basic ELN. It demonstrates the use of two separate NOMAD schemas -to define different types of entries. Based on these schemas the ELN allows you to create -Samples, Chemicals, Instruments and TabularParser. The Sample entry type also allows to define +This is a simple example for a basic ELN. It demonstrates the use of a NOMAD schema +to define different types of entries. Based on this schema the ELN allows you to create +Samples, Chemicals, and Instruments. The Sample entry type also allows to define processes. The schema is meant as a starting point. You can download the schema file and @@ -12,15 +12,11 @@ Consult our [documentation on the NOMAD Archive and Metainfo](https://nomad-lab. 
This example uploads contains the following entries - A schema in NOMAD's *archive.yaml* format: *schema.archive.yaml* that defines Three types of ELN entries: sample, instrument, and chemical -- Another schema in NOMAD's *archive.yaml* format: *TabularParserRowMode.schema.archive.yaml* -that defines a tabular parser entry - Three chemicals (as defined in the schema): *Copper_II_Selenide.archive.json*, *Tin_II_Selenide.archive.json*, *Zinc_Selenide.archive.json* - An instrument *PVD-P*.archive.json - A sample (*sample.archive.json*) with two processes (PVD evaporation, hotplate annealing) as sub-sections, and references to instrument and chemicals. -- A tabular parser sample file (*Tabular_parser.archive.json*) -- A *.csv* file. This is not directly parsed by NOMAD, but the sample ELN uses it to -parse data for the PVD evaporation process. -- An *.xlsx* file consisting dummy data for the tabular parser schema. \ No newline at end of file +- A *.csv* file. This is not directly parser by NOMAD, but the sample ELN uses it to +parse data for the PVD evaporation process. 
\ No newline at end of file diff --git a/examples/data/eln/Tabular_parser.archive.json b/examples/data/eln/Tabular_parser.archive.json deleted file mode 100644 index ba018b9ac..000000000 --- a/examples/data/eln/Tabular_parser.archive.json +++ /dev/null @@ -1,6 +0,0 @@ -{ - "data": { - "m_def": "../upload/raw/TabularParserRowMode.schema.archive.yaml#Tabular_Parser", - "data_file": "Tabular_parser.data.xlsx" - } -} \ No newline at end of file diff --git a/tests/data/test_examples.py b/tests/data/test_examples.py index 5a2c8ece8..aae99e8e1 100644 --- a/tests/data/test_examples.py +++ b/tests/data/test_examples.py @@ -33,3 +33,15 @@ def test_eln(mainfile, assert_xpaths, raw_files, no_warn): for xpath in assert_xpaths: assert archive.m_xpath(xpath) is not None + + +@pytest.mark.parametrize('mainfile, assert_xpaths', [ + pytest.param('tabular-parser-col-mode.archive.yaml', [], id='col_mode'), + pytest.param('tabular-parser-row-mode.archive.yaml', [], id='row_mode'), +]) +def test_sample_tabular(mainfile, assert_xpaths, raw_files, no_warn): + mainfile_directory = 'examples/data/costum-schema/docs' + archive = run_processing(mainfile_directory, mainfile) + + for xpath in assert_xpaths: + assert archive.m_xpath(xpath) is not None -- GitLab From 22ab98c976500c019273c7b513721e14bc640b3b Mon Sep 17 00:00:00 2001 From: Amir Golparvar Date: Mon, 10 Oct 2022 10:55:52 +0200 Subject: [PATCH 20/24] python tests for tabular docs are added --- tests/data/test_examples.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/tests/data/test_examples.py b/tests/data/test_examples.py index aae99e8e1..9db7b843c 100644 --- a/tests/data/test_examples.py +++ b/tests/data/test_examples.py @@ -36,11 +36,12 @@ def test_eln(mainfile, assert_xpaths, raw_files, no_warn): @pytest.mark.parametrize('mainfile, assert_xpaths', [ - pytest.param('tabular-parser-col-mode.archive.yaml', [], id='col_mode'), - pytest.param('tabular-parser-row-mode.archive.yaml', [], id='row_mode'), + 
pytest.param('tabular-parser-col-mode.archive.yaml', ['data.my_substance_1.quantity_1'], id='col_mode'), + pytest.param('tabular-parser-row-mode.archive.yaml', ['data.subsection_1.my_substance_1[4]'], id='row_mode'), + pytest.param('tabular-parser-entry-mode.archive.yaml', [], id='entry_mode'), ]) def test_sample_tabular(mainfile, assert_xpaths, raw_files, no_warn): - mainfile_directory = 'examples/data/costum-schema/docs' + mainfile_directory = 'examples/data/custom-schema/docs' archive = run_processing(mainfile_directory, mainfile) for xpath in assert_xpaths: -- GitLab From 9096e34055c62b455dc409ccf2f59bcaf7ef5b08 Mon Sep 17 00:00:00 2001 From: Amir Golparvar Date: Mon, 10 Oct 2022 11:34:54 +0200 Subject: [PATCH 21/24] minor bug fixes --- docs/schema/elns.md | 13 +++++++------ docs/schema/suggestions.yaml | 3 ++- 2 files changed, 9 insertions(+), 7 deletions(-) diff --git a/docs/schema/elns.md b/docs/schema/elns.md index 923c12945..6b8577bed 100644 --- a/docs/schema/elns.md +++ b/docs/schema/elns.md @@ -39,17 +39,18 @@ NOMAD's upload page: ``` The following three schemas contain commented examples of how to create entries based on data from an excel or a csv file. 
-- The following sample schema creates separate instances off of each row from an excel file: + +The sample schema provided below, creates separate instances of a repeated section from each row of an excel file: ```yaml ---8<-- "examples/data/custom-schema/tabular-parser-row-mode.archive.yaml" +--8<-- "examples/data/custom-schema/docs/tabular-parser-row-mode.archive.yaml" ``` -- The following sample schema creates one quantity off the entire column of an excel file: +The following sample schema creates one quantity off the entire column of an excel file: ```yaml ---8<-- "examples/data/custom-schema/tabular-parser-col-mode.archive.yaml" +--8<-- "examples/data/custom-schema/docs/tabular-parser-col-mode.archive.yaml" ``` -- The following sample schema creates one entry for each row of an excel file: +The following sample schema creates one entry for each row of an excel file: ```yaml ---8<-- "examples/data/custom-schema/tabular-parser-entry-mode.archive.yaml" +--8<-- "examples/data/custom-schema/docs/tabular-parser-entry-mode.archive.yaml" ``` ## ELN Annotations The `eln` annotations can contain the following keys: diff --git a/docs/schema/suggestions.yaml b/docs/schema/suggestions.yaml index 23167580a..3a1f42d28 100644 --- a/docs/schema/suggestions.yaml +++ b/docs/schema/suggestions.yaml @@ -7,7 +7,8 @@ tabular: name: "Either < column name > in csv and xls or in the format of < sheet name >/< column name > only for excel files" unit: "The unit to display the data" comment: "A character denoting the commented lines in excel or csv files" - mode: "Either 'column' or 'row' only when using TableData as a base-section". Defaulted to column + sep: "In case of reading data from a .csv file, the separator annotation (e.g. ',') can be specified here." + mode: "Either 'column' or 'row' only when using TableData as a base-section. Defaulted to column." ref_to_sub_section: "List of paths to the targeted repeating subsection < section >/< sub-sections >/ ... 
/< subsections >" eln: -- GitLab From 88baffbb452149aceb2da5f1fe50bf8478b4ae75 Mon Sep 17 00:00:00 2001 From: Amir Golparvar Date: Thu, 13 Oct 2022 15:11:44 +0200 Subject: [PATCH 22/24] improving docs. added new python test to cover for spaces in header names --- docs/schema/elns.md | 64 ++++++++++++++---- docs/schema/suggestions.yaml | 7 +- .../docs/tabular-parser-col-mode.archive.yaml | 38 +++++------ .../tabular-parser-entry-mode.archive.xlsx | Bin 10407 -> 11250 bytes .../tabular-parser-entry-mode.archive.yaml | 9 ++- .../docs/tabular-parser-row-mode.archive.yaml | 35 +++------- .../docs/tabular-parser.data.xlsx | Bin 0 -> 11250 bytes nomad/parsing/tabular.py | 16 ++++- tests/data/test_examples.py | 5 +- tests/parsing/test_tabular.py | 48 +++++++++++-- 10 files changed, 145 insertions(+), 77 deletions(-) create mode 100644 examples/data/custom-schema/docs/tabular-parser.data.xlsx diff --git a/docs/schema/elns.md b/docs/schema/elns.md index 6b8577bed..a9cbb2a98 100644 --- a/docs/schema/elns.md +++ b/docs/schema/elns.md @@ -37,21 +37,7 @@ NOMAD's upload page: ```yaml --8<-- "examples/data/eln/schema.archive.yaml" ``` -The following three schemas contain commented examples of how to create entries based on -data from an excel or a csv file. 
-The sample schema provided below, creates separate instances of a repeated section from each row of an excel file: -```yaml ---8<-- "examples/data/custom-schema/docs/tabular-parser-row-mode.archive.yaml" -``` -The following sample schema creates one quantity off the entire column of an excel file: -```yaml ---8<-- "examples/data/custom-schema/docs/tabular-parser-col-mode.archive.yaml" -``` -The following sample schema creates one entry for each row of an excel file: -```yaml ---8<-- "examples/data/custom-schema/docs/tabular-parser-entry-mode.archive.yaml" -``` ## ELN Annotations The `eln` annotations can contain the following keys: @@ -66,6 +52,56 @@ ELN edit annotations and components [here]({{ nomad_url() }}/../gui/dev/editquan ## Tabular Annotations +In order to import your data from a `.csv` or `Excel` file, NOMAD provides three distinct (and separate) ways, that +with each one comes unique options for importing and interacting with your data. To better understand how to use +NOMAD parsers to import your data, three commented sample schemas are presented below. Also, each section follows +and extends a general example explained thereafter. Two main components of any tabular parser schema are +using the correct base section as well as providing a `data_file` quantity with correct `m_annotations`. + +The following sample schema creates one quantity off the entire column of an excel file. +For example, suppose in an excel sheet, several rows contain information of a chemical product (e.g. purity in one +column). In order to list all the purities under the column 'purity' and import them into NOMAD, you can use the +following schema by substituting `My_Quantity` with any name of your choice (e.g. `Purity`), +`tabular-parser.data.xlsx` with the name of the csv/excel file where the data lies, and `My_Sheet/My_Column` with +sheet_name/column_name of your targeted data. The `Tabular_Parser` is also an arbitrary name that can be changed. 
+ +Important notes: + +- `shape: ['*']` under `My_Quantity` is essential to parse the entire column of the data file. +- `My_Quantity` can also be defined within another subsection (see next schema sample) +```yaml +--8<-- "examples/data/custom-schema/docs/tabular-parser-col-mode.archive.yaml" +``` + +The sample schema provided below, creates separate instances of a repeated section from each row of an excel file. +For example, suppose in an excel sheet, you have the information for a chemical product (e.g. `name` in one column), +and each row contains one entry of the aforementioned chemical product. Since each row is separate from others, in +order to create instances of the same product out of all rows and import them into NOMAD, you can use the following +schema by substituting `My_Subsection`, `My_Section` and `My_Quantity` with any appropriate name (e.g. `Substance`, +`Chemical_product` and `Name` respectively). + +Important notes: + +- This schema demonstrates how to import data within a subsection of another subsection, meaning the +targeted quantity should not necessarily go into the main `quantities`. +- Setting `mode` to `row` signals that for each row in the sheet_name (provided in `My_Quantity`), one instance of the corresponding (sub-)section (in this example, `My_Section` sub-section as it has the `repeats` option set to true), will be appended. Please bear in mind that if this mode is selected, then all other quantities should exist in the same sheet_name. +```yaml +--8<-- "examples/data/custom-schema/docs/tabular-parser-row-mode.archive.yaml" +``` + +The following sample schema creates one entry for each row of an excel file. +For example, suppose in an excel sheet, you have the information for a chemical product (e.g. `name` in one column), +and each row contains one entry of the aforementioned chemical product.
Since each row is separate from others, in +order to create multiple archives of the same product out of all rows and import them into NOMAD, you can use the +following schema by substituting `My_Quantity` with any appropriate name (e.g. `Name`). + +Important note: + +- For entry mode, the convention for reading data from csv/excel file is to provide only the column name and the +data are assumed to exist in the first sheet. +```yaml +--8<-- "examples/data/custom-schema/docs/tabular-parser-entry-mode.archive.yaml" +``` Tabular annotation accepts the following keys: {{ get_schema_doc('tabular') }} diff --git a/docs/schema/suggestions.yaml b/docs/schema/suggestions.yaml index 3a1f42d28..6ee894653 100644 --- a/docs/schema/suggestions.yaml +++ b/docs/schema/suggestions.yaml @@ -7,9 +7,10 @@ tabular: name: "Either < column name > in csv and xls or in the format of < sheet name >/< column name > only for excel files" unit: "The unit to display the data" comment: "A character denoting the commented lines in excel or csv files" - sep: "In case of reading data from a .csv file, the separator annotation (e.g. ',') can be specified here." - mode: "Either 'column' or 'row' only when using TableData as a base-section. Defaulted to column." - ref_to_sub_section: "List of paths to the targeted repeating subsection < section >/< sub-sections >/ ... /< subsections >" + sep: "In case of reading data from a .csv file, the separator annotation (e.g. ',' for comma or '\\t' for tab) can be specified here." + separator: "Alias for 'sep'." + mode: "Either 'column' or 'row' only when using TableData as a base-section. Defaults to column." + target_sub_section: "List of paths to the targeted repeating subsection < section >/< sub-sections >/ ...
/< subsections >" eln: component: "The name of ELN edit component" diff --git a/examples/data/custom-schema/docs/tabular-parser-col-mode.archive.yaml b/examples/data/custom-schema/docs/tabular-parser-col-mode.archive.yaml index 2496ef580..d09b653d9 100644 --- a/examples/data/custom-schema/docs/tabular-parser-col-mode.archive.yaml +++ b/examples/data/custom-schema/docs/tabular-parser-col-mode.archive.yaml @@ -3,31 +3,31 @@ definitions: name: 'Tabular Parser example schema' sections: - Substance_1: - quantities: - quantity_1: - type: str - shape: ['*'] # Stating that this quantity takes the shape of an array - m_annotations: - tabular: - name: Sheet_2/Column_2 - Tabular_Parser: + Tabular_Parser: # The main section that contains the quantities to be read from an excel file. + # This name can be changed freely. base_sections: - - nomad.datamodel.data.EntryData - nomad.parsing.tabular.TableData quantities: data_file: type: str - description: | - A reference to an uploaded .xlsx m_annotations: - tabular_parser: - comment: '#' + tabular_parser: # The tabular_parser annotation, will treat the values of this + # quantity as files. It will try to interpret the files and fill + # quantities in this section (and sub_sections) with the column + # data of .csv or .xlsx files. + comment: '#' # Skipping lines in csv or excel file that start with the sign `#` mode: column # Here the mode can be set. If removed, by default, # the parser assumes mode to be column - sub_sections: - my_substance_1: - section: Substance_1 + My_Quantity: + type: str + shape: ['*'] + m_annotations: + tabular: # The tabular annotation defines a mapping to column headers used in tabular data files + name: My_Sheet/My_Column # Here you can define where the data for the given quantity is to be taken from + # The convention for selecting the name is if the data is to be taken from an excel file, + # you can specify the sheet_name followed by a forward slash and the column_name to target the desired quantity. 
+ # If only a column name is provided, then the first sheet in the excel file (or the .csv file) + # is assumed to contain the targeted data. data: - m_def: Tabular_Parser - data_file: tabular-parser-entry-mode.archive.xlsx \ No newline at end of file + m_def: Tabular_Parser # this is a reference to the section definition above + data_file: tabular-parser.data.xlsx # name of the excel/csv file to be uploaded along with this schema yaml file \ No newline at end of file diff --git a/examples/data/custom-schema/docs/tabular-parser-entry-mode.archive.xlsx b/examples/data/custom-schema/docs/tabular-parser-entry-mode.archive.xlsx index 67381a3364711b193aa9cddee89538882ad5aa3e..b96fffc18ce1772d4ebe65f18c5fd37730b2a3fc 100644 GIT binary patch delta 4296 zcmZ9P1ymGD+sBu#C0#%lq++Xm^z zs#+pq(9A&8qi;i@XT@%A%_h0QV>_rq-fO^G1VJSwONE!i_h@0D0d?0e3_{j;4eIk44G{*%7+3jSC{HjS+@-o@EPah0# zcR5p_|L|HAA;hp0$L5xWb$1&7Xd;m>oWZrs3UES?(-g@`a9Yx|O0n&KzX|!4k-D-3 zhaCq_qk;}nzySa#Z?A&<_BOm6A9})E9n4`c2R<(+$6OsvSdIuqfaLZy&Ueu+4k<$W zdKN7A_>@WRp=wX{c$|C`@!fj`%!d2$7J(3<*%Dn%a&{;K-0z3Cwc}EN-9WpS4SFRz zg~c`aLtBxyWus4ec8gy4HfxS%RlhAWHykSGJC*g&6MpT|aObJ(-&NH_cGQNCWO%+#0qVJ*dGAI7%2Ygwb zU+C8|Q9j?xc(M3w%DQ?i`(j#uR6x)J8H~)xtM28e96fP+>AUm{I!cGgqDI!l-_r6& zH`c5ghQ_716piZWIt3?aeZ!D4QD3*Ss^F}Gj9OT_L~y^5@sj$YKdWJU19Yn_+8Pv zr;+}<^{~ayvuWhb>hVnHFTzx^3_0s93v&!-&Rrx*;T!4-M~1bDiQ2sw`i|WW!{sV< z(=`!P(*bO2OR!ffqqSz9?rP@{s^y-~yA#iTlvMvj%Q{_g_dQAM z%!&MDo)-%4JyAfI_Pl=FC4)&8e=f3d81ZU>s~M*y_KrxeSo^+72bQblJHaEu!=f&* zWnPuyCrkGNt5~8oXx68nY;YZ3^U!S5XjKJKvr->h?P)rrK4vz->CBbUPH1`vTfQvE zOxloGk9&^g+IuCTn1yH4VG`!Sy1spqb#8ve7b%+6@y;Q#_dHjnzVx;E2yHa2P#jdU zLiFVqIJR1@?;lZ?#ReG@sb!SmVp{lgBY9Bd$2;p!>S*ZstTnyFC9Gfab-?wGC4)(J z6S1n9HVcRteQSh&i_cfdK6*qHZtMnieJL1@#H|??OHr4gdPR78&a#AOqB3Q|wNOo5 zw-<;Nxat1{sw{Jq$P53fr<_SW#}HSmYC!2RpEI2|U&d0p0&TeI|1G^7{n}SFEUE*F_eSF#!C0%ax8^EFYZKM;L@QV{BiIlCE>f`%F z8_ag;FF8_hwNh}{NsDivt`p17MOc+rY&Z`&6={+X3ucjM7*?@)8Q ze3pO0mL0*47dCsCn>%J&&gH`3ls3WlGsWT_c>4|9dy2GQO!dp7%gI3Ve6IKLLCz&m 
zCbi%g%#A}s{c9CJmvx(%gmV2}7%j$<;z`8L2oq0|AzM!$M|dYwdB>V?SYM(kny$lA zg@69BS?^Zhr0BaOnfem@saHYXhSXv2_`(8+fzc;%SF0(%=+%iH9$0Yoc+R~`TQEN+ z0RV20Ijoe#r~xou0yY3(hllh5a>0?RF{%6%VTbYOjJ!+X%oJ1_z1*onJBvjDdl~ly zae1KmHA?8B^Ci((j)oU}qsoE!z8l^)M+28DxztYr-;4?2Mawg@j}U70TkDOOND8+_ zQEhr_xmtlCTRbA?6{nYW0YV;!_5k(Rk=mfC#V=1R)F!g(=#pu`Ukjrqzrt6G7X+pa zm)A9Nsp;OC^$R7Eir;bdCGXIV2US3`$45>DJA=M4TzFPHE`;WHiHi*FH#^{%xkn;O zwZ~e>G?c4)Gd-eFe#OoQS|6RI)o98E7LVnu^;(Y#xkI&t~M?lkxY&?_poK5Hso z#tsN#T9c~WI(6NG>N+pdo9j5_q7V^Ulr;b`$^ZCZBN(2+PQTEQTe z&g5CGF#GA{-O##IHp5>npr_%P81cTb>AOSn!&C5Sb?W@7&$?OCiaAsa1*E$)}ofJ$$hir68yT6Z$H z@rvS%yiH>X44$U>!;n2uPJ`YD;wZ40IM0A+JKKH1}XrZeLrqJb!? zxRJi_KfV1;|JfMLrTAR^M3q;0c#uc8S4ouzrSoV+Q11n#|M_qrO65CAYj(aevCel@1R^lcRtre|JZhpcH7BZ8SnyeXT9qN6HMouCC z;4T0u#tDZ1bI>$xV^}Ib$Un2{Ch(G>)i3lJ^WkMimqNkXnDyi4-XYx^})lFduQu4{I(b8$jwD?A@_ z@IuK_i_!hux$P8z)weRmB-(W24}zuO<0(Yh{>3hTD|D8oU4=z4A^n*IGf6dZi_{k6jMHRP6)&i_-*Dq%To%s!Xb73IdJ)e zj27iz_pnNY5fR8Lkohd+Mp6&5tQb|G9l;!Ob!qBW6ZIWbaI^i$ndp?@?AVE-1@5U zVJBeBgQ0mRZR}c_Z$htmY>I-_>r~mvjV~VbOr6O)i<4CZ=J+(KA6d|S_cHF(6z98* z`{X`srn1C9sl|=4mM7IUdrV zyc};{Yfhs8DL}!1C0MCJ@dFE9C-vO?l1pG@E)qc~QQd`T3J`YcOR3nAZNu;{@eXv? 
z+R-Qaw)~tL9QK5Md0sKtbp9^5Jkop$_^L6ezG|@TW-?;#Xc8>ZN-o39)yMWZ$#E+; zB5oEJ>yxZ-Z9&MlSik|7G;%x`9QUZ8;30WK8)?WRK(kSkQ&U8k-kQE@);E2YPxAS> zsK?=W)F|#Yd27nl^v$#Rl|EG=OY+Y%ImnFO=g=u{hDlb}b4TfJj+e)hN5Bv7Wn$?tS?IP9@nL1duqAvboK;9KhaR=y%n5ERCb z)78PVL}z88e%kcJ3Fze(=qxY|3_)G6&uN*1zC zTqf;(Z==`hW`;}dRS`#7X8o-AUc+QmQ>x2u=DYE0N3oRd^2l$@!&yZe8!674itd3!+}Sm*ne zwe|#5>IHQk9!&5Rr#nS2?j}$AidTN!oo!9ry&ar?uqnBT&%OD}CGk>2NJX+LSD7GK z@UQ>NZn6)D20dyjZF8hlr6SZs;c`r`%fxy4!kuO-L-xm~UC?H<%q~9$FR#UZ_$F+Y z8yhuBA-ZR&%_1W}rt#z5uXWcv{0%bVShA}OYcTB*drWaZHg*&9zRT1BrrW#;ZJB7o zEps#ig{pQb(88aFa{=jg@_Z8LvKoujYzZLM&y)yEw{2LGV-1HC zTS%SoZRI|NlJUy-(?m7fQOeLtHpcQ0^kbGW1O@u@1OHf!Ft4sl@rQYZ8RzD{-XI>U$4(dh+xzDW|E zT{nx&Tmu^x@$TRG22lo*YT~U5_-~xHYO)j>-8<&KI>^EDN6`@-x+&2tcLC`}{X_?1 z23^2B4I&;S^@`1i(Jue>vg?G|duB=NTi46uNn81460GPV zOF!duB8=SCYG5sakoRNu_Iur-b~29@yd56oSJz30&-cl$ODNty@c6-4nZ+uKP4st~ zja*}Yft8IE=3t=v|LNZ?;Qmv3kj@+|bbrUJw;Tlj-_)O86j{pgfc~EV?k`6EyZ8f< z0YEU4f|G*&zd`zMkrMeWR=FSjF`e5T^#73d--1)Bzf)2-TPrJfw}*c^K_mqPgd_w} z0RItB(jbF4C4t%ue^$7;dplYo$syGA|NM3U0Q8Uic4WbVyyoPf|MP-x?Evyu8vI{^ b309;OL>#-A0||$K$$)^{daD_M{3ZGywA0Xl delta 3469 zcmY*cXH=6(6b)Tk00|uyO>D^JC7;ym#iznRDO0_x-S`Fk>?{BqN7_C_$G%AP_I8KOHRQMgjt{ zH?Z-M0=-!a#@&*TzFnhdab+#BELh1tN$lQ8S!O(g>6Fam=o_OElCRrpV({cVOk{6? 
zoOdX0cWT$?iqT}skTv62uuZ(yJR1l0qxaHkOmLCf!2_Mx(n9M=6_H*_)7yPHY$Y>7 zp!*Kn^^9_6*yooAIXPom85V=fel(c%*7-|B128r(@T2Kn=fRIEr21-Ja=pQU4A1TC z2B4Fwoq`E)vwJ~vDK$2{mX{{85_2tU3(r3$Nu8u((~H2{10$5}ZSJW; zob)r6x=+pNUHjiW-&L2G?{Hp@(^VD;v&y^t`g#t%aJp+eMs^Z3KCPM(Qj$Cu5Of$o zfXg05aJwS9n70DFszM`UcrqhLO2NvHXpPmSUmHzgfQ?Q=72RlQ`e+&a{1X}KtG7C% zU4NT!_W~UKKULKd1p<~A4Y+87chb}npaHd3c1xPbo=waL^as|ZXdnrfS(@s} z1j%UPoEvW{l7T>bRLBKNHo);=fg~gh{rWuO#P}__h?bG?WzDK_V~hMZa~3NRp4FVm z$(}FEPl=A&1_f^UFTjHG4p-5~zRu4!Z#_`8n2|IaVpMO+(5kwZ4L_+iyJUHX`a z`3fQ_ad?#0ceG+K?W#oc4fUFsVCe{q*L{x?%PV%)NMDbe`7&moXsiJmUc(oQ7V36J z;-c>h=*wO$XS>eftS(y@NAhM>_{G0~bEN1_X1_KkODHWAk?fkOZ{wI5*n+fW_E$8^ z%a~1SHry|)%t1jo+*qB|tZ!t>QMIUSaTFoY41iD1Vsbuw!@%mjv?!<9OBqZvWl7oz zzBz`Vb*XKMy|8cDE)!6fDeCF8`Uq55ouLK+!JY%4>8?l(&iclsd6k)tPG4|ahX{K` zkoE>=X>C`!xqBjiG*VAc$wC%g9Pk@r|CA@)SM4E3oE4sSi@H&QxN>5}sU&VNI6*@b zQW`hBcUrmRc~7`UYTEg^?BXd5M;X(y%rPE~Jgil&i_i%E4Jeri(FQ#TEvKR@|6Y#b zm_fM@udyFTqV*DX&!R>RG(M1fPTXJ1TrK2f+tRRW`47O!AsVj zehgnrk%KNZCPIwN4+@c65X+9DEDSu1W#p1^hCk`qeECuq5_msxh^u_d3T< z6}3_HVI!u>V4tk4A12?;i|I91t} zSGz{XX~p*VDIHZm6s-Rly5oVBd-eX(2!`)xX$S!_2gVdP3Yf{3J20gSl~)Cg89OSx zLZ?_u`}yxs1cbh5LVX(#B#g&aAJ=ZY+vrun6ZimvdC}9bsMw-O^NuDd(y9fc1TVqD zQPZ;1pqKbS;V-mFTJ^LGZ>gCjmT-M>U(QMAyXoIJ%$F));c3&dKo);BqS&|LvYb37Ak4$?To7`*HK%4HJ z)x)CXE;&rTXJU~VW6&yRZ#doi!-u{LvW_WfxNxpf0tfB;1X?5}RlG6E^Tdhu4k<+l zs@MVfwFggjM@QYC+eVyHB_hSp(sdQ18{Wv2rmOBcjL=s`dQ!0wC zOJ@Z5(y+54wej=uMrrl*XxHJKm651Bn(GE_)#m_=&Y1Ka3hTQ_&C)3@SeRP9PM)-I zX;3dW%)tw{91>(lDmgPG)O_9otexJ&8ofUh`GoI%9EYc#sv04$p-G#&j&yf4 zq;ZugLqloY=mAQ2J(0YnPWZN<5*Diw^{wCJMPR-wcA-SYx0L;wPk71Ywv8;tP&e{H zHu6y^;Rr?BvZ*_QotIte5RX23vUg?kYgsN{<2~s995?S)mzDiXIzHNY)x=*hX}c$^ zxYzSKLh;gm6=LsZV8RV<1VJv(CAY`*9PZzk`~y;PUMcOzyvKm7V`2g7g`_mz(TH}& zBQ&jj6EdB;oOy58+}?W7$A%j6=#d^Ftd;~l3G3*6nqil?_mdd0;c&A-hfYsfCG1;# zVq8R6fN+((`^4)O?^#(niX6WP3hw87u%1uW`@xRAX4DU*RefXe1$?Quf(yQSeh>Z( z@wecR(l0z1rVW;OI-06Nt9I54tJX(`4CUhjkYmO@JdoH6t47sbdquoJkKT>VFqr`}MemvB z&8&DEqqi_`Tp)4ft{?t~_fEx)Toz-56YUBX|< 
zn;DsXV0PF5T7;wioExq|LA0bWM`L%^3oNK=-q^P%D(@FpFV!u+?voR}3p+2aY5OL`@AcOLk zCsMmxpabQJQ-`HZ2`0(~M~tOStzF^aA7^^67d8;C4mkhgttike5BbndYy2W4z|ub? zXK)P8*sWZGu*T2wG>94V)aWpIJnVxe=JYmii59D>F$>dg(8o zC1;Cc%%)KP7hO%VjJYjLy>%40RVrECnzN}}8=0d^K(<7f8)kJ-t%mb8GF46>ein=h zL|Aikdc1X6b`knr!OHuZFi2b3Sm8teKy662xqW2POT>7y#Y5=s)Z?ZHs9DSz1%PeE z8I`Q|9XN@?f8D5}p_ezk{(CLM*nj^w0|dMwiix7(8H+iZx7}{bb&dkQEUF*^0h=vf zAkud&Dixs^A79M-3acmMY?A=0mcJy(liS~;t?MJC1i5-gD z33y$>VPd?BW|6f8U$r9}yRfB2R`VoZ&5-!5&&g02QP&G-`Ofgi0=EhYlT!cNdLTDM z%t=s4RZ%F{f3_F|Vz|(&|3(k;u_zzczhZw;?417vFIFPRMp1F@f0d8=qNVv?+=ULQ z1!K7oLl%lZY=QKHv5-O;FAibI1{f#l5i^p&%7SEpF(Q9lY(uz^V6m&*|J57_1o_{6 z@mG-mA1l&XjFI~f=q?IVK_-c*k(To#2gEqRv>;kgA1MelDELp(-zTVB A5&!@I diff --git a/examples/data/custom-schema/docs/tabular-parser-entry-mode.archive.yaml b/examples/data/custom-schema/docs/tabular-parser-entry-mode.archive.yaml index ba2cce4a6..6ef95ab1c 100644 --- a/examples/data/custom-schema/docs/tabular-parser-entry-mode.archive.yaml +++ b/examples/data/custom-schema/docs/tabular-parser-entry-mode.archive.yaml @@ -1,18 +1,17 @@ # This schema is specially made for demonstration of implementing a tabular parser with # entry mode. definitions: - name: 'Tabular Parser example schema' + name: 'Tabular Parser example schema' # The main section that contains the quantities to be read from an excel file + # This name can be changed freely. sections: Tabular_Parser: base_sections: - - nomad.datamodel.metainfo.eln.ElnBaseSection - nomad.parsing.tabular.TableRow # To create entries from each row in the excel file # the base section should inherit from `nomad.parsing.tabular.TableRow`. 
For this specific case, # the datafile should be accompanied quantities: - quantity_1: + My_Quantity: type: str m_annotations: tabular: - name: Column_1 # For entry mode, the convention for reading data from csv/excel file - # is to provide only the column name and the data are assumed to exist in the first sheet \ No newline at end of file + name: My_Column diff --git a/examples/data/custom-schema/docs/tabular-parser-row-mode.archive.yaml b/examples/data/custom-schema/docs/tabular-parser-row-mode.archive.yaml index e19094f3d..a4ba73956 100644 --- a/examples/data/custom-schema/docs/tabular-parser-row-mode.archive.yaml +++ b/examples/data/custom-schema/docs/tabular-parser-row-mode.archive.yaml @@ -4,49 +4,32 @@ definitions: name: 'Tabular Parser example schema' sections: Tabular_Parser: # The main section that contains the quantities to be read from an excel file + # This name can be changed freely. base_sections: - - nomad.datamodel.data.EntryData - nomad.parsing.tabular.TableData # Here we specify that we need to acquire the data from a .xlsx or a .csv file quantities: data_file: type: str - description: | - A reference to an uploaded .xlsx m_annotations: - # The tabular_parser annotation, will treat the values of this - # quantity as files. It will try to interpret the files and fill - # quantities in this section (and sub_sections) with the column - # data of .csv or .xlsx files. tabular_parser: comment: '#' # Skipping lines in csv or excel file that start with the sign `#` - mode: row # Setting mode to row signals that for each row in the sheet_name (provided in quantity), - # one instance of the corresponding (sub-)section (in this example, my_substance_1 sub-section as - # as it has the repeats option set to true), will be appended. Please bear in mind - # that if this mode is selected, then all the specified quantities should exist in the same - # sheet_name. 
- ref_to_sub_section: # This is the reference to where the targeted (sub-)section lies within this example schema file - - subsection_1/my_substance_1 + mode: row # Setting mode to row signals that for each row in the sheet_name (provided in quantity) + target_sub_section: # This is the reference to where the targeted (sub-)section lies within this example schema file + - My_Subsection/My_Section sub_sections: - subsection_1: + My_Subsection: section: - m_annotations: - eln: - dict() sub_sections: - my_substance_1: + My_Section: repeats: true # The repeats option set to true means there can be multiple instances of this # section section: quantities: - quantity_1: + My_Quantity: type: str m_annotations: tabular: # The tabular annotation defines a mapping to column headers used in tabular data files - name: Sheet_1/Column_2 # Here you can define where the data for the given quantity is to be taken from - # The convention for selecting the name is if the data is to be taken from an excel file, - # you can specify the sheet_name followed by a forward slash and the column_name to target the desired quantity. - # If only a column name is provided, then the first sheet in the excel file (or the .csv file) - # is assumed to contain the targeted data. 
+ name: My_Sheet/My_Column # sheet_name and column name of the targeted data in csv/xlsx file data: m_def: Tabular_Parser # this is a reference to the section definition above - data_file: tabular-parser-entry-mode.archive.xlsx # name of the excel/csv file to be uploaded along with this schema yaml file \ No newline at end of file + data_file: tabular-parser.data.xlsx # name of the excel/csv file to be uploaded along with this schema yaml file diff --git a/examples/data/custom-schema/docs/tabular-parser.data.xlsx b/examples/data/custom-schema/docs/tabular-parser.data.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..b96fffc18ce1772d4ebe65f18c5fd37730b2a3fc GIT binary patch literal 11250 zcmeHN1zQ~3vK>MQ?iwVxyJT>ez~B}OlgI!P1PvVY_WyZc~JuoUud3 zO=EZysWWG$38UaMy49dzX`4g`@28FS&I2FuY)%R&q?!X4$EtI>l%LBxN)lw)(xqSP zH?&~o5_gfN=htEMS;lrpkGyQF$~|+<0^Y{T8imEwkRnyHtqlcVmpVFr)y)qaS50IR zZg3b=ct+?AD6V=D7X96PD|*tUXK7QuD}I$vDGJI*zG|Sje3&0zI1FDPZhl{qk?SV; zqjk-*bJ1h#d0Kod^Sx}Yi@J0vH{CrV!a(14`|CG^A8dHTI9{#9Q8?y6;my1_nM`I1 zf8|hN3^*r*Zi%FZy((*2N89x#YJmm-9v`6r3V&fJSe1qRC&cY!AOu8&FjUvU*vgTa z>5u*YnEC&hj(_^=WpT3d9V|#8$C7uUy_Zuxa9E)ryb#K)ai1&G+0iGKatqZ6zv3 z%^sP#t!m-BGi%1qbg1X5?BWCkd*5o(oH{`Bi(zT1 zEb)jMN-(0=7O%lZ9G1YS;+iU}DE`v?W)Xf-lFILB?!(GB;Z9mLpmT~iY!&}Ila758 zJ^BgD9T4409XK_j=0(J7=K6Wp{9c-g)dw&}Tm_=Ki?nuL;>nLyc@~a~Rro?C?L@xB zT=a%UD2q~D?`od_Mv-BW(RbaeJXAe#2bueXuT-Uu<4c;!lP`rOP99Cj6*P6qpQAO) z*EGDb1~xVgxwmEAjK4P1@hvDAR`HJt0MZTD8_Z!v>(jU#43c~~w;Di90qYRQ#5%h& zY}=0as>^)|))Xd1{aMA%XKT=eKi97Zcfm=C?A99d>f3j6Z`jMK-UEfq%Wuc6Ual>p z&g-YMofmNFC|TkrJB9|s)OmRE)o<3S@1aG=TS%~-N`ax2=EJlOLXTaO0d*ZnZEZ=W z#1F0O9fo@kMJ6e?A7?&t9%Gs|Z5f_=nAMIn_jguk^*?NQ&!lF%7Xo<*$ybCWY;Pk^ zXOPX#`<}Z5=k6CQ$0%ZuQ7HQf+;K@4zEMG~&pHhXyd^STkfGC_Of~@>Iu=N%~ssH5MGWf6rBlq*keqpu8BRPVLL9X3Maj@TO|p-<7I=;@;h?`aJG_J?(-}9-jHgkhaHl zJ~U&=;1u!iJg9mAA0`PhtucozKJb34wxgM`v6Cb7uLJ8J@(uav@$uI#BVINpodqrA zIPr#J+QD&s;1;-g*TR}Nr`C;J=aau)t$Kyhs&`AKt6V~K45zJ 
z3%8-%WKIJ?Dh~c)an#h}dg&7DjOOa5LOwP@sD3|3GBQ7=gC}Z-N+MPjFn40~ioG*n z?bWR-*lH=bu#2B-=o0D~bZBwl~`)y4tJM?npVoPFN17l6Co1+(f-q zaa>7#U1sx*q{()cZO@2$cYS@F&2}pyCgote@CEEu2)=JwftiDG^orgoto|Nq&w|t&~0&8a7oSpjc#EA z<5r+=7A4_QQ;OTydnkk;*vT-O-NT00CM(ne-<86|I4ORLF*WmjLr9?8@d!Y9HcX#_tOB=NCwNlt}*Tas#$`VV9QVtsdPHuC1)i}o6Br76n_|D9IaVacSY*3vLFy3C}Q)VXhHTs zLKm!DIxFr2S~1mGdoG2rlB!4HTkor1Q-xm_uc4Kckw2GZ0F4aNzwMQhrLR_H8)a8h zqUo0z@vD|TL;RLo2tts$rlh=XV0kBDVF0o~4>K*xk4?(feYU={Ft?Mujcbf+nqQbS z`-hDEry72R-UPZsl#m+!zxp!U6JJ)*u}x>e^3JY(^t*fY%_~@pbZWj>Q@7AOd*nT9 z8$$P>OPZ7N<81{p#fMz*Cz?}M;_VF225OBZLh*Hewt@yL#GrdL_p*YTjl()2!LKVL z4jbm8^Kl$C#xNcR_)BFgty)C+^Sf!R>;598h$;jVjDbQEY4Ew;bJaLUDoXGMt18ye zOSBT8HyE;L!TLCG?OK!c3{zOh8mbvJF>yyQma@#b>Y_-Km6pr9in61$4gsH*zNRbE z^2E-bgrrkbNAL+Jqb|Iqt1-_xCYOUde~%!5?zKMFt<&m&kwI5%Z0x0ga+6lMp!+=1 zZ!~_~`7z$e4uj^R_Yg3mG`0Y0-<6{0iR`k)8jv%%a4z1=N`|>1KGxqJgUjmF@m%Mc2a-_EuMq<=3#K9uOs zED#i8CA~76X0cdbNB4W<(u41I2W&^XTN$1|YUASK);;Q>JJK0#rKvQ+p5xq-1Gc)E zTiA8`WEDy!jVN<0-_xOH10<7~Yto+_$=VpZpFJG4b7lYEbY+M+;sEtN3rh&1Bq zR2hwx*Y)iV?*`^tF5)mha@K6IphrP9npMfDB9#czp!+*L$GWI9tfI$VHaqw$ z*ndl3(6}r@2MJop_tG%jl(fz}Ec*PPWEjshz?2}5d!OJO#i*sBtM<?D$ct{PH}SdSVh1t(tQ0tI z+vXfYt-oX&s7_r0U zzoqMf^1MP3gs$rUov!Rp=o&BmN9gH+gdQS>l|cJm%;2&TmC~bvk{do=6S%ilGS=JG z21FDV7W3bE#a z3ZtecKlQI4me%x^re^^DBp**<%0*!!*m(#V%ct284ve}N zeV)c&CbYg^$_jiqmcuWi8DMIh`QeBZqm%7xpC1G|a8lQ}mrndBp_3I=WZoSL$4BG? 
z4wcUVI)9sBNPW{-i?7Iqk+&8Tc2E62lmBOmRl;ER`j`<={Qcs!-jK%X#)_xG5|4bX z=?FvWnN!yN$_M9u=HF2%C|(C4kOeKMlB_{U#^8>Yb#y*53{C*t71rXzY2BArwI|v( z%(B8T&|Pm%nC#j5b9!*Z75GJ_YOv)dG_W$#U>fONb3kM5VB6zV#Qez=zQ8wB5mMSd ziccw4JNXfDbI+kZigMN$1+B#bj%bDBCj#-~*oumnQJdP3gV+FHwOXuOB0|(Q)N|I< z$v#RH=2yQChrzCHE703_>&2?I(>_!-LB)S)5aR zm08k)U0=$q;|=20)W^h20q#T8_rQDwxo(px*NN8m_uBN$4LF`Z0<=CCPP@!lmI0%U z6uc>-p5M#<`6l?JW@MiKUg@q7OaZS?(limQa&75iuL*aExFrp(Z0$-l(U=U zkEHgmY=D!Qv9&Stul=tn<$=afBmpN@EAEXTlB4V0>-8AQ<<$|Z_$3OX%mhMk-GSm; zcDA@?0?cQW+~-O(`31=$w)_d>LeR)9KV#r2YIljpsE20B%Qq>~tw5SC$dPXzT8fG; zvwS>$IGv8Sq86E6Q1s;C~xcJgeT9zvqP1VO>zFPAf zzofyy@G-llyx+$C!i+QamP%~%g@&k2&LdlcjGmYJnB&=Qo5eT@T%e|BfiCnX?Vj`f zd(GCm@ggWQj)26%x>=t6M4MQBWQCilk4_cgndtNqySh*?M<9klgxs%5z?|wusIA)%)yh zHy(Gz`+QqIhG>3)kSOJZG`#wx;=AE7RiwT@%IztXU}h(aNN1c7`i{$|qk-OM@3ctv z+MTNo+ZPDcyJ4F7S}o@qiBB6S-ltYdRKqM9M!Dd*@~oDm4V;|VWyvhX|7Zwr3LpJo zdBg-N16*#i=cuhI%|%xkZy`rugcowPjVrfSi$)QZHi?F9&hWFX$y9~^JVL7s44a@7 zbUoA9lyO{%t~z9 z8<7*VGcY?WaZRmK`_XV3rpX>SM_xL1^ggzqd-Qad`47K7f9YlMlH}sc#SogJWdNUk z2pj?u?ovP)_Jg)qW~rvtQ9}A`IuI2aOX6({X8cBU*GPB`^_^?~D_1DT7cener?&|_ z=`_aMV*a1uFNql}J;xq6eGG~s9frmIQB)%$t#SQISFrv=4E>IMs_VX~{V*C`B(3FM6H_Edt@ zZ1XXrNSdO?OW!ggWX@IG*r{?1^Fy8Kv9m|ZZ{>ct#Y9iYnKqO)rWGT?#9h15YDbGE zg9>*v`Hz>&C-;4JR9rQZU(n1=;OWbl5GXq1De9t#V7)U9dN-GAz z!r8o}MLeI8Jj&^FG|MR#=EU3U#@0q|9 zg{X{TjW-!d)iqT8Evir)?8 z-9Roq6P>J~E3mnjV|1vnWNJXcm~sa-f1mV@gl=PD%pPQu)o7OMi1_?jaiNx-0S-xi z5_o0pEWyQpT0t$?n>ncIEX1%6%DlSiUu>Q-fpzug^RV;fDpJ}}5V1`rr@0v?=uH6X zMJ~o`M8mB#b5k=u`p{azE`eql6Y%w0(NqGJKBCu^2D{+B9k{R3-gBlyUDf-bsEsnQ ze*>khKPv8GUfDClbhN(7I24Q?87ZSXhztIHv-VYc&ddH7oh0)X+@GUi?P3vZ0_LZA z-XfPbdN9os{H|Q<7};Z~YK)NYq=g(AWq!&=VV@HL?C?bk&Vtg|Cr{%OY$AyF(-(sK zoO=>_L%mH}Z=n=)_jsPsjKcwId3%cJ^BYa7VjemMIhKFvzdDuKhB+s z0ryaCJ5eg-M?R<{YJ+0ah`meWS;L9yjID@$5YY&z2SVGe1JHxOEu19?lZu-g9S*gAD;< zHAb|%faC+3^MSsk;aGEBsm>TX1ZH1vD|?RCKAI@vqrHtNy3e( z@v<{*Rt7Zy%0adm(jg_oMVd^RHG!$Z!GdBb`Ly{poSqXajZv$*k3q7i7B|)E3!kHb z3;RRwVCypGOJ|OXTQID%<_)6>9T!KHL9+IFlZDg+Z|Ub-DUbI_WNE=a7wIS`)lb!x 
zO;UF~5(~Q#gck;{`)Xb%gf;Bbg8TFs(~Jz%w|4jhDkf&}phc{7*iscND%=uSwOIU( zxAZvYxlQ=POfF!#F*}%H-d4;J=cnJlG?JhY@}|GI8_4CaY#N{locSx}4j_m|4qdu6g6K6&pw-?twSW*Vo|Y zu*W$?D`2-dk%heU$GVqKT()g(ClOJ|c3n2vw}a|Kj;KC5gs@~;$yHfH;182iinAIT zIL=e2do1Iz-iw_;Wwz5my_7FwS7sIBT?L<*KX)1D;wkHY@>}Q3IAKmbv3*2jgw(G8 zCNma^URf>(5sL8V007oM<-kne!PrRI$-&&l^p`3GCGguqR$w8QpnKFLcFq@ZZ&hK8 z!c?4hr-zeZlKp~4u+mHJZsR)1o*C->;J>^Pco%tfLQQ{$7|n!D#j*Gz0boMMe3~(O zTzOff2C79*i3E{S)I!Um?yifLfk|6%oJ9}~_c*(q-uhvAlv;et%1|K>-i;Yb-ED;W zu>b5^Z-lt;Nqauchd3Fq8AhXLVSd+HQE(n|5J$9gYB$yW!Ih$Fd1p#0he`4(a_@Un zwQr94&r#i~;?OHh#7bW_O+~e&+wW(GPCQugrFB-@Y8pz{7hZJUv zYz-A0Z0#JG4Q(Ba|IlDa-@yOsF%VA-iffZw{6{qb<-@T?kDPSdJTgW(7dCvjIEhUK zKLb;^_1x#6!-*OD8hu<+`yYdM4}3p7leQ~$w3>y&y63RG?I@k%XYoN`bnwDXRFxO~6NXUvX>#W{eSKk!L zinU&z(%m2BL3vjbz#ltG;jDHc5sZ294#W)F%USo2JzR{glm}ht-Lcox%zM@|qGOFP%*IVe$KQzj{~>_t%u|-NEJ}uiv;^ zG8Tdxgu@s5L^lN_iH=-;5dWQy5U1-z9z&j51jMz`{_NKJc6R^sYRJI+bIXYR-3>x> z`TC_KX4<&Sy0kVGSJYD8F0-3%IFWftnBn z6btLZLdQgzv#L?yB<_z`c|$7#jiS@?c=2;P!lfO5c3yQ(*nVMxM)Y-{dNo`L9#Ac) z1V7B5>9Fr@hhO|V*g!pFganqq?`Ztjp#1CjH~ozY(tic`Ys1yQ4F5RhLN58I_N%9c zPn%GFoA$i;kCv3D#(%A*|272xcoBXX|CieOQ=F$Y)!#_oNdLZx|ERD&MR{7y`i&w5 z=>_;_cg>$se$}>~qC8Eq|3CS|1fOBY3eqqT SPXhpu{&+#0b(H3hZ~q6Fec03h literal 0 HcmV?d00001 diff --git a/nomad/parsing/tabular.py b/nomad/parsing/tabular.py index 9e90eb756..4de34ebe6 100644 --- a/nomad/parsing/tabular.py +++ b/nomad/parsing/tabular.py @@ -90,9 +90,9 @@ class TableData(ArchiveSection): # Returning one section for each row in the given sheet_name/csv_file sections = parse_table(data, self.m_def, logger=logger) - # The ref_to_sub_section contains the ref to the location of which the sections are to be appended. + # The target_sub_section contains the ref to the location of which the sections are to be appended. # Calling setattr will populate the non-repeating middle sections. 
- section_names: List[str] = kwargs.get('ref_to_sub_section') + section_names: List[str] = kwargs.get('target_sub_section') top_level_section_list: List[str] = [] for section_name in section_names: section_name_str = section_name.split('/')[0] @@ -296,8 +296,18 @@ def read_table_data(path, file_or_path=None, **kwargs): comment=kwargs.get('comment'), skiprows=kwargs.get('skiprows')).to_dict()] else: + if kwargs.get('sep') is not None: + sep_keyword = kwargs.get('sep') + elif kwargs.get('separator') is not None: + sep_keyword = kwargs.get('separator') + else: + sep_keyword = None df.loc[0, 0] = [ - pd.read_csv(file_or_path, engine='python', comment=kwargs.get('comment'), sep=kwargs.get('sep')).to_dict() + pd.read_csv(file_or_path, engine='python', + comment=kwargs.get('comment'), + sep=sep_keyword, + skipinitialspace=True + ).to_dict() ] return df diff --git a/tests/data/test_examples.py b/tests/data/test_examples.py index 9db7b843c..61a0d4dfc 100644 --- a/tests/data/test_examples.py +++ b/tests/data/test_examples.py @@ -36,8 +36,9 @@ def test_eln(mainfile, assert_xpaths, raw_files, no_warn): @pytest.mark.parametrize('mainfile, assert_xpaths', [ - pytest.param('tabular-parser-col-mode.archive.yaml', ['data.my_substance_1.quantity_1'], id='col_mode'), - pytest.param('tabular-parser-row-mode.archive.yaml', ['data.subsection_1.my_substance_1[4]'], id='row_mode'), + pytest.param('tabular-parser-col-mode.archive.yaml', ['data.My_Quantity'], id='col_mode'), + pytest.param('tabular-parser-row-mode.archive.yaml', ['data.My_Subsection.My_Section[4].My_Quantity'], + id='row_mode'), pytest.param('tabular-parser-entry-mode.archive.yaml', [], id='entry_mode'), ]) def test_sample_tabular(mainfile, assert_xpaths, raw_files, no_warn): diff --git a/tests/parsing/test_tabular.py b/tests/parsing/test_tabular.py index 8fd559f3e..e8ddc2300 100644 --- a/tests/parsing/test_tabular.py +++ b/tests/parsing/test_tabular.py @@ -303,7 +303,7 @@ data: assert main_archive.data.my_substance.quantity_0 ==
['0_0', '1_0'] -@pytest.mark.parametrize('test_case,section_placeholder,ref_to_sub_section_placeholder,sub_sections_placeholder,csv_content', [ +@pytest.mark.parametrize('test_case,section_placeholder,target_sub_section_placeholder,sub_sections_placeholder,csv_content', [ pytest.param('test_1', '', '- my_substance1', '''my_substance1: repeats: true section: Substance1''', 'header_0,header_1\n0_0,0_1\n1_0,1_1', id='simple_1_section'), @@ -327,7 +327,7 @@ data: repeats: true section: base_section: Substance1''', 'header_0,header_1,header_2\n0_0,0_1,0_2\n1_0,1_1,1_2', id='nested')]) -def test_tabular_row_mode(raw_files, monkeypatch, test_case, section_placeholder, ref_to_sub_section_placeholder, +def test_tabular_row_mode(raw_files, monkeypatch, test_case, section_placeholder, target_sub_section_placeholder, sub_sections_placeholder, csv_content): ''' Testing the TableData normalizer with mode set to row. This feature is used to create a section out of each row in a @@ -351,8 +351,8 @@ def test_tabular_row_mode(raw_files, monkeypatch, test_case, section_placeholder tabular_parser: comment: '#' mode: row - ref_to_sub_section: - + target_sub_section: + sub_sections: data: @@ -360,7 +360,7 @@ data: data_file: test.my_schema.archive.csv''' schema = base_schema.replace('', section_placeholder) \ - .replace('', ref_to_sub_section_placeholder) \ + .replace('', target_sub_section_placeholder) \ .replace('', sub_sections_placeholder) \ .replace('', test_case) schema = re.sub(r'\n\s*\n', '\n', schema) @@ -396,6 +396,44 @@ data: ii += 1 +@pytest.mark.parametrize('schema,content', [ + pytest.param( + strip(''' + definitions: + sections: + MyTable: + base_section: nomad.parsing.tabular.TableRow + quantities: + header_0: + type: str + header_1: + type: str + '''), + strip(''' + header_0, header_1 + a,0 + 0,b + '''), id='space in header' + ) +]) +def test_tabular_csv(raw_files, monkeypatch, schema, content): + '''Tests that missing data is handled correctly. 
Pandas by default + interprets missing numeric values as NaN, which are incompatible with + metainfo. + ''' + csv_file, schema_file = get_files(schema, content) + + class MyContext(ClientContext): + def raw_file(self, path, *args, **kwargs): + return open(csv_file, *args, **kwargs) + context = MyContext(local_dir='') + + main_archive, _ = get_archives(context, schema_file, None) + ArchiveParser().parse(schema_file, main_archive) + run_normalize(main_archive) + main_archive.m_to_dict() + + @pytest.mark.parametrize('schema,content,missing', [ pytest.param( strip(''' -- GitLab From ab2346aad3868b66960b3aca5776d36cadea0c16 Mon Sep 17 00:00:00 2001 From: Amir Golparvar Date: Thu, 13 Oct 2022 17:42:40 +0200 Subject: [PATCH 23/24] improving python test. Correcting eln docs --- docs/schema/elns.md | 3 ++- tests/parsing/test_tabular.py | 16 ++++++++++++---- 2 files changed, 14 insertions(+), 5 deletions(-) diff --git a/docs/schema/elns.md b/docs/schema/elns.md index a9cbb2a98..9530e1300 100644 --- a/docs/schema/elns.md +++ b/docs/schema/elns.md @@ -56,7 +56,8 @@ In order to import your data from a `.csv` or `Excel` file, NOMAD provides three with each one comes unique options for importing and interacting with your data. To better understand how to use NOMAD parsers to import your data, three commented sample schemas are presented below. Also, each section follows and extends a general example explained thereafter. Two main components of any tabular parser schema are -using the correct base section as well as providing a `data_file` quantity with correct `m_annotations`. +using the correct base section as well as providing a `data_file` quantity with correct `m_annotations` +(except for entry mode). The following sample schema creates one quantity off the entire column of an excel file. For example, suppose in an excel sheet, several rows contain information of a chemical product (e.g. 
purity in one diff --git a/tests/parsing/test_tabular.py b/tests/parsing/test_tabular.py index e8ddc2300..ffa1e1915 100644 --- a/tests/parsing/test_tabular.py +++ b/tests/parsing/test_tabular.py @@ -402,17 +402,25 @@ data: definitions: sections: MyTable: - base_section: nomad.parsing.tabular.TableRow + base_section: nomad.parsing.tabular.TableData quantities: + data_file: + type: str + m_annotations: + tabular_parser: + comment: '#' + mode: column header_0: type: str header_1: type: str + data: + m_def: MyTable + data_file: test.my_schema.archive.csv '''), strip(''' header_0, header_1 - a,0 - 0,b + a,b '''), id='space in header' ) ]) @@ -431,7 +439,7 @@ def test_tabular_csv(raw_files, monkeypatch, schema, content): main_archive, _ = get_archives(context, schema_file, None) ArchiveParser().parse(schema_file, main_archive) run_normalize(main_archive) - main_archive.m_to_dict() + assert main_archive.data.header_1 is not None @pytest.mark.parametrize('schema,content,missing', [ -- GitLab From 033b632240be97f71dde895bee2559cc64d2de49 Mon Sep 17 00:00:00 2001 From: Amir Golparvar Date: Fri, 14 Oct 2022 12:22:42 +0200 Subject: [PATCH 24/24] Moving tabular docs. 
Improving docs text for better readability --- docs/schema/elns.md | 50 +++++++++++------- docs/schema/suggestions.yaml | 10 ++-- .../docs/tabular-parser-col-mode.archive.yaml | 0 .../tabular-parser-entry-mode.archive.xlsx | Bin .../tabular-parser-entry-mode.archive.yaml | 0 .../docs/tabular-parser-row-mode.archive.yaml | 0 .../docs/tabular-parser.data.xlsx | Bin tests/data/test_examples.py | 2 +- 8 files changed, 37 insertions(+), 25 deletions(-) rename examples/data/{custom-schema => }/docs/tabular-parser-col-mode.archive.yaml (100%) rename examples/data/{custom-schema => }/docs/tabular-parser-entry-mode.archive.xlsx (100%) rename examples/data/{custom-schema => }/docs/tabular-parser-entry-mode.archive.yaml (100%) rename examples/data/{custom-schema => }/docs/tabular-parser-row-mode.archive.yaml (100%) rename examples/data/{custom-schema => }/docs/tabular-parser.data.xlsx (100%) diff --git a/docs/schema/elns.md b/docs/schema/elns.md index 9530e1300..f72abe92f 100644 --- a/docs/schema/elns.md +++ b/docs/schema/elns.md @@ -53,15 +53,20 @@ ELN edit annotations and components [here]({{ nomad_url() }}/../gui/dev/editquan ## Tabular Annotations In order to import your data from a `.csv` or `Excel` file, NOMAD provides three distinct (and separate) ways, that -with each one comes unique options for importing and interacting with your data. To better understand how to use +with each comes unique options for importing and interacting with your data. To better understand how to use NOMAD parsers to import your data, three commented sample schemas are presented below. Also, each section follows and extends a general example explained thereafter. Two main components of any tabular parser schema are -using the correct base section as well as providing a `data_file` quantity with correct `m_annotations` -(except for entry mode). - -The following sample schema creates one quantity off the entire column of an excel file. 
-For example, suppose in an excel sheet, several rows contain information of a chemical product (e.g. purity in one -column). In order to list all the purities under the column 'purity' and import them into NOMAD, you can use the +1) using the correct base section as well as 2) providing a `data_file` quantity with the correct `m_annotations` +(except for the entry mode). Please bear in mind that the schema files should 1) follow the NOMAD naming convention +(i.e. `My_Name.archive.yaml`), and 2) be accompanied by your data file in order for NOMAD to parse them. +In the examples provided below, an `Excel` file is assumed to contain all the data, as both NOMAD and +`Excel` support multi-sheet data manipulation and import. Note that the `Excel` file name in each schema +should match the name of your `Excel` data file; if you use a `.csv` data file instead, replace it with the +`.csv` file name. + +The following sample schema creates one quantity off the entire column of an excel file (`col mode`). +For example, suppose in an excel sheet, several rows contain information of a chemical product (e.g. `purity` in one +column). In order to list all the purities under the column `purity` and import them into NOMAD, you can use the following schema by substituting `My_Quantity` with any name of your choice (e.g. `Purity`), `tabular-parser.data.xlsx` with the name of the csv/excel file where the data lies, and `My_Sheet/My_Column` with sheet_name/column_name of your targeted data. The `Tabular_Parser` is also an arbitrary name that can be changed. @@ -70,27 +75,33 @@ Important notes: - `shape: ['*']` under `My_Quantity` is essential to parse the entire column of the data file.
- `My_Quantity` can also be defined within another subsection (see next schema sample) + ```yaml ---8<-- "examples/data/custom-schema/docs/tabular-parser-col-mode.archive.yaml" +--8<-- "examples/data/docs/tabular-parser-col-mode.archive.yaml" ``` -The sample schema provided below, creates separate instances of a repeated section from each row of an excel file. -For example, suppose in an excel sheet, you have the information for a chemical product (e.g. `name` in one column), -and each row contains one entry of the aforementioned chemical product. Since each row is separate from others, in -order to create instaces of the same product out of all rows and import them into NOMAD, you can use the following -schema by substituting `My_Subsection`, `My_Section` and `My_Quantity` with any appropriate name (e.g. `Substance`, -`Chemical_product` and `Name` respectively). +The sample schema provided below creates separate instances of a repeated section from each row of an excel file +(`row mode`). For example, suppose in an excel sheet, you have the information for a chemical product +(e.g. `name` in one column), and each row contains one entry of the aforementioned chemical product. +Since each row is separate from others, in order to create instances of the same product out of all rows +and import them into NOMAD, you can use the following schema by substituting `My_Subsection`, +`My_Section` and `My_Quantity` with any appropriate name (e.g. `Substance`, `Chemical_product` +and `Name` respectively). -Important note: +Important notes: - This schema demonstrates how to import data within a subsection of another subsection, meaning the targeted quantity should not necessarily go into the main `quantites`. -- Setting `mode` to `row` signals that for each row in the sheet_name (provided in `My_Quantity`), one instance of the corresponding (sub-)section (in this example, `My_Subsection` sub-section as it has the `repeats` option set to true), will be appended.
Please bear in mind that if this mode is selected, then all other quantities should exist in the same sheet_name. +- Setting `mode` to `row` signals that for each row in the sheet_name (provided in `My_Quantity`), +one instance of the corresponding (sub-)section (in this example, `My_Subsection` sub-section as it has the `repeats` +option set to true), will be appended. Please bear in mind that if this mode is selected, then all other quantities +should exist in the same sheet_name. + ```yaml ---8<-- "examples/data/custom-schema/docs/tabular-parser-row-mode.archive.yaml" +--8<-- "examples/data/docs/tabular-parser-row-mode.archive.yaml" ``` -The following sample schema creates one entry for each row of an excel file. +The following sample schema creates one entry for each row of an excel file (`entry mode`). For example, suppose in an excel sheet, you have the information for a chemical product (e.g. `name` in one column), and each row contains one entry of the aforementioned chemical product. 
Since each row is separate from others, in order to create multiple archives of the same product out of all rows and import them into NOMAD, you can use the @@ -100,8 +111,9 @@ Important note: - For entry mode, the convention for reading data from csv/excel file is to provide only the column name and the data are assumed to exist in the first sheet + ```yaml ---8<-- "examples/data/custom-schema/docs/tabular-parser-entry-mode.archive.yaml" +--8<-- "examples/data/docs/tabular-parser-entry-mode.archive.yaml" ``` Tabular annotation accepts the following keys: diff --git a/docs/schema/suggestions.yaml b/docs/schema/suggestions.yaml index 6ee894653..aa25e364b 100644 --- a/docs/schema/suggestions.yaml +++ b/docs/schema/suggestions.yaml @@ -4,12 +4,12 @@ m_annotations: plot: "plot annotations" tabular: - name: "Either < column name > in csv and xls or in the format of < sheet name >/< column name > only for excel files" + name: "Either < column name > in `.csv` and `excel` or in the format of < sheet name >/< column name > only for `excel` files" unit: "The unit to display the data" - comment: "A character denoting the commented lines in excel or csv files" - sep: "In case of reading data from a .csv file, the separator annotation (e.g. ',' for comma or '\\t' for tab) can be specified here." - separator: "Aliases for 'sep'." - mode: "Either 'column' or 'row' only when using TableData as a base-section. Defaults to column." + comment: "A character denoting the commented lines in `excel` or `.csv` files" + sep: "In case of reading data from a `.csv` file, the separator annotation (e.g. `','` for comma or `'\\t'` for tab) can be specified here." + separator: "Aliases for `sep`." + mode: "Either `column` or `row`. Use only when setting `TableData` as a base-section. Defaults to `column`." target_sub_section: "List of paths to the targeted repeating subsection < section >/< sub-sections >/ ... 
/< subsections >" eln: diff --git a/examples/data/custom-schema/docs/tabular-parser-col-mode.archive.yaml b/examples/data/docs/tabular-parser-col-mode.archive.yaml similarity index 100% rename from examples/data/custom-schema/docs/tabular-parser-col-mode.archive.yaml rename to examples/data/docs/tabular-parser-col-mode.archive.yaml diff --git a/examples/data/custom-schema/docs/tabular-parser-entry-mode.archive.xlsx b/examples/data/docs/tabular-parser-entry-mode.archive.xlsx similarity index 100% rename from examples/data/custom-schema/docs/tabular-parser-entry-mode.archive.xlsx rename to examples/data/docs/tabular-parser-entry-mode.archive.xlsx diff --git a/examples/data/custom-schema/docs/tabular-parser-entry-mode.archive.yaml b/examples/data/docs/tabular-parser-entry-mode.archive.yaml similarity index 100% rename from examples/data/custom-schema/docs/tabular-parser-entry-mode.archive.yaml rename to examples/data/docs/tabular-parser-entry-mode.archive.yaml diff --git a/examples/data/custom-schema/docs/tabular-parser-row-mode.archive.yaml b/examples/data/docs/tabular-parser-row-mode.archive.yaml similarity index 100% rename from examples/data/custom-schema/docs/tabular-parser-row-mode.archive.yaml rename to examples/data/docs/tabular-parser-row-mode.archive.yaml diff --git a/examples/data/custom-schema/docs/tabular-parser.data.xlsx b/examples/data/docs/tabular-parser.data.xlsx similarity index 100% rename from examples/data/custom-schema/docs/tabular-parser.data.xlsx rename to examples/data/docs/tabular-parser.data.xlsx diff --git a/tests/data/test_examples.py b/tests/data/test_examples.py index 61a0d4dfc..0bb9d0cd3 100644 --- a/tests/data/test_examples.py +++ b/tests/data/test_examples.py @@ -42,7 +42,7 @@ def test_eln(mainfile, assert_xpaths, raw_files, no_warn): pytest.param('tabular-parser-entry-mode.archive.yaml', [], id='entry_mode'), ]) def test_sample_tabular(mainfile, assert_xpaths, raw_files, no_warn): - mainfile_directory = 
'examples/data/custom-schema/docs' + mainfile_directory = 'examples/data/docs' archive = run_processing(mainfile_directory, mainfile) for xpath in assert_xpaths: -- GitLab