diff --git a/normalizer/normalizer-repo-tags/calculate_repo_tags.py b/normalizer/normalizer-repo-tags/calculate_repo_tags.py index 03999e7bdef6c49e806a2b5bf27470e3b38284e9..5bbc5168a311f94d6d566260c6dea756e4a6514d 100644 --- a/normalizer/normalizer-repo-tags/calculate_repo_tags.py +++ b/normalizer/normalizer-repo-tags/calculate_repo_tags.py @@ -9,20 +9,24 @@ import logging base_path = os.path.abspath(os.path.dirname(__file__)) with open(base_path+'/setting.json', 'r') as file_conf: repo_conf = json.load(file_conf) repo_base_path = repo_conf['repo_base_path'] +rawdata_archive_path = repo_conf['rawdata_archive_path'] def calculateTags(inputDict, backend, calcUri): + repoSectUri = inputDict.get("section_repository_info.uri") if repoSectUri: backend.openContext(repoSectUri) else: backend.openContext(calcUri) repoSect = backend.openSection("section_repository_info") - repo_dic_sub = subprocess.Popen([os.path.join(repo_base_path, 'bin/python'), base_path+'/extract_from_repo.py'], stdin = subprocess.PIPE, stdout = subprocess.PIPE) + + repo_dic_sub = subprocess.Popen([os.path.join(repo_base_path, 'bin/python'), base_path+'/extract_from_repo.py', rawdata_archive_path], stdin = subprocess.PIPE, stdout = subprocess.PIPE) inputDict_str = json.dumps(inputDict)+'\n' - repo_dic_sub_out = repo_dic_sub.communicate(inputDict_str.encode('utf-8'))[0] + repo_dic_sub_out_all = repo_dic_sub.communicate(inputDict_str.encode('utf-8')) + repo_dic_sub_out = repo_dic_sub_out_all[0] repo_dic = json.loads(repo_dic_sub_out.decode("utf-8")) - #backend.addValue("repository_checksum", repo_dic['checksum']) + backend.addValue("repository_checksum", repo_dic['checksum']) backend.addValue("repository_chemical_formula", repo_dic['formula']) backend.addValue("repository_parser_id", repo_dic['prog_name'] + ' v1.0') backend.addValue("repository_atomic_elements", repo_dic['elements']) @@ -57,8 +61,8 @@ def main(): calcUri, parserInfo = {'name':'RepoTagsNormalizer', 'version': '1.0'}) - dictReader=ParseStreamedDicts(sys.stdin) - #dictReader=ParseStreamedDicts(open("/u/jungho/myscratch/nomad-lab-base/normalizers/repo-tags/test/examples/scalaOut2.txt", 'r')) + #dictReader=ParseStreamedDicts(sys.stdin) + dictReader=ParseStreamedDicts(open("/u/jungho/myscratch/nomad-lab-base/normalizers/repo-tags/test/examples/scalaOut1.txt", 'r')) while True: inputDict=dictReader.readNextDict() diff --git a/normalizer/normalizer-repo-tags/extract_from_repo.py b/normalizer/normalizer-repo-tags/extract_from_repo.py index 1680c8e388b3984a6f2fc2942f525f785cbbd183..5293e2ab5d318ac9032eba4e47218f45a6da00fb 100644 --- a/normalizer/normalizer-repo-tags/extract_from_repo.py +++ b/normalizer/normalizer-repo-tags/extract_from_repo.py @@ -1,27 +1,29 @@ -import sys, json +import os, sys, json, zipfile base_path= sys.exec_prefix + '/python' sys.path.insert(0, base_path) -from nomadrepo.core.settings import connect_database, settings from nomadrepo.core.api import API -def extract_metadata(dic_in): - #session = connect_database(settings, None)() - #checksum = session.execute("SELECT checksum FROM calculations WHERE calc_id = %d ;" % 4000000).fetchone()[0] - #session.commit(); session.close() +def extract_metadata(dic_in, path_in): + main_file_uri = dic_in['main_file_uri'][0].replace('nmd://', '') + main_file_uri_list = main_file_uri.split('/') + gid = main_file_uri_list[0] + zip_path = os.path.join(path_in, gid[0:3], gid+'.zip') + main_file_path = "/".join([gid]+main_file_uri_list[1:]) + with zipfile.ZipFile(zip_path, 'r') as zip_f: main_file_size = zip_f.getinfo(main_file_path).file_size work = API() for calc, error in work._parse(dic_in, "normalizerRepo"): calc, error = work.classify(calc) calc.info['oadate'] = None - #calc.info['checksum'] = calc.get_checksum() + calc.info['checksum'] = calc.get_checksum(filesize=main_file_size) json.dump(calc.info, sys.__stdout__) pass def main(): inputStr = sys.stdin.readline() inputDict = json.loads(inputStr) - extract_metadata(inputDict) + extract_metadata(inputDict, sys.argv[1]) if __name__ == "__main__": main() diff --git a/normalizer/normalizer-repo-tags/setting.json b/normalizer/normalizer-repo-tags/setting.json index 12d7fe369f0cfb95d14956440e286f692c26b6b1..76ade16a39ae76328f48f016f0adff07287f01c6 100644 --- a/normalizer/normalizer-repo-tags/setting.json +++ b/normalizer/normalizer-repo-tags/setting.json @@ -1 +1,6 @@ -{"repo_base_path":"/u/jungho/myscratch/NomadRepositoryParser/"} +{ +"repo_base_path": "/u/jungho/myscratch/NomadRepositoryParser/", +"rawdata_archive_path": "/raw-data/data", +"tmp_dir": "/tmp" +} +