diff --git a/normalizer/normalizer-repo-tags/calculate_repo_tags.py b/normalizer/normalizer-repo-tags/calculate_repo_tags.py
index 03999e7bdef6c49e806a2b5bf27470e3b38284e9..5bbc5168a311f94d6d566260c6dea756e4a6514d 100644
--- a/normalizer/normalizer-repo-tags/calculate_repo_tags.py
+++ b/normalizer/normalizer-repo-tags/calculate_repo_tags.py
@@ -9,20 +9,24 @@ import logging
 base_path = os.path.abspath(os.path.dirname(__file__))
 with open(base_path+'/setting.json', 'r') as file_conf: repo_conf = json.load(file_conf)
 repo_base_path = repo_conf['repo_base_path']
+rawdata_archive_path = repo_conf['rawdata_archive_path']
 
 def calculateTags(inputDict, backend, calcUri):
     repoSectUri = inputDict.get("section_repository_info.uri")
     if repoSectUri:
         backend.openContext(repoSectUri)
     else:
         backend.openContext(calcUri)
         repoSect = backend.openSection("section_repository_info")
-    repo_dic_sub = subprocess.Popen([os.path.join(repo_base_path, 'bin/python'), base_path+'/extract_from_repo.py'], stdin = subprocess.PIPE, stdout = subprocess.PIPE)
+    # extract_from_repo.py runs under the interpreter found below repo_base_path: one JSON line in on stdin, JSON out on stdout.
+    repo_dic_sub = subprocess.Popen([os.path.join(repo_base_path, 'bin/python'), base_path+'/extract_from_repo.py', rawdata_archive_path], stdin = subprocess.PIPE, stdout = subprocess.PIPE)
     inputDict_str = json.dumps(inputDict)+'\n' 
-    repo_dic_sub_out = repo_dic_sub.communicate(inputDict_str.encode('utf-8'))[0]
+    repo_dic_sub_out, _ = repo_dic_sub.communicate(inputDict_str.encode('utf-8'))
+    if repo_dic_sub.returncode != 0:
+        raise RuntimeError("extract_from_repo.py exited with status %d" % repo_dic_sub.returncode)
     repo_dic = json.loads(repo_dic_sub_out.decode("utf-8"))
 
-    #backend.addValue("repository_checksum", repo_dic['checksum'])
+    backend.addValue("repository_checksum", repo_dic['checksum'])
     backend.addValue("repository_chemical_formula", repo_dic['formula'])
     backend.addValue("repository_parser_id", repo_dic['prog_name'] + ' v1.0')
     backend.addValue("repository_atomic_elements", repo_dic['elements'])
@@ -57,8 +61,8 @@ def main():
         calcUri,
         parserInfo = {'name':'RepoTagsNormalizer', 'version': '1.0'})
 
+    # Input dictionaries are read from stdin.
     dictReader=ParseStreamedDicts(sys.stdin)
-    #dictReader=ParseStreamedDicts(open("/u/jungho/myscratch/nomad-lab-base/normalizers/repo-tags/test/examples/scalaOut2.txt", 'r'))
 
     while True:
         inputDict=dictReader.readNextDict()
diff --git a/normalizer/normalizer-repo-tags/extract_from_repo.py b/normalizer/normalizer-repo-tags/extract_from_repo.py
index 1680c8e388b3984a6f2fc2942f525f785cbbd183..5293e2ab5d318ac9032eba4e47218f45a6da00fb 100644
--- a/normalizer/normalizer-repo-tags/extract_from_repo.py
+++ b/normalizer/normalizer-repo-tags/extract_from_repo.py
@@ -1,27 +1,44 @@
-import sys, json
+import os, sys, json, zipfile
 base_path= sys.exec_prefix + '/python'
 sys.path.insert(0, base_path)
-from nomadrepo.core.settings import connect_database, settings
 from nomadrepo.core.api import API
 
-def extract_metadata(dic_in):
-    #session = connect_database(settings, None)()
-    #checksum = session.execute("SELECT checksum FROM calculations WHERE calc_id = %d ;" % 4000000).fetchone()[0]
-    #session.commit(); session.close()
+def extract_metadata(dic_in, path_in):
+    """Parse one calculation and write its repository metadata to stdout as JSON.
+
+    dic_in  -- input dictionary; dic_in['main_file_uri'][0] is read as
+               '[nmd://]<gid>/<path of the main file inside the archive>'.
+    path_in -- root of the raw-data archive; the zip file is looked up at
+               <path_in>/<first three characters of gid>/<gid>.zip.
+
+    The uncompressed size of the main file, taken from the zip directory,
+    is passed to calc.get_checksum() as `filesize`.
+    """
+    main_file_path = dic_in['main_file_uri'][0]
+    # Strip the scheme only where it is a prefix; str.replace would also
+    # remove an 'nmd://' occurring later in the path.
+    if main_file_path.startswith('nmd://'):
+        main_file_path = main_file_path[len('nmd://'):]
+    gid = main_file_path.split('/', 1)[0]
+    zip_path = os.path.join(path_in, gid[0:3], gid+'.zip')
+    with zipfile.ZipFile(zip_path, 'r') as zip_f:
+        main_file_size = zip_f.getinfo(main_file_path).file_size
 
     work = API()
 
     for calc, error in work._parse(dic_in, "normalizerRepo"):
         calc, error = work.classify(calc)
         calc.info['oadate'] = None
-        #calc.info['checksum'] = calc.get_checksum()
+        calc.info['checksum'] = calc.get_checksum(filesize=main_file_size)
         json.dump(calc.info, sys.__stdout__)
         pass
 
 def main():
+    if len(sys.argv) < 2:
+        sys.exit("usage: extract_from_repo.py RAWDATA_ARCHIVE_PATH")
     inputStr = sys.stdin.readline()
     inputDict = json.loads(inputStr)
-    extract_metadata(inputDict)
+    extract_metadata(inputDict, sys.argv[1])
 
 if __name__ == "__main__":
     main()
diff --git a/normalizer/normalizer-repo-tags/setting.json b/normalizer/normalizer-repo-tags/setting.json
index 12d7fe369f0cfb95d14956440e286f692c26b6b1..76ade16a39ae76328f48f016f0adff07287f01c6 100644
--- a/normalizer/normalizer-repo-tags/setting.json
+++ b/normalizer/normalizer-repo-tags/setting.json
@@ -1 +1,6 @@
-{"repo_base_path":"/u/jungho/myscratch/NomadRepositoryParser/"}
+{
+"repo_base_path": "/u/jungho/myscratch/NomadRepositoryParser/",
+"rawdata_archive_path": "/raw-data/data",
+"tmp_dir": "/tmp"
+}
+