Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
nomad-lab
normalizer-repo-tags
Commits
15187b8e
Commit
15187b8e
authored
Dec 08, 2017
by
Jungho Shin
Browse files
adding checksum
parent
9d00a570
Changes
3
Hide whitespace changes
Inline
Side-by-side
normalizer/normalizer-repo-tags/calculate_repo_tags.py
View file @
15187b8e
...
...
@@ -9,20 +9,24 @@ import logging
base_path
=
os
.
path
.
abspath
(
os
.
path
.
dirname
(
__file__
))
with
open
(
base_path
+
'/setting.json'
,
'r'
)
as
file_conf
:
repo_conf
=
json
.
load
(
file_conf
)
repo_base_path
=
repo_conf
[
'repo_base_path'
]
rawdata_archive_path
=
repo_conf
[
'rawdata_archive_path'
]
def
calculateTags
(
inputDict
,
backend
,
calcUri
):
repoSectUri
=
inputDict
.
get
(
"section_repository_info.uri"
)
if
repoSectUri
:
backend
.
openContext
(
repoSectUri
)
else
:
backend
.
openContext
(
calcUri
)
repoSect
=
backend
.
openSection
(
"section_repository_info"
)
repo_dic_sub
=
subprocess
.
Popen
([
os
.
path
.
join
(
repo_base_path
,
'bin/python'
),
base_path
+
'/extract_from_repo.py'
],
stdin
=
subprocess
.
PIPE
,
stdout
=
subprocess
.
PIPE
)
repo_dic_sub
=
subprocess
.
Popen
([
os
.
path
.
join
(
repo_base_path
,
'bin/python'
),
base_path
+
'/extract_from_repo.py'
,
rawdata_archive_path
],
stdin
=
subprocess
.
PIPE
,
stdout
=
subprocess
.
PIPE
)
inputDict_str
=
json
.
dumps
(
inputDict
)
+
'
\n
'
repo_dic_sub_out
=
repo_dic_sub
.
communicate
(
inputDict_str
.
encode
(
'utf-8'
))[
0
]
repo_dic_sub_out_all
=
repo_dic_sub
.
communicate
(
inputDict_str
.
encode
(
'utf-8'
))
repo_dic_sub_out
=
repo_dic_sub_out_all
[
0
]
repo_dic
=
json
.
loads
(
repo_dic_sub_out
.
decode
(
"utf-8"
))
#
backend.addValue("repository_checksum", repo_dic['checksum'])
backend
.
addValue
(
"repository_checksum"
,
repo_dic
[
'checksum'
])
backend
.
addValue
(
"repository_chemical_formula"
,
repo_dic
[
'formula'
])
backend
.
addValue
(
"repository_parser_id"
,
repo_dic
[
'prog_name'
]
+
' v1.0'
)
backend
.
addValue
(
"repository_atomic_elements"
,
repo_dic
[
'elements'
])
...
...
@@ -57,8 +61,8 @@ def main():
calcUri
,
parserInfo
=
{
'name'
:
'RepoTagsNormalizer'
,
'version'
:
'1.0'
})
dictReader
=
ParseStreamedDicts
(
sys
.
stdin
)
#
dictReader=ParseStreamedDicts(open("/u/jungho/myscratch/nomad-lab-base/normalizers/repo-tags/test/examples/scalaOut
2
.txt", 'r'))
#
dictReader=ParseStreamedDicts(sys.stdin)
dictReader
=
ParseStreamedDicts
(
open
(
"/u/jungho/myscratch/nomad-lab-base/normalizers/repo-tags/test/examples/scalaOut
1
.txt"
,
'r'
))
while
True
:
inputDict
=
dictReader
.
readNextDict
()
...
...
normalizer/normalizer-repo-tags/extract_from_repo.py
View file @
15187b8e
import
sys
,
json
import
os
,
sys
,
json
,
zipfile
base_path
=
sys
.
exec_prefix
+
'/python'
sys
.
path
.
insert
(
0
,
base_path
)
from
nomadrepo.core.settings
import
connect_database
,
settings
from
nomadrepo.core.api
import
API
def
extract_metadata
(
dic_in
):
#session = connect_database(settings, None)()
#checksum = session.execute("SELECT checksum FROM calculations WHERE calc_id = %d ;" % 4000000).fetchone()[0]
#session.commit(); session.close()
def
extract_metadata
(
dic_in
,
path_in
):
main_file_uri
=
dic_in
[
'main_file_uri'
][
0
].
replace
(
'nmd://'
,
''
)
main_file_uri_list
=
main_file_uri
.
split
(
'/'
)
gid
=
main_file_uri_list
[
0
]
zip_path
=
os
.
path
.
join
(
path_in
,
gid
[
0
:
3
],
gid
+
'.zip'
)
main_file_path
=
"/"
.
join
([
gid
]
+
main_file_uri_list
[
1
:])
with
zipfile
.
ZipFile
(
zip_path
,
'r'
)
as
zip_f
:
main_file_size
=
zip_f
.
getinfo
(
main_file_path
).
file_size
work
=
API
()
for
calc
,
error
in
work
.
_parse
(
dic_in
,
"normalizerRepo"
):
calc
,
error
=
work
.
classify
(
calc
)
calc
.
info
[
'oadate'
]
=
None
#
calc.info['checksum'] = calc.get_checksum()
calc
.
info
[
'checksum'
]
=
calc
.
get_checksum
(
filesize
=
main_file_size
)
json
.
dump
(
calc
.
info
,
sys
.
__stdout__
)
pass
def
main
():
inputStr
=
sys
.
stdin
.
readline
()
inputDict
=
json
.
loads
(
inputStr
)
extract_metadata
(
inputDict
)
extract_metadata
(
inputDict
,
sys
.
argv
[
1
]
)
if
__name__
==
"__main__"
:
main
()
normalizer/normalizer-repo-tags/setting.json
View file @
15187b8e
{
"repo_base_path"
:
"/u/jungho/myscratch/NomadRepositoryParser/"
}
{
"repo_base_path"
:
"/u/jungho/myscratch/NomadRepositoryParser/"
,
"rawdata_archive_path"
:
"/raw-data/data"
,
"tmp_dir"
:
"/tmp"
}
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment