dc85e5b6
Commit
dc85e5b6
authored
8 years ago
by
Lauri Himanen
Browse files
Options
Downloads
Patches
Plain Diff
Made archive.py compatible with python>2.6.
parent
7bbbea2d
Showing 1 changed file: common/python/nomadcore/archive.py (+55 −29)
```diff
+from __future__ import with_statement
+from __future__ import division
+from __future__ import absolute_import
 import os
 import math
 import json
...
@@ -5,9 +8,11 @@ import string
 import h5py
 import numpy as np
 from abc import ABCMeta, abstractmethod
+from io import open
 
-class ArchiveSection(metaclass=ABCMeta):
+class ArchiveSection(object):
+    __metaclass__ = ABCMeta
     """
     Defines a storage independent, dictionary like interface to a section
     inside an archive file with the possibility to do recursive searches and
     indexing.
```
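The metaclass change above is the core compatibility fix in this hunk: the `class ArchiveSection(metaclass=ABCMeta)` keyword syntax is Python-3-only, while the `__metaclass__` class attribute is the Python 2 spelling (Python 3 silently ignores it). A minimal sketch of the two spellings plus a fully version-agnostic alternative; the `Shape` class is hypothetical, not part of archive.py:

```python
from abc import ABCMeta, abstractmethod


class Shape(object):
    # Python 2 reads this attribute to pick the metaclass; Python 3
    # ignores it and would need `class Shape(metaclass=ABCMeta):`.
    __metaclass__ = ABCMeta

    @abstractmethod
    def area(self):
        pass


# Calling the metaclass directly produces a base class that is
# abstract on both Python 2 and Python 3:
AbstractBase = ABCMeta("AbstractBase", (object,), {})
```

Inheriting from `object` in the same change makes `ArchiveSection` a new-style class on Python 2, which the two-argument `super(...)` calls in the later hunks require.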
```diff
@@ -404,7 +409,7 @@ class ArchiveHDF5(Archive):
     datasets.
     """
     def __init__(self, filepath, use_write_cache=False):
-        super().__init__(filepath, use_write_cache)
+        super(ArchiveHDF5, self).__init__(filepath, use_write_cache)
         h5_root = h5py.File(filepath, "r")
         self.index_cache = {}
         self.setup(h5_root)
```
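The `super()` rewrite here (repeated in the two later `__init__` hunks) follows the same rule: the zero-argument form is Python-3-only and fails at runtime on Python 2 with `TypeError: super() takes at least 1 argument`, while the explicit two-argument form works on both. A small sketch with hypothetical classes:

```python
class Base(object):
    def __init__(self, filepath, use_write_cache=False):
        self.filepath = filepath
        self.use_write_cache = use_write_cache


class Child(Base):
    def __init__(self, filepath, use_write_cache=False):
        # Portable spelling: name the class and instance explicitly.
        # On Python 3 alone, `super().__init__(...)` would suffice.
        super(Child, self).__init__(filepath, use_write_cache)
```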
```diff
@@ -434,7 +439,7 @@ class ArchiveSectionHDF5(ArchiveSection):
     BASE64DIGITS = string.ascii_uppercase + string.ascii_lowercase + string.digits + "+" + "/"
 
     def __init__(self, data, path, archive, index_datas, local_index):
-        super().__init__(data, path, archive)
+        super(ArchiveSectionHDF5, self).__init__(data, path, archive)
         _, names, indices = self.get_path_parts(path)
         # Here we drop out the indices of the repository and calculation
         # section, as they are "None"
```
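The `BASE64DIGITS` constant visible in the context lines assembles the standard 64-character base64 alphabet from `string` module constants. A quick sanity check of that construction:

```python
import string

BASE64DIGITS = (string.ascii_uppercase + string.ascii_lowercase
                + string.digits + "+" + "/")

assert len(BASE64DIGITS) == 64  # 26 + 26 + 10 + 2 characters
assert BASE64DIGITS[0] == "A"   # digit value 0
assert BASE64DIGITS[63] == "/"  # digit value 63
```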
```diff
@@ -641,37 +646,58 @@ class ArchiveSectionHDF5(ArchiveSection):
                 "Could not find value at path '{}'."
                 .format(child_path)
             )
-        if test_index.size > 1:
-            raise ValueError(
-                "The HDF file contains more than one dataset for the "
-                "path '{}'. "
-                .format(child_path)
-            )
-        index_row = index_data[test_index[0]]
+        # This error is currently disabled, because it seems that the
+        # metainfo system supports repeating scalar values for one section.
+        # if test_index.size > 1:
+        #     raise ValueError(
+        #         "The HDF file contains more than one dataset for the "
+        #         "path '{}'. "
+        #         .format(child_path)
+        #     )
+        index_rows = index_data[test_index]
 
         # If the value can have multiple shapes, the values are split into
         # different tables. For each table there is a local index in the
         # second column of the index table that we must use.
-        if index_row.shape != (1,):
-            data_index = index_row[1]
-        else:
-            data_index = test_index[0]
-
-        # The data name may depend on the shape, and if so, the
-        # shape is appended to the name as base64 fields
-        data_path = name + "-v"
-        index_shape = index_data.shape
-        if index_shape[1] > 2:
-            for dim in index_data[data_index][2:]:
-                base64dim = self.base64convert(dim)
-                data_path += ".{}".format(base64dim)
-        data = self._data[data_path][data_index]
-
-        # Convert bytestrings to regular strings
-        if data.dtype == np.object:
-            data = np.array(data, dtype=np.str)
+        data = []
+        for index_row in index_rows:
+            if index_row.shape != (1,):
+                data_index = index_row[1]
+            else:
+                data_index = test_index[0]
+
+            # The data name may depend on the shape, and if so, the
+            # shape is appended to the name as base64 fields
+            data_path = name + "-v"
+            index_shape = index_data.shape
+            if index_shape[1] > 2:
+                for dim in index_data[data_index][2:]:
+                    base64dim = self.base64convert(dim)
+                    data_path += ".{}".format(base64dim)
+            i_data = self._data[data_path][data_index]
+
+            # Convert bytestrings to regular strings
+            if i_data.dtype == np.object:
+                i_data = np.array([x.decode("utf-8") for x in i_data])
+
+            # Gather scalar values to a 1D list
+            if i_data.shape == (1,):
+                data.append(i_data[0])
+            else:
+                data.append(i_data)
+
+        # If one object returned, remove the outermost list
+        if len(index_rows) == 1:
+            if data[0].shape == ():
+                data = np.array([data[0]])
+            else:
+                data = data[0]
+        else:
+            data = np.array(data)
 
         return data
```
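The rewritten lookup replaces a single-row read (`index_data[test_index[0]]`) with NumPy fancy indexing over all rows matching the path, then gathers one value per row and unwraps the list again when only one row matched. A standalone sketch of that gather-and-unwrap flow; the index table and the stand-in for the HDF5 read are made up for illustration. (As an aside, the `np.object` and `np.str` aliases used in the diff were removed in NumPy 1.24; modern code uses the builtins `object` and `str`.)

```python
import numpy as np

# Hypothetical index table: one row per stored dataset for a path;
# column 0 is a data index, column 1 a local index within a table.
index_data = np.array([[0, 0], [1, 1], [2, 0]])
test_index = np.array([0, 2])        # rows that matched the queried path

index_rows = index_data[test_index]  # fancy indexing -> shape (2, 2)

data = []
for index_row in index_rows:
    # Stand-in for `self._data[data_path][data_index]` in the real code.
    i_data = np.array([float(index_row[0])])
    if i_data.shape == (1,):
        data.append(i_data[0])       # unwrap scalars to keep a flat list
    else:
        data.append(i_data)

# A single matching row keeps the old scalar/array return type.
result = data[0] if len(index_rows) == 1 else np.array(data)
print(result)                        # -> [0. 2.]
```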
```diff
@@ -687,7 +713,7 @@ class ArchiveSectionHDF5(ArchiveSection):
             digits.append(ArchiveSectionHDF5.BASE64DIGITS[x % base])
             x = math.floor(x / base)
-        return ''.join(digits)
+        return "".join(digits)
 
 
 class ArchiveJSON(Archive):
```
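Only two lines of `base64convert` appear as context here, but they show the shape of the algorithm: repeatedly take `x % base` as the next digit and floor-divide `x`. A sketch of such a conversion, consistent with those lines; the loop condition and default argument are assumptions, not taken from archive.py:

```python
import math
import string

BASE64DIGITS = (string.ascii_uppercase + string.ascii_lowercase
                + string.digits + "+" + "/")


def base64convert(x, base=64):
    # Emits the least significant digit first, mirroring the two
    # context lines of the diff; on Python 3, math.floor returns int.
    digits = []
    while True:
        digits.append(BASE64DIGITS[x % base])
        x = math.floor(x / base)
        if x == 0:
            break
    return "".join(digits)


assert base64convert(0) == "A"    # single digit 0 -> 'A'
assert base64convert(64) == "AB"  # digits [0, 1], least significant first
```

The one-line change in this hunk (`''.join` to `"".join`) is purely stylistic; both quote styles are valid on every Python version.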
```diff
@@ -698,7 +724,7 @@ class ArchiveJSON(Archive):
     become a problem with big files and parallel execution on the same machine.
     """
     def __init__(self, filepath, use_write_cache=False):
-        super().__init__(filepath, use_write_cache)
+        super(ArchiveJSON, self).__init__(filepath, use_write_cache)
         with open(filepath, "r") as fin:
             json_root = json.load(fin)
```
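The unchanged `open(filepath, "r")` call here is the reason for the `from io import open` added at the top of the file: on Python 2 it shadows the builtin with the Python-3-style `open`, so the file is read as a text stream and `json.load` sees unicode on both Python 2 and 3. A small round-trip sketch of that pattern; the file name is a throwaway example:

```python
import json
from io import open  # on Python 2, swaps in the py3-style open


# Write a throwaway JSON file; io.open in text mode expects unicode,
# hence the u-prefixed literal (a no-op on Python 3).
with open("example.json", "w", encoding="utf-8") as fout:
    fout.write(u'{"section_run": []}')

with open("example.json", "r", encoding="utf-8") as fin:
    json_root = json.load(fin)

assert json_root == {"section_run": []}
```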