Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
nomad-lab
nomad-FAIR
Commits
e26e3610
Commit
e26e3610
authored
Feb 10, 2019
by
Markus Scheidgen
Browse files
Refactored datemodel.
parent
1bd83d7f
Pipeline
#43564
failed with stages
in 18 minutes and 15 seconds
Changes
23
Pipelines
1
Hide whitespace changes
Inline
Side-by-side
docs/datamodel_dataflow.png
0 → 100644
View file @
e26e3610
69.9 KB
docs/datamodel_transformations.png
0 → 100644
View file @
e26e3610
16.9 KB
docs/dev_guidelines.rst
View file @
e26e3610
...
...
@@ -90,6 +90,9 @@ Terms:
- repo entry: Some quantities of a calculation that are used to represent that calculation in the repository.
- archive data: The normalized data of one calculation in nomad's meta-info-based format.
.. _id-reference-label:
Ids
---
...
...
docs/introduction.md
View file @
e26e3610
...
...
@@ -189,6 +189,27 @@ the *archive data* (a hierarchy of all parsed quantities), and the uploaded *raw
-
Materials aggregate calculations based on common system properties
(e.g. system type, atoms, lattice, space group, etc.).
### Data
We distinguish various forms of calculation data:
-
raw data: The raw files provided by nomad users
-
archive data: The data extracted from raw files by nomad parsers and normalizers.
This data is represented in the
*meta-info*
format.
-
materials data: Aggregated information about calculations that simulated the
*same*
material.
### Metadata
Metadata refers to those pieces of data, those quantities/attributes that we use
to represent, identify, and index uploads and calculations in the API, search, GUI, etc.
There are three categories of metadata:
-
ids: attributes that are necessary to uniquely identify entities. See also :ref:
`id-reference-label`
.
-
user metadata: attributes provided by the user, e.g. comments, references, coauthors, datasets, etc.
-
calculation metadata: metadata parsed from raw files that describe calculations on a high level, e.g. code name, basis set, system type, etc.
Those sets of metadata along with the actual raw and archive data are often transformed,
passed, stored, etc. by the various nomad modules.
.. figure:: datamodel_dataflow.png
:alt: nomad's data flow
### Implementation
The different entities often have multiple implementations for different storage systems.
For example, aspects of calculations are stored in files (raw files, calc metadata, archive data),
...
...
docs/reference.rst
View file @
e26e3610
...
...
@@ -52,4 +52,8 @@ nomad.client
nomad.utils
-----------
.. automodule:: nomad.utils
\ No newline at end of file
.. automodule:: nomad.utils
nomad.migration
---------------
.. automodule:: nomad.migration
gui/src/components/RepoCalcView.js
View file @
e26e3610
...
...
@@ -67,34 +67,13 @@ class RepoCalcView extends React.Component {
})
}
data
(
quantity
)
{
const
path
=
quantity
.
split
(
'
.
'
)
let
data
=
this
.
state
.
calcData
for
(
let
i
=
0
;
i
<
path
.
length
;
i
++
)
{
if
(
data
)
{
data
=
data
[
path
[
i
]]
}
}
return
data
}
renderQuantity
(
quantity
,
label
,
defaultValue
)
{
const
value
=
this
.
data
(
quantity
)
||
defaultValue
||
''
return
(
<
div
key
=
{
quantity
}
>
<
Typography
variant
=
"
caption
"
>
{
label
}
<
/Typography
>
<
Typography
variant
=
"
body1
"
>
{
value
}
<
/Typography
>
<
/div
>
)
}
render
()
{
const
{
classes
,
...
calcProps
}
=
this
.
props
const
{
uploadId
,
calcId
}
=
calcProps
const
calcData
=
this
.
state
.
calcData
||
{}
const
filePaths
=
this
.
data
(
'
section_repository_info.repository_filepaths
'
)
||
[]
const
mainfile
=
this
.
data
(
'
section_calculation_info
.main
_
file
'
)
const
filePaths
=
calcData
.
files
||
[]
const
mainfile
=
calcData
.
mainfile
const
calcPath
=
mainfile
?
mainfile
.
substring
(
0
,
mainfile
.
lastIndexOf
(
'
/
'
))
:
null
return
(
...
...
@@ -111,48 +90,48 @@ class RepoCalcView extends React.Component {
<
/Download
>
<
div
className
=
{
classes
.
quantityRow
}
>
<
CalcQuantity
label
=
"
chemical formula
"
typography
=
"
h4
"
>
{
this
.
data
(
'
section_repository_info.section_repository_parserdata.repository_chemical_
formula
'
)
}
{
calcData
.
formula
}
<
/CalcQuantity
>
<
/div
>
<
div
className
=
{
classes
.
quantityRow
}
>
<
CalcQuantity
label
=
'
dft code
'
>
{
this
.
data
(
'
section_repository_info.section_repository_parserdata.repository_program
_name
'
)
}
{
calcData
.
code
_name
}
<
/CalcQuantity
>
<
CalcQuantity
label
=
'
dft code version
'
>
{
this
.
data
(
'
section_repository_info.section_repository_parserdata.repository_
code_version
'
)
}
{
calcData
.
code_version
}
<
/CalcQuantity
>
<
/div
>
<
div
className
=
{
classes
.
quantityRow
}
>
<
CalcQuantity
label
=
'
basis set
'
>
{
this
.
data
(
'
section_repository_info.section_repository_parserdata.repository_
basis_set
_type
'
)
}
{
calcData
.
basis_set
}
<
/CalcQuantity
>
<
CalcQuantity
label
=
'
xc functional
'
>
{
this
.
data
(
'
section_repository_info.section_repository_parserdata.repository_xc_treatment
'
)
}
{
calcData
.
xc_functional
}
<
/CalcQuantity
>
<
/div
>
<
div
className
=
{
classes
.
quantityRow
}
>
<
CalcQuantity
label
=
'
system type
'
>
{
this
.
data
(
'
section_repository_info.section_repository_parserdata.repository_system_type
'
)
}
{
calcData
.
system
}
<
/CalcQuantity
>
<
CalcQuantity
label
=
'
crystal system
'
>
{
this
.
data
(
'
section_repository_info.section_repository_parserdata.repository_
crystal_system
'
)
}
{
calcData
.
crystal_system
}
<
/CalcQuantity
>
<
CalcQuantity
label
=
'
spacegroup
'
>
{
this
.
data
(
'
section_repository_info.section_repository_parserdata.repository_
spacegroup
_nr
'
)
}
{
calcData
.
spacegroup
}
<
/CalcQuantity
>
<
/div
>
<
div
className
=
{
classes
.
quantityRow
}
>
<
CalcQuantity
label
=
'
upload id
'
>
{
this
.
data
(
'
section_calculation_info
.upload_id
'
)
}
{
calcData
.
upload_id
}
<
/CalcQuantity
>
<
CalcQuantity
label
=
'
calculation id
'
>
{
this
.
data
(
'
section_calculation_info
.calc_id
'
)
}
{
calcData
.
calc_id
}
<
/CalcQuantity
>
<
CalcQuantity
label
=
'
mainfile
'
>
{
mainfile
}
<
/CalcQuantity
>
<
CalcQuantity
label
=
'
calculation hash
'
>
{
this
.
data
(
'
section_calculation_info
.calc_hash
'
)
}
{
calcData
.
calc_hash
}
<
/CalcQuantity
>
<
/div
>
<
Divider
/>
...
...
nomad/api/upload.py
View file @
e26e3610
...
...
@@ -51,20 +51,26 @@ proc_model = api.model('Processing', {
'process_running'
:
fields
.
Boolean
,
})
dataset_model
=
api
.
model
(
'DataSet'
,
{
'id'
:
fields
.
Integer
(
required
=
True
,
description
=
'The repository db dataset id'
),
'_doi'
:
fields
.
String
(
description
=
'The DOI of the dataset'
),
'_name'
:
fields
.
String
(
description
=
'The unique dataset name'
)
})
metadata_model
=
api
.
model
(
'MetaData'
,
{
'with_embargo'
:
fields
.
Boolean
(
default
=
False
,
description
=
'Data with embargo is only visible to the upload until the embargo period ended.'
),
'comment'
:
fields
.
String
(
description
=
'The comment are shown in the repository for each calculation.'
),
'references'
:
fields
.
List
(
fields
.
String
,
descriptions
=
'References allow to link calculations to external source, e.g. URLs.'
),
'coauthors'
:
fields
.
List
(
fields
.
String
,
description
=
'A list of co-authors given by user_id.'
),
'shared_with'
:
fields
.
List
(
fields
.
String
,
description
=
'A list of users to share calculations with given by user_id.'
),
'coauthors'
:
fields
.
List
(
fields
.
Integer
,
description
=
'A list of co-authors given by user_id.'
),
'shared_with'
:
fields
.
List
(
fields
.
Integer
,
description
=
'A list of users to share calculations with given by user_id.'
),
'_upload_time'
:
fields
.
DateTime
(
dt_format
=
'iso8601'
,
description
=
'Overrride the upload time.'
),
'_uploader'
:
fields
.
String
(
description
=
'Override the uploader with the given user id.'
)
'_uploader'
:
fields
.
Integer
(
description
=
'Override the uploader with the given user id.'
),
'datasets'
:
fields
.
List
(
fields
.
Nested
(
model
=
dataset_model
),
description
=
'A list of datasets.'
)
})
calc_metadata_model
=
api
.
inherit
(
'CalcMetaData'
,
metadata_model
,
{
'mainfile'
:
fields
.
String
(
description
=
'The calculation main output file is used to identify the calculation in the upload.'
),
'_checksum'
:
fields
.
String
(
description
=
'Override the calculation checksum'
),
'_pid'
:
fields
.
String
(
description
=
'Assign a specific pid. It must be unique.'
)
'_pid'
:
fields
.
Integer
(
description
=
'Assign a specific pid. It must be unique.'
)
})
upload_metadata_model
=
api
.
inherit
(
'UploadMetaData'
,
metadata_model
,
{
...
...
nomad/coe_repo/base.py
View file @
e26e3610
...
...
@@ -24,6 +24,8 @@ from sqlalchemy.ext.declarative import declarative_base
from
sqlalchemy.dialects.postgresql
import
BYTEA
from
sqlalchemy.ext.declarative
import
declarative_base
from
nomad
import
utils
Base
=
declarative_base
()
...
...
@@ -140,5 +142,5 @@ class Citation(Base): # type: ignore
value
=
Column
(
String
)
kind
=
Column
(
Enum
(
'INTERNAL'
,
'EXTERNAL'
,
name
=
'citation_kind_enum'
))
def
to_
dict
(
self
)
->
dict
:
return
dict
(
id
=
self
.
citation_id
,
value
=
self
.
value
)
def
to_
popo
(
self
)
->
utils
.
POPO
:
return
utils
.
POPO
(
id
=
self
.
citation_id
,
value
=
self
.
value
)
nomad/coe_repo/calc.py
View file @
e26e3610
...
...
@@ -18,16 +18,17 @@ from sqlalchemy import Column, Integer, String, ForeignKey
from
sqlalchemy.orm
import
relationship
,
aliased
from
sqlalchemy.sql.expression
import
literal
from
nomad
import
infrastructure
,
datamodel
from
nomad
import
infrastructure
,
utils
from
nomad.datamodel
import
CalcWithMetadata
from
.
import
base
from
.user
import
User
from
.base
import
Base
,
calc_citation_association
,
ownership
,
co_authorship
,
shareship
,
\
Tag
,
Topics
,
CalcSet
,
calc_dataset_containment
,
Citation
Tag
,
Topics
,
CalcSet
,
calc_dataset_containment
,
Citation
,
Spacegroup
,
CalcMetaData
,
\
CodeVersion
,
StructRatio
,
UserMetaData
class
Calc
(
Base
,
datamodel
.
Calc
):
# type: ignore
class
Calc
(
Base
):
__tablename__
=
'calculations'
coe_calc_id
=
Column
(
'calc_id'
,
Integer
,
primary_key
=
True
,
autoincrement
=
True
)
...
...
@@ -61,7 +62,7 @@ class Calc(Base, datamodel.Calc): # type: ignore
return
self
.
calc_metadata
.
location
@
property
def
pid
(
self
):
def
pid
(
self
)
->
int
:
return
self
.
coe_calc_id
@
property
...
...
@@ -86,13 +87,17 @@ class Calc(Base, datamodel.Calc): # type: ignore
return
self
.
user_metadata
.
permission
==
1
@
property
def
chemical_
formula
(
self
)
->
str
:
def
formula
(
self
)
->
str
:
return
self
.
calc_metadata
.
chemical_formula
@
property
def
filenames
(
self
)
->
List
[
str
]:
filenames
=
self
.
calc_metadata
.
filenames
.
decode
(
'utf-8'
)
return
json
.
loads
(
filenames
)
def
files
(
self
)
->
List
[
str
]:
if
self
.
calc_metadata
is
not
None
:
if
self
.
calc_metadata
.
filenames
is
not
None
:
filenames
=
self
.
calc_metadata
.
filenames
.
decode
(
'utf-8'
)
return
json
.
loads
(
filenames
)
return
[]
@
property
def
all_datasets
(
self
)
->
List
[
'DataSet'
]:
...
...
@@ -116,7 +121,7 @@ class Calc(Base, datamodel.Calc): # type: ignore
def
direct_datasets
(
self
)
->
List
[
'DataSet'
]:
return
[
DataSet
(
dataset_calc
)
for
dataset_calc
in
self
.
parents
]
def
set_value
(
self
,
topic_cid
:
int
,
value
:
str
)
->
None
:
def
_
set_value
(
self
,
topic_cid
:
int
,
value
:
str
)
->
None
:
if
value
is
None
:
return
...
...
@@ -131,24 +136,129 @@ class Calc(Base, datamodel.Calc): # type: ignore
_dataset_cache
:
dict
=
{}
def
to_calc_with_metadata
(
self
):
def
apply_calc_with_metadata
(
self
,
calc
:
CalcWithMetadata
)
->
None
:
"""
Applies the data from ``calc`` to this coe Calc object.
"""
repo_db
=
infrastructure
.
repository_db
self
.
checksum
=
calc
.
calc_id
source_code_version
=
calc
.
code_version
# TODO shorten version names
code_version_obj
=
repo_db
.
query
(
CodeVersion
).
filter_by
(
content
=
source_code_version
).
first
()
if
code_version_obj
is
None
:
code_version_obj
=
CodeVersion
(
content
=
source_code_version
)
repo_db
.
add
(
code_version_obj
)
metadata
=
CalcMetaData
(
calc
=
self
,
added
=
calc
.
upload_time
if
calc
.
upload_time
is
not
None
else
self
.
upload
.
upload_time
,
chemical_formula
=
calc
.
formula
,
filenames
=
(
'[%s]'
%
','
.
join
([
'"%s"'
%
filename
for
filename
in
calc
.
files
])).
encode
(
'utf-8'
),
location
=
calc
.
mainfile
,
version
=
code_version_obj
)
repo_db
.
add
(
metadata
)
struct_ratio
=
StructRatio
(
calc
=
self
,
chemical_formula
=
calc
.
formula
,
formula_units
=
1
,
nelem
=
len
(
calc
.
atoms
))
repo_db
.
add
(
struct_ratio
)
user_metadata
=
UserMetaData
(
calc
=
self
,
label
=
calc
.
comment
,
permission
=
(
1
if
calc
.
with_embargo
else
0
))
repo_db
.
add
(
user_metadata
)
spacegroup
=
Spacegroup
(
calc
=
self
,
n
=
calc
.
spacegroup
)
repo_db
.
add
(
spacegroup
)
# topic based properties
self
.
_set_value
(
base
.
topic_code
,
calc
.
code_name
)
for
atom
in
set
(
calc
.
atoms
):
self
.
_set_value
(
base
.
topic_atoms
,
str
(
atom
))
self
.
_set_value
(
base
.
topic_system_type
,
calc
.
system
)
self
.
_set_value
(
base
.
topic_xc_treatment
,
calc
.
xc_functional
)
self
.
_set_value
(
base
.
topic_crystal_system
,
calc
.
crystal_system
)
self
.
_set_value
(
base
.
topic_basis_set_type
,
calc
.
basis_set
)
# user relations
if
calc
.
uploader
is
not
None
:
uploader
=
repo_db
.
query
(
User
).
get
(
calc
.
uploader
.
id
)
else
:
uploader
=
self
.
upload
.
user
self
.
owners
.
append
(
uploader
)
for
coauthor
in
calc
.
coauthors
:
self
.
coauthors
.
append
(
repo_db
.
query
(
User
).
get
(
coauthor
.
id
))
for
shared_with
in
calc
.
shared_with
:
self
.
shared_with
.
append
(
repo_db
.
query
(
User
).
get
(
shared_with
.
id
))
# datasets
for
dataset
in
calc
.
datasets
:
dataset_id
=
dataset
.
id
coe_dataset
=
repo_db
.
query
(
Calc
).
get
(
dataset_id
)
if
coe_dataset
is
None
:
coe_dataset
=
Calc
(
coe_calc_id
=
dataset_id
)
repo_db
.
add
(
coe_dataset
)
metadata
=
CalcMetaData
(
calc
=
coe_dataset
,
added
=
self
.
upload
.
upload_time
,
chemical_formula
=
dataset
.
name
)
repo_db
.
add
(
metadata
)
if
dataset
.
doi
is
not
None
:
self
.
_add_citation
(
coe_dataset
,
dataset
.
doi
[
'value'
],
'INTERNAL'
)
# cause a flush to avoid future inconsistencies
coe_dataset
=
repo_db
.
query
(
Calc
).
get
(
dataset_id
)
dataset
=
CalcSet
(
parent_calc_id
=
dataset_id
,
children_calc_id
=
self
.
coe_calc_id
)
repo_db
.
add
(
dataset
)
# references
for
reference
in
calc
.
references
:
self
.
_add_citation
(
self
,
reference
[
'value'
],
'EXTERNAL'
)
def
_add_citation
(
self
,
coe_calc
:
'Calc'
,
value
:
str
,
kind
:
str
)
->
None
:
repo_db
=
infrastructure
.
repository_db
citation
=
repo_db
.
query
(
Citation
).
filter_by
(
value
=
value
,
kind
=
kind
).
first
()
if
citation
is
None
:
citation
=
Citation
(
value
=
value
,
kind
=
kind
)
repo_db
.
add
(
citation
)
coe_calc
.
citations
.
append
(
citation
)
def
to_calc_with_metadata
(
self
)
->
CalcWithMetadata
:
"""
Creates a :class:`CalcWithMetadata` instance with UCPM ids, and all UMD/CMD.
Be aware that ``upload_id`` and ``calc_id`` might be old coe repository
``upload_name`` and calculation ``checksum`` depending on the context, i.e. used
database.
"""
result
=
CalcWithMetadata
(
upload_id
=
self
.
upload
.
upload_id
if
self
.
upload
else
None
,
calc_id
=
self
.
c
alc_id
)
calc_id
=
self
.
c
hecksum
)
result
.
calc_hash
=
self
.
checksum
result
.
pid
=
self
.
pid
result
.
mainfile
=
self
.
mainfile
result
.
files
=
self
.
files
for
topic
in
[
tag
.
topic
for
tag
in
self
.
tags
]:
if
topic
.
cid
==
base
.
topic_code
:
result
.
program
_name
=
topic
.
topic
result
.
code
_name
=
topic
.
topic
elif
topic
.
cid
==
base
.
topic_basis_set_type
:
result
.
basis_set
_type
=
topic
.
topic
result
.
basis_set
=
topic
.
topic
elif
topic
.
cid
==
base
.
topic_xc_treatment
:
result
.
XC
_functional
_name
=
topic
.
topic
result
.
xc
_functional
=
topic
.
topic
elif
topic
.
cid
==
base
.
topic_system_type
:
result
.
system
_type
=
topic
.
topic
result
.
system
=
topic
.
topic
elif
topic
.
cid
==
base
.
topic_atoms
:
result
.
setdefault
(
'atom_labels'
,
[])
.
append
(
topic
.
topic
)
result
.
atoms
.
append
(
topic
.
topic
)
elif
topic
.
cid
==
base
.
topic_crystal_system
:
result
.
crystal_system
=
topic
.
topic
elif
topic
.
cid
in
[
1996
,
1994
,
703
,
702
,
701
,
100
]:
...
...
@@ -157,10 +267,10 @@ class Calc(Base, datamodel.Calc): # type: ignore
else
:
raise
KeyError
(
'topic cid %s.'
%
str
(
topic
.
cid
))
result
.
program
_version
=
self
.
calc_metadata
.
version
.
content
result
.
chemical_composition
=
self
.
calc_metadata
.
chemical_formula
result
.
space
_
group
_number
=
self
.
spacegroup
.
n
result
.
setdefault
(
'atom_labels'
,
[])
.
sort
()
result
.
code
_version
=
self
.
calc_metadata
.
version
.
content
result
.
formula
=
self
.
calc_metadata
.
chemical_formula
result
.
spacegroup
=
self
.
spacegroup
.
n
result
.
atoms
.
sort
()
datasets
:
List
[
DataSet
]
=
[]
for
parent
in
self
.
parents
:
...
...
@@ -172,25 +282,22 @@ class Calc(Base, datamodel.Calc): # type: ignore
datasets
.
extend
(
parents
)
result
.
pid
=
self
.
pid
result
.
uploader
=
self
.
uploader
.
to_
dict
()
result
.
uploader
=
self
.
uploader
.
to_
popo
()
result
.
upload_time
=
self
.
calc_metadata
.
added
result
.
datasets
=
list
(
dict
(
id
=
ds
.
id
,
doi
s
=
ds
.
doi
s
,
name
=
ds
.
name
)
utils
.
POPO
(
id
=
ds
.
id
,
doi
=
ds
.
doi
.
to_popo
()
,
name
=
ds
.
name
)
for
ds
in
datasets
)
result
.
with_embargo
=
self
.
with_embargo
result
.
comment
=
self
.
comment
result
.
references
=
list
(
citation
.
to_
dict
()
for
citation
in
self
.
citations
citation
.
to_
popo
()
for
citation
in
self
.
citations
if
citation
.
kind
==
'EXTERNAL'
)
result
.
coauthors
=
list
(
user
.
to_
dict
()
for
user
in
self
.
coauthors
)
result
.
shared_with
=
list
(
user
.
to_
dict
()
for
user
in
self
.
shared_with
)
result
.
coauthors
=
list
(
user
.
to_
popo
()
for
user
in
self
.
coauthors
)
result
.
shared_with
=
list
(
user
.
to_
popo
()
for
user
in
self
.
shared_with
)
return
result
CalcWithMetadata
.
register_mapping
(
Calc
,
Calc
.
to_calc_with_metadata
)
class
DataSet
:
def
__init__
(
self
,
dataset_calc
:
Calc
)
->
None
:
self
.
_dataset_calc
=
dataset_calc
...
...
@@ -200,10 +307,15 @@ class DataSet:
return
self
.
_dataset_calc
.
coe_calc_id
@
property
def
dois
(
self
)
->
List
[
Citation
]:
return
list
(
citation
.
to_dict
()
for
citation
in
self
.
_dataset_calc
.
citations
if
citation
.
kind
==
'INTERNAL'
)
def
doi
(
self
)
->
Citation
:
doi
=
None
for
citation
in
self
.
_dataset_calc
.
citations
:
if
citation
.
kind
==
'INTERNAL'
:
if
doi
is
not
None
:
utils
.
get_logger
(
__name__
).
warning
(
'dataset with multiple dois'
,
dataset_id
=
self
.
id
)
doi
=
citation
return
doi
@
property
def
name
(
self
):
...
...
nomad/coe_repo/upload.py
View file @
e26e3610
...
...
@@ -47,14 +47,12 @@ import datetime
from
sqlalchemy
import
Column
,
Integer
,
String
,
Boolean
,
DateTime
,
ForeignKey
from
sqlalchemy.orm
import
relationship
from
nomad
import
utils
,
infrastructure
,
datamodel
from
nomad.datamodel
import
Calc
WithMetadata
from
nomad
import
utils
,
infrastructure
from
nomad.datamodel
import
Upload
WithMetadata
from
.
import
base
from
.user
import
User
from
.calc
import
Calc
from
.base
import
Base
,
CalcMetaData
,
UserMetaData
,
StructRatio
,
CodeVersion
,
Spacegroup
,
\
CalcSet
,
Citation
from
.base
import
Base
from
.user
import
User
class
UploadMetaData
:
...
...
@@ -79,7 +77,7 @@ class UploadMetaData:
return
self
.
_calc_data
.
get
(
mainfile
,
self
.
_upload_data
)
class
Upload
(
Base
,
datamodel
.
Upload
):
# type: ignore
class
Upload
(
Base
):
# type: ignore
__tablename__
=
'uploads'
coe_upload_id
=
Column
(
'upload_id'
,
Integer
,
primary_key
=
True
,
autoincrement
=
True
)
...
...
@@ -91,10 +89,6 @@ class Upload(Base, datamodel.Upload): # type: ignore
user
=
relationship
(
'User'
)
calcs
=
relationship
(
'Calc'
)
@
classmethod
def
load_from
(
cls
,
obj
):
return
Upload
.
from_upload_id
(
str
(
obj
.
upload_id
))
@
staticmethod
def
from_upload_id
(
upload_id
:
str
)
->
'Upload'
:
repo_db
=
infrastructure
.
repository_db
...
...
@@ -107,7 +101,7 @@ class Upload(Base, datamodel.Upload): # type: ignore
return
self
.
upload_name
@
property
def
uploader
(
self
)
->
'
User
'
:
def
uploader
(
self
)
->
User
:
return
self
.
user
@
property
...
...
@@ -115,21 +109,15 @@ class Upload(Base, datamodel.Upload): # type: ignore
return
self
.
created
@
staticmethod
def
add
(
upload
:
datamodel
.
Upload
,
m
etadata
:
dict
=
{}
)
->
int
:
def
add
(
upload
:
Upload
WithM
etadata
)
->
int
:
"""
Add the upload to the NOMAD-coe repository db. It creates an
uploads-entry, respective calculation and property entries. Everything in one
transaction.
Triggers and updates the NOMAD-coe repository elastic search index after
success (TODO).
Arguments:
upload: The upload to add.
upload_metadata: A dictionary with additional meta data (e.g. user provided
meta data) that should be added to upload and calculations.
upload: The upload to add, including calculations with respective IDs, UMD, CMD.
"""
upload_metadata
=
UploadMetaData
(
metadata
)
assert
upload
.
uploader
is
not
None
repo_db
=
infrastructure
.
repository_db
...
...
@@ -143,20 +131,24 @@ class Upload(Base, datamodel.Upload): # type: ignore
# create upload
coe_upload
=
Upload
(
upload_name
=
upload
.
upload_id
,
created
=
metadata
.
get
(
'_upload_time'
,
upload
.
upload_time
)
,
user
=
upload
.
uploader
,
created
=
upload
.
upload_time
,
user
_id
=
upload
.
uploader
.
id
,
is_processed
=
True
)
repo_db
.
add
(
coe_upload
)
# add calculations and metadata
calcs
=
[]
has_
calcs
=
False
for
calc
in
upload
.
calcs
:
calcs
.
append
(
coe_upload
.
_add_calculation
(
calc
.
to
(
CalcWithMetadata
),
upload_metadata
.
get
(
calc
.
mainfile
)))
has_calcs
=
True
coe_calc
=
Calc
(
coe_calc_id
=
calc
.
pid
,
checksum
=
calc
.
calc_id
,
upload
=
coe_upload
)
repo_db
.
add
(
coe_calc
)
coe_calc
.
apply_calc_with_metadata
(
calc
)
# commit
if
len
(
calcs
)
>
0
:
if
has_
calcs
:
# empty upload case
repo_db
.
commit
()
result
=
coe_upload
.
coe_upload_id
...
...
@@ -167,114 +159,4 @@ class Upload(Base, datamodel.Upload): # type: ignore
repo_db
.
rollback
()
raise
e
# TODO trigger index update
pass
return
result
def
_add_calculation
(
self
,
calc
:
CalcWithMetadata
,
calc_metadata
:
dict
)
->
Calc
:
repo_db
=
infrastructure
.
repository_db
# table based properties
coe_calc_id
=
calc_metadata
.
get
(
'_pid'
,
None
)
coe_calc
=
Calc
(
coe_calc_id
=
coe_calc_id
,
checksum
=
calc_metadata
.
get
(
'_checksum'
,
calc
.
calc_hash
),
upload
=
self
)
repo_db
.
add
(
coe_calc
)
program_version
=
calc
.
program_version
# TODO shorten version names
code_version
=
repo_db
.
query
(
CodeVersion
).
filter_by
(
content
=
program_version
).
first
()
if
code_version
is
None
:
code_version
=
CodeVersion
(
content
=
program_version
)
repo_db
.
add
(
code_version
)
metadata
=
CalcMetaData
(