calc.py 7.43 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
# Copyright 2018 Markus Scheidgen
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from typing import List
import json
from sqlalchemy import Column, Integer, String, ForeignKey
18
19
from sqlalchemy.orm import relationship, aliased
from sqlalchemy.sql.expression import literal
20
21

from nomad import infrastructure, datamodel
22
from nomad.datamodel import CalcWithMetadata
23

24
from . import base
25
26
from .user import User
from .base import Base, calc_citation_association, ownership, co_authorship, shareship, \
27
    Tag, Topics, CalcSet, calc_dataset_containment, Citation
28
29
30
31
32


class Calc(Base, datamodel.Calc):  # type: ignore
    """
    SQLAlchemy model for a row of the ``calculations`` table.

    Besides the mapped columns and relationships, the class offers read-only
    properties that expose values of the related metadata rows, helpers to
    resolve the datasets (parent calculations) a calculation belongs to, and
    :func:`to_calc_with_metadata` to convert an instance into a
    :class:`CalcWithMetadata`.
    """
    __tablename__ = 'calculations'

    coe_calc_id = Column('calc_id', Integer, primary_key=True, autoincrement=True)
    origin_id = Column(Integer, ForeignKey('uploads.upload_id'))
    upload = relationship('Upload')
    checksum = Column(String)

    calc_metadata = relationship('CalcMetaData', uselist=False, lazy='joined')
    user_metadata = relationship('UserMetaData', uselist=False, lazy='joined')
    citations = relationship('Citation', secondary=calc_citation_association, lazy='joined')
    owners = relationship('User', secondary=ownership, lazy='joined')
    coauthors = relationship('User', secondary=co_authorship, lazy='joined')
    shared_with = relationship('User', secondary=shareship, lazy='joined')
    tags = relationship('Tag', lazy='subquery', join_depth=1)
    spacegroup = relationship('Spacegroup', lazy='joined', uselist=False)

    # Self-referential many-to-many: a calculation's parents are the
    # calculations that contain it via ``calc_dataset_containment``; the
    # backref exposes the inverse direction as ``children``.
    parents = relationship(
        'Calc',
        secondary=calc_dataset_containment,
        primaryjoin=calc_dataset_containment.c.children_calc_id == coe_calc_id,
        secondaryjoin=calc_dataset_containment.c.parent_calc_id == coe_calc_id,
        backref='children', lazy='subquery', join_depth=1)

    @classmethod
    def load_from(cls, obj):
        """
        Query the repository db for the calculation whose ``coe_calc_id``
        equals ``int(obj.pid)`` and return the first match of that query.
        """
        repo_db = infrastructure.repository_db
        return repo_db.query(Calc).filter_by(coe_calc_id=int(obj.pid)).first()

    @property
    def mainfile(self) -> str:
        """ The ``location`` stored in the related calc metadata row. """
        return self.calc_metadata.location

    @property
    def pid(self):
        """ The primary key (``coe_calc_id``) of this calculation. """
        return self.coe_calc_id

    @property
    def comment(self) -> str:
        """ The ``label`` stored in the related user metadata row. """
        return self.user_metadata.label

    @property
    def calc_id(self) -> str:
        """ The ``checksum`` column, which serves as the calc id. """
        return self.checksum

    @property
    def references(self) -> List[str]:
        """ The values of all citations whose kind is ``'EXTERNAL'``. """
        return [citation.value for citation in self.citations if citation.kind == 'EXTERNAL']

    @property
    def uploader(self) -> User:
        """
        The single owner of this calculation.

        Raises:
            AssertionError: If the calculation does not have exactly one owner.
        """
        assert len(self.owners) == 1, 'A calculation must have exactly one owner.'
        return self.owners[0]

    @property
    def with_embargo(self) -> bool:
        """ ``True`` if the user metadata ``permission`` equals ``1``. """
        return self.user_metadata.permission == 1

    @property
    def chemical_formula(self) -> str:
        """ The ``chemical_formula`` stored in the related calc metadata row. """
        return self.calc_metadata.chemical_formula

    @property
    def filenames(self) -> List[str]:
        """
        The file names of this calculation, obtained by decoding the calc
        metadata ``filenames`` bytes as UTF-8 and parsing them as JSON.
        """
        filenames = self.calc_metadata.filenames.decode('utf-8')
        return json.loads(filenames)

    @property
    def all_datasets(self) -> List['DataSet']:
        """
        All datasets that contain this calculation directly or transitively.

        A recursive CTE starts from this calculation's id and repeatedly
        joins ``CalcSet`` rows from child to parent. Every collected id other
        than this calculation's own id is loaded and wrapped in a
        :class:`DataSet`.
        """
        assert self.coe_calc_id is not None
        repo_db = infrastructure.repository_db
        query = repo_db.query(literal(self.coe_calc_id).label('coe_calc_id')).cte(recursive=True)
        right = aliased(query)
        left = aliased(CalcSet)
        query = query.union_all(repo_db.query(left.parent_calc_id).join(
            right, right.c.coe_calc_id == left.children_calc_id))
        query = repo_db.query(query)

        # the CTE's anchor row is this calculation itself; it is not a dataset
        dataset_calc_ids = [r[0] for r in query if r[0] != self.coe_calc_id]
        if not dataset_calc_ids:
            return []

        return [
            DataSet(dataset_calc)
            for dataset_calc in repo_db.query(Calc).filter(Calc.coe_calc_id.in_(dataset_calc_ids))]

    @property
    def direct_datasets(self) -> List['DataSet']:
        """ The datasets built from the direct ``parents`` of this calculation. """
        return [DataSet(dataset_calc) for dataset_calc in self.parents]

    def set_value(self, topic_cid: int, value: str) -> None:
        """
        Tag this calculation with the topic of the given category and value.

        An existing ``Topics`` row is reused only if it matches both the
        category id and the value; otherwise a new row is added. Matching on
        the value alone would attach a topic of a different category whenever
        two categories share the same value string.

        Arguments:
            topic_cid: The category id of the topic.
            value: The topic value. Nothing is done if it is ``None``.
        """
        if value is None:
            return

        repo_db = infrastructure.repository_db
        topic = repo_db.query(Topics).filter_by(cid=topic_cid, topic=value).first()
        if not topic:
            topic = Topics(cid=topic_cid, topic=value)
            repo_db.add(topic)

        tag = Tag(calc=self, topic=topic)
        repo_db.add(tag)

    # Class-level cache of ``all_datasets`` results, keyed by parent Calc
    # instance and shared by all instances. It is never cleared here.
    # NOTE(review): entries keep the cached ORM objects alive -- confirm that
    # this is acceptable for long-running processes.
    _dataset_cache: dict = {}

    def to_calc_with_metadata(self):
        """
        Create a :class:`CalcWithMetadata` from this calculation.

        The result is filled from the tagged topics, the calc and user
        metadata rows, the spacegroup, and the datasets reachable through
        ``parents``.

        Raises:
            KeyError: If a tag refers to a topic with an unexpected category id.
        """
        result = CalcWithMetadata(
            upload_id=self.upload.upload_id if self.upload else None,
            calc_id=self.calc_id)

        for tag in self.tags:
            topic = tag.topic
            if topic.cid == base.topic_code:
                result.program_name = topic.topic
            elif topic.cid == base.topic_basis_set_type:
                result.basis_set_type = topic.topic
            elif topic.cid == base.topic_xc_treatment:
                result.XC_functional_name = topic.topic
            elif topic.cid == base.topic_system_type:
                result.system_type = topic.topic
            elif topic.cid == base.topic_atoms:
                result.setdefault('atom_labels', []).append(topic.topic)
            elif topic.cid == base.topic_crystal_system:
                result.crystal_system = topic.topic
            elif topic.cid in [1996, 1994, 703, 702, 701, 100]:
                # user/author, restriction, formulas?, another category
                pass
            else:
                raise KeyError('topic cid %s.' % str(topic.cid))

        result.program_version = self.calc_metadata.version.content
        result.chemical_composition = self.calc_metadata.chemical_formula
        result.space_group_number = self.spacegroup.n
        result.setdefault('atom_labels', []).sort()

        # each direct parent is a dataset itself, followed by its own
        # (cached) transitive datasets
        datasets: List[DataSet] = []
        for parent in self.parents:
            parents = Calc._dataset_cache.get(parent)
            if parents is None:
                parents = parent.all_datasets
                Calc._dataset_cache[parent] = parents
            datasets.append(DataSet(parent))
            datasets.extend(parents)

        result.pid = self.pid
        result.uploader = self.uploader.user_id
        result.upload_time = self.calc_metadata.added
        result.datasets = [
            dict(id=ds.id, dois=ds.dois, name=ds.name)
            for ds in datasets]
        result.with_embargo = self.with_embargo
        result.comment = self.comment
        result.references = self.references
        result.coauthors = [user.user_id for user in self.coauthors]
        result.shared_with = [user.user_id for user in self.shared_with]

        return result
185
186
187
188


# Register Calc.to_calc_with_metadata as the mapping for Calc objects.
# NOTE(review): presumably this lets CalcWithMetadata be built from Calc
# instances -- confirm against nomad.datamodel.
CalcWithMetadata.register_mapping(Calc, Calc.to_calc_with_metadata)

189
190
191
192
193
194
195

class DataSet:
    """
    Read-only view on a calculation that acts as a dataset.

    Arguments:
        dataset_calc: The calculation object that represents the dataset.
    """
    def __init__(self, dataset_calc: 'Calc') -> None:
        self._dataset_calc = dataset_calc

    @property
    def id(self):
        """ The ``coe_calc_id`` of the wrapped calculation. """
        return self._dataset_calc.coe_calc_id

    @property
    def dois(self) -> List[str]:
        """ The values of the wrapped calculation's ``'INTERNAL'`` citations. """
        return [
            citation.value
            for citation in self._dataset_calc.citations
            if citation.kind == 'INTERNAL']

    @property
    def name(self):
        """ The ``chemical_formula`` of the wrapped calculation's metadata. """
        return self._dataset_calc.calc_metadata.chemical_formula