from __future__ import print_function
import os
import sys
from glob import glob
from contextlib import contextmanager

import numpy as np

9
import setup_paths
10

11
from nomadcore.local_meta_info import loadJsonFile, InfoKindEl
12
13
14
from nomadcore.parser_backend import JsonParseEventsWriterBackend
from nomadcore.unit_conversion.unit_conversion import convert_unit, \
    register_userdefined_quantity
15

16
from aseoct import Octopus, parse_input_file, kwargs2cell
17
18
from octopus_info_parser import parse_infofile
from octopus_logfile_parser import parse_logfile
19

20
"""This is the Octopus parser.
21

22
23
24
25
26
27
28
29
30
31
32
33
34
It has to parse many files:
 * input file, 'inp' (ASE does this)
 * output file, 'static/info' (SimpleParser)
   - similar file or files for other calculation modes
 * anything written to stdout (this file could have any name) (SimpleParser)
   - program output can be cooking recipes ("parse *all* outputs")
 * geometry input file, if specified in 'inp'
   - ASE takes care of that
 * output density/potential/etc. if written to files
   - cube
   - xcrysden
   - xyz
   - other candidates: vtk, etsf
35

36
37
38
39
40
There are more output formats but they are non-standard, or for
debugging, not commonly used.  We will only implement support for
those if many uploaded calculations contain those formats.  I think it
is largely irrelevant.
"""
41

42
43
44
45
46

def normalize_names(names):
    """Return *names* lowercased (Octopus input is case insensitive)."""
    return [str.lower(name) for name in names]

47
48
OCT_ENERGY_UNIT_NAME = 'usrOctEnergyUnit'
OCT_LENGTH_UNIT_NAME = 'usrOctLengthUnit'
49

50
metaInfoPath = os.path.normpath(os.path.join(os.path.dirname(os.path.abspath(__file__)),"../../../../nomad-meta-info/meta_info/nomad_meta_info/octopus.nomadmetainfo.json"))
51
52
53
54
55
56
57
58
59
60
61
62
63
metaInfoEnv, warnings = loadJsonFile(filePath=metaInfoPath,
                                     dependencyLoader=None,
                                     extraArgsHandling=InfoKindEl.ADD_EXTRA_ARGS,
                                     uri=None)

# Dictionary of all meta info:
metaInfoKinds = metaInfoEnv.infoKinds.copy()
all_metadata_names = list(metaInfoKinds.keys())
normalized2real = dict(zip(normalize_names(all_metadata_names), all_metadata_names))
# We need access to this information because we want/need to dynamically convert
# extracted metadata to its correct type.  Thus we need to know the type.
# Also since input is case insensitive, we need to convert normalized (lowercase)
# metadata names to their real names which are normally CamelCase.
64

65

66
# Identification of this parser, recorded in every parsing session
# started via JsonParseEventsWriterBackend.startedParsingSession().
parser_info = {
  "name": "parser_octopus",
  "version": "1.0"
}

71
72
73
74
75
76
77
78
79
80
81
82
83
84
def read_parser_log(path):
    """Read Octopus' own exec/parser.log into a dict.

    The parser log contains lines of the form ``name = value`` where the
    values have already been reduced to literal numbers/strings by the
    Octopus input parser.  Comments (after '#') are stripped; lines that
    are not assignments, or whose name contains a space (not a real
    variable), are skipped.  Names are lowercased; values stay strings.

    Returns a dict mapping lowercased variable name -> raw value string.
    """
    exec_kwargs = {}
    with open(path) as fd:
        for line in fd:
            # Remove comment:
            line = line.split('#', 1)[0].strip()
            # Split only on the first '=' so values that themselves
            # contain '=' are kept intact instead of being discarded.
            tokens = line.split('=', 1)
            try:
                name, value = tokens
            except ValueError:
                continue  # Not an assignment
            name = name.strip().lower()
            value = value.strip()

            if ' ' in name:
                # Not a real name
                continue

            # Octopus questionably writes this one twice
            #if name != 'speciesprojectorspherethreshold':
            exec_kwargs[name] = value
    return exec_kwargs


def read_input_file(path):
    """Parse the Octopus 'inp' file at *path*.

    Returns a dict mapping normalized (lowercased) keyword names to the
    raw values produced by ASE's parse_input_file().
    """
    with open(path) as fd:
        names, values = parse_input_file(fd)
    return dict(zip(normalize_names(names), values))


106
107
108
def is_octopus_logfile(fname):
    """Return whether *fname* looks like Octopus stdout output.

    Detection: the ASCII-art Octopus logo printed in the program banner
    contains a characteristic "eyes" pattern; we look for it within the
    first 20 lines only.

    Fixes over the previous version: the file handle is closed (with),
    files shorter than 20 lines no longer raise StopIteration, and
    unreadable/binary files (this function is applied to *every* file in
    the directory by find_octopus_logfile) return False instead of
    raising UnicodeDecodeError.
    """
    try:
        with open(fname) as fd:
            for lineno, line in enumerate(fd):
                if lineno >= 20:
                    break
                if '|0) ~ (0) |' in line:  # Eyes from Octopus logo
                    return True
    except (IOError, OSError, UnicodeDecodeError):
        # Not readable as text -> certainly not an Octopus logfile.
        return False
    return False


def find_octopus_logfile(dirname):
    """Return the path of the first Octopus stdout log found in
    *dirname*, or None if no file there looks like one."""
    candidates = (path for path in glob('%s/*' % dirname)
                  if os.path.isfile(path))
    for candidate in candidates:
        if is_octopus_logfile(candidate):
            return candidate
    return None


def override_keywords(kwargs, parser_log_kwargs, fd):
    """Return a copy of *kwargs* with selected keywords replaced by the
    fully evaluated values from exec/parser.log.

    The input file may contain arithmetic and variable assignments which
    we cannot reduce to final values ourselves; Octopus' own parser log
    has them reduced to pure numbers.  That reduction is only trustworthy
    for genuinely numeric variables (a name, e.g. an eigensolver, would
    come back as a meaningless number), so only a small whitelist is
    overridden and every other keyword is kept verbatim from the input
    file whether it can be parsed or not.  Override messages are logged
    to *fd*.
    """
    exec_override_keywords = set(['radius',
                                  #'lsize',
                                  'spacing'])

    outkwargs = kwargs.copy()

    # Now override any relevant keywords:
    for name in exec_override_keywords:
        if name not in kwargs:
            continue

        if name == 'lsize':
            # (This is relatively horrible.  We are looking for lines
            # like "Lsize (0, 1) = 5.0" or similar.)
            lsize = []
            for axis in range(3):
                logname = 'lsize (0, %d)' % axis
                assert logname in parser_log_kwargs
                lsize.append(parser_log_kwargs[logname])
            outkwargs[name] = [lsize]
            continue

        print('Keyword %s with value %s overridden by value '
              '%s obtained from parser log'
              % (name, kwargs[name], parser_log_kwargs[name]),
              file=fd)
        outkwargs[name] = parser_log_kwargs[name]
    return outkwargs


162
def register_units(kwargs, fd):
    """Register this calculation's energy/length units with the NOMAD
    unit-conversion machinery.

    The unit system is taken from the 'units' input keyword (default
    'atomic').  Raises ValueError for unknown unit systems and for the
    unsupported UnitsInput/UnitsOutput keywords.  Progress is logged to
    *fd*.
    """
    unit_table = {'atomic': ('bohr', 'hartree'),
                  'ev_angstrom': ('angstrom', 'eV')}

    units = kwargs.get('units', 'atomic').lower()
    if units not in unit_table:
        raise ValueError('Unknown units: %s' % units)
    length_unit, energy_unit = unit_table[units]

    if 'unitsinput' in kwargs:
        raise ValueError('UnitsInput not supported')
    if 'unitsoutput' in kwargs:
        raise ValueError('UnitsOutput not supported')

    print('Set units: energy=%s, length=%s' % (energy_unit, length_unit),
          file=fd)
    register_userdefined_quantity(OCT_ENERGY_UNIT_NAME, energy_unit)
    register_userdefined_quantity(OCT_LENGTH_UNIT_NAME, length_unit)


184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
def _parse_octopus_bool(value):
    """Convert an Octopus-style boolean string to a Python bool.

    Plain bool() is wrong here: bool('no') and bool('false') are both
    True because any non-empty string is truthy.
    """
    if isinstance(value, str):
        return value.strip().lower() in ('yes', 'true', '1', '.true.')
    return bool(value)


# Map NOMAD metadata dtypeStr codes to converters applied to the raw
# strings extracted from Octopus files ('i' added so integer-typed
# metadata no longer raises KeyError).
metadata_dtypes = {'b': _parse_octopus_bool,
                   'C': str,
                   'f': float,
                   'i': int}


def regularize_metadata_entry(normalized_name, value):
    """Convert (<normalized name>, <extracted string>) into
    (<real metadata name>, <value of correct type>).

    Raises KeyError if the normalized name is unknown or the metadata
    definition is missing from the environment.  (Previously this was an
    `assert`, which is silently stripped under `python -O`.)
    """
    realname = normalized2real[normalized_name]
    if realname not in metaInfoEnv:
        raise KeyError('No such metadata: %s' % realname)
    metainfo = metaInfoEnv[realname]
    dtype = metainfo['dtypeStr']
    converted_value = metadata_dtypes[dtype](value)
    return realname, converted_value


200
def register_octopus_keywords(pew, category, kwargs):
    """Emit every keyword in *kwargs* as x_octopus_<category>_<keyword>
    metadata on the backend *pew*, converting each value to its declared
    metadata type first."""
    skip = set(['mixingpreconditioner', 'mixinterval'])
    for keyword, rawvalue in kwargs.items():
        if keyword in skip:  # XXXX
            continue
        # The metadata type is looked up from the meta-info environment
        # inside regularize_metadata_entry().
        normalized_name = 'x_octopus_%s_%s' % (category, keyword)
        name, value = regularize_metadata_entry(normalized_name, rawvalue)
        pew.addValue(name, value)

210

211
212
def parse(fname, fd):
    """Parse one Octopus calculation and emit NOMAD metadata.

    fname: path to the calculation's static/info file; the calculation
        directory (containing 'inp' and 'exec/parser.log') is derived
        from it.
    fd: open text stream for human-readable progress logging.

    Emits section_run / section_system /
    section_single_configuration_calculation events through a
    JsonParseEventsWriterBackend.
    """
    # fname refers to the static/info file.
    pew = JsonParseEventsWriterBackend(metaInfoEnv)
    pew.startedParsingSession(fname, parser_info)
                                       #fileOut=open('json-writer.log', 'w'))

    # this context manager shamelessly copied from GPAW parser
    # Where should Python code be put if it is used by multiple parsers?
    @contextmanager
    def open_section(name):
        gid = pew.openSection(name)
        yield
        pew.closeSection(name, gid)

    with open_section('section_run'):
        pew.addValue('program_name', 'Octopus')
        print(file=fd)

        # Derive the calculation directory from .../<dirname>/static/info:
        staticdirname, _basefname = os.path.split(fname)
        dirname, _static = os.path.split(staticdirname)
        inp_path = os.path.join(dirname, 'inp')
        parser_log_path = os.path.join(dirname, 'exec', 'parser.log')

        print('Read Octopus keywords from input file %s' % inp_path,
              file=fd)
        kwargs = read_input_file(inp_path)
        #with open_section('x_octopus_input'):
        #    register_octopus_keywords(pew, 'input', kwargs)

        print('Read processed Octopus keywords from octparse logfile %s'
              % parser_log_path, file=fd)
        parser_log_kwargs = read_parser_log(parser_log_path)
        #register_octopus_keywords(pew, 'parserlog', parser_log_kwargs)

        print('Override certain keywords with processed keywords', file=fd)
        kwargs = override_keywords(kwargs, parser_log_kwargs, fd)

        register_units(kwargs, fd)

        print('Read as ASE calculator', file=fd)
        calc = Octopus(dirname, check_keywords=False)
        atoms = calc.get_atoms()

        #with open_section('section_basis_set_cell_dependent'):
            # XXX FIXME spacing can very rarely be 3 numbers!
            # uuh there is no meaningful way to set grid spacing
        #    pass

        # BUGFIX: this used to glob the literal string
        # 'staticdirname/*.cube' instead of the actual directory.
        # TODO: the cube files found here are not parsed yet.
        cubefiles = glob('%s/*.cube' % staticdirname)
        cubefiles.sort()

        nbands = calc.get_number_of_bands()
        nspins = calc.get_number_of_spins()
        nkpts = len(calc.get_k_point_weights())

        #print('Parse info file using SimpleMatcher', file=fd)
        #parse_infofile(metaInfoEnv, pew, fname)

        #logfile = find_octopus_logfile(dirname)
        #if logfile is None:
        #    print('No stdout logfile found', file=fd)
        #else:
        #    print('Found stdout logfile %s' % logfile, file=fd)
        #    print('Parse logfile using SimpleMatcher', file=fd)
        #    parse_logfile(metaInfoEnv, pew, logfile)

        print('Add parsed values', file=fd)
        with open_section('section_system'):
            # The Atoms object will always have a cell, even if it was not
            # used in the Octopus calculation!  Thus, to be more honest,
            # we re-extract the cell at a level where we can distinguish:
            cell, _unused = kwargs2cell(kwargs)
            if cell is not None:
                pew.addArrayValues('simulation_cell', cell)

            # XXX FIXME atoms can be labeled in ways not compatible with ASE.
            pew.addArrayValues('atom_labels',
                               np.array(atoms.get_chemical_symbols()))
            pew.addArrayValues('atom_positions',
                               convert_unit(atoms.get_positions(), 'angstrom'))
            pew.addArrayValues('configuration_periodic_dimensions',
                               np.array(atoms.pbc))

        with open_section('section_single_configuration_calculation'):
            with open_section('section_method'):
                pew.addValue('number_of_spin_channels', nspins)
                pew.addValue('total_charge',
                             float(parser_log_kwargs['excesscharge']))

                oct_theory_level = kwargs.get('theorylevel', 'dft')

                theory_levels = dict(#independent_particles='',
                                     #hartree='',
                                     #hartree_fock='',
                                     dft='DFT')
                                     #classical='',
                                     #rdmft='')
                # TODO how do we warn if we get an unexpected theory level?
                # (currently an unknown level raises KeyError here)
                nomad_theory_level = theory_levels[oct_theory_level]

                pew.addValue('electronic_structure_method', 'DFT')

                # XXXXXXXXXXXX read XC functional from text output instead
                if oct_theory_level == 'dft':
                    ndim = int(kwargs.get('dimensions', 3))
                    assert ndim in range(1, 4), ndim
                    # Octopus' default XC functional depends on the number
                    # of dimensions:
                    default_xc = ['lda_x_1d + lda_c_1d_csc',
                                  'lda_x_2d + lda_c_2d_amgb',
                                  'lda_x + lda_c_pz_mod'][ndim - 1]
                    xcfunctional = kwargs.get('xcfunctional', default_xc)
                    xcfunctional = ''.join(xcfunctional.split()).upper()
                    with open_section('section_XC_functionals'):
                        pew.addValue('XC_functional_name', xcfunctional)

                # Convergence parameters?

            with open_section('section_eigenvalues'):
                if kwargs.get('theorylevel', 'dft') == 'dft':
                    pew.addValue('eigenvalues_kind', 'normal')

                eig = np.zeros((nspins, nkpts, nbands))
                occ = np.zeros((nspins, nkpts, nbands))

                for s in range(nspins):
                    for k in range(nkpts):
                        eig[s, k, :] = calc.get_eigenvalues(kpt=k, spin=s)
                        occ[s, k, :] = calc.get_occupation_numbers(kpt=k,
                                                                   spin=s)

                pew.addArrayValues('eigenvalues_values',
                                   convert_unit(eig, 'eV'))
                pew.addArrayValues('eigenvalues_occupation', occ)
    pew.finishedParsingSession('ParseSuccess', None)
340
341
342

if __name__ == '__main__':
    # Usage: python parser_octopus.py <path-to-static/info>
    # Human-readable progress messages go to parse.log in the
    # current working directory.
    infofile = sys.argv[1]
    with open('parse.log', 'w') as logfd:
        parse(infofile, logfd)