hands-on-tutorial.bkr 297 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
{
    "beaker": "2",
    "evaluators": [
        {
            "name": "HTML",
            "plugin": "HTML",
            "view": {
                "cm": {
                    "mode": "htmlmixed"
                }
            }
        },
        {
            "name": "JavaScript",
            "plugin": "JavaScript",
            "view": {
                "cm": {
                    "mode": "javascript",
                    "background": "#FFE0F0"
                }
            },
            "languageVersion": "ES2015"
        },
        {
            "name": "IPython",
            "plugin": "IPython",
            "setup": "%matplotlib inline\nimport numpy\nimport matplotlib\nfrom matplotlib import pylab, mlab, pyplot\nnp = numpy\nplt = pyplot\nfrom IPython.display import display\nfrom IPython.core.pylabtools import figsize, getfigs\nfrom pylab import *\nfrom numpy import *\n",
            "view": {
                "cm": {
                    "mode": "python"
                }
            }
        },
        {
            "name": "TeX",
            "plugin": "TeX",
            "view": {
                "cm": {
                    "mode": "stex"
                }
            }
        }
    ],
    "cells": [
45
        {
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
            "id": "markdowndtwMRP",
            "type": "markdown",
            "body": [
                "<style type=\"text/css\">",
                "/*!",
                " * Nomad Beaker Notebook Template",
                " *",
                " * @copyright  Copyright 2017 Fritz Haber Institute of the Max Planck Society,",
                " *             Benjamin Regler - Apache 2.0 License",
                " * @license    http://www.apache.org/licenses/LICENSE-2.0",
                " * @author     Benjamin Regler",
                " * @version    1.0.0",
                " *",
                " * Licensed under the Apache License, Version 2.0 (the \"License\");",
                " * you may not use this file except in compliance with the License.",
                " * You may obtain a copy of the License at",
                " * ",
                " *     http://www.apache.org/licenses/LICENSE-2.0",
                " *",
                " * Unless required by applicable law or agreed to in writing, software",
                " * distributed under the License is distributed on an \"AS IS\" BASIS,",
                " * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.",
                " * See the License for the specific language governing permissions and",
                " * limitations under the License.",
                " */",
                "p{margin-bottom:1.3em}h1,h2,h3,h4{margin:1.414em 0 .5em;font-weight:inherit;line-height:1.2}h1{margin-top:0;font-size:3.998em}h2{font-size:2.827em}h3{font-size:1.999em}h4{font-size:1.414em}.font_small,small{font-size:.707em}notebook-container{font-size:16px}.nomad--header h2{color:#20335d;font-weight:700;margin:0 0 .2em}.nomad--header h3{color:#20335d;font-weight:700;margin-top:0;text-indent:-1em;padding-left:1em}.nomad--header h3:before{content:\"\\2014\";padding-right:.25em}.nomad--header .nomad--description{margin:-1em 0 0 2em}.atomic-data--block,.nomad--last-updated{display:inline-block;margin-top:1em}.nomad--last-updated{color:grey;float:right;position:relative;z-index:1}.nomad--last-updated::before{bottom:-75%;content:attr(data-version);font-size:4em;font-weight:700;opacity:.2;position:absolute;right:0}.atomic-data label{display:block;font-size:medium;font-weight:700}.atomic-data--select,.chosen-container{width:100%!important}.atomic-data--select:disabled{color:#d3d3d3}.atomic-data--reset-buton{display:inline-block;margin-top:1.6em;width:100%}.modal-dialog{max-width:1000px;width:80%}.modal-header h1{font-size:2em;line-height:1.2}.modal-dialog h2{font-size:1.414em}.modal-dialog h2:first-child{margin-top:0}.modal-dialog h3{font-size:1.2em}.modal-dialog dt{font-size:larger;margin-top:1.414em}.modal-dialog img{width:100%}.modal-dialog .authors{text-transform:uppercase}",
                "summary{list-style:disc; margin: 2px;padding: 10px;border: 0px;border:8px double   green; font-size:16px;padding-left: 32px;padding-right: 22px; width:89%}",
                "</style>",
                "",
                "<div id=\"teaser\" style='background-color: rgba(149,170,79, 1.0); background-position:  right center; background-size: 200px; background-repeat: no-repeat; ",
                "    padding-top: 20px;",
                "    padding-right: 10px;",
                "    padding-bottom: 50px;",
                "    padding-left: 80px;' > ",
                "",
                "  <div class=\"nomad--header\">",
                "   <div style=\"text-align:center\">",
                "    <h2> <img id=\"nomad\" src=\"https://nomad-coe.eu/uploads/nomad/images/NOMAD_Logo2.png\" height=\"100\" alt=\"NOMAD Logo\">  NOMAD Analytics Toolkit  <img id=\"nomad\" src=\"https://www.nomad-coe.eu/uploads/nomad/backgrounds/head_big-data_analytics_2.png\" height=\"80\" alt=\"NOMAD Logo\"> </h2>",
                "  </div>",
                "    <h3>Hands-on Workshop Density-Functional Theory and Beyond:<br> Compressed sensing for identifying descriptors ",
                "    </h3>",
                "    <p class=\"nomad--description\">",
                "      created by:",
                " Emre Ahmetcik<sup>1</sup>, ",
                " Angelo Ziletti<sup>1</sup>,",
                " Runhai Ouyang<sup>1</sup>,",
                " Luca Ghiringhelli<sup>1</sup>,",
                " and Matthias Scheffler<sup>1</sup> <br><br>",
                "   ",
                "      <sup>1</sup> Fritz Haber Institute of the Max Planck Society, Faradayweg 4-6, D-14195 Berlin, Germany <br>",
Emre Ahmetcik's avatar
Emre Ahmetcik committed
96
                "      <span class=\"nomad--last-updated\" data-version=\"v1.0.0\">[Last updated: August 8, 2018]</span>",
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
                "    </p>",
                "</div>",
                "</div>",
                "",
                "<div style='text-align: right;'>",
                "<a href=\"https://analytics-toolkit.nomad-coe.eu/home/\" class=\"btn btn-primary\" style=\"font-size:larger;\">Back to Analytics Home</a> ",
                "<a href=\"https://www.nomad-coe.eu/\" class=\"btn btn-primary\" style=\"font-size:larger;\">Back to NOMAD CoE Home</a> ",
                "</div>  ",
                "",
                "",
                "",
                "<br><br><br>",
                "This tutorial shows how to find descriptive parameters (short formulas) to predict materials properties using compressed sensing tools. As an example, we address the prediction of the crystal structure stability of 82 octet binary compounds. We provide scripts which access the relevant data from the NOMAD Archive and determine descriptors for both regression models (predicting the rocksalt (RS) vs zincblende (ZB) structure energy difference) and classification (a compound is predicted to be most stable in either RS, ZB, CsCl, NiAs or CrB structure).",
                "",
                "The idea of using compressed sensing tools: Starting from simple physical quantities (\"building blocks\", here properties of the constituent free atoms such as orbital radii), millions (or billions) of candidate formulas are generated by applying arithmetic operations combining building blocks, for example forming sums and products of them. These candidate formulas constitute the so-called \"feature space\". Then a feature selection method is used to select only a few of these formulas that explain the data. In this tutorial we use the methods LASSO+$\\ell_0$ as introduced in ",
                "<div style=\"padding: 1ex; margin-top: 1ex; margin-bottom: 1ex; border-style: dotted; border-width: 1pt; border-color: blue; border-radius: 3px;\">",
                "L. M. Ghiringhelli, J. Vybiral, S. V. Levchenko, C. Draxl, M. Scheffler: <span style=\"font-style: italic;\">Big Data of Materials Science: Critical Role of the Descriptor</span>,  Phys. Rev. Lett. 114, 105503 (2015) <a href=\"http://journals.aps.org/prl/abstract/10.1103/PhysRevLett.114.105503\" target=\"_blank\">[PDF]</a></div>",
                " and Sure Independence Screening and Sparsifying Operator (SISSO) as proposed in",
                "<div style=\"padding: 1ex; margin-top: 1ex; margin-bottom: 1ex; border-style: dotted; border-width: 1pt; border-color: blue; border-radius: 3px;\">",
Emre Ahmetcik's avatar
Emre Ahmetcik committed
116
                "R. Ouyang, S. Curtarolo, E. Ahmetcik, M. Scheffler, L. M. Ghiringhelli: <span style=\"font-style: italic;\">SISSO: a compressed-sensing method for identifying the best low-dimensional descriptor in an immensity of offered candidates</span>, Phys. Rev. Materials, in print (2018) <a href=\"https://arxiv.org/abs/1710.03319\" target=\"_blank\">https://arxiv.org/abs/1710.03319</a>.",
117
118
119
120
                "</div>",
                ""
            ],
            "evaluatorReader": false
121
122
        },
        {
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
            "id": "sectionFrtJgy",
            "type": "section",
            "title": "Introduction to the compressed sensing methods",
            "level": 1,
            "evaluatorReader": false,
            "collapsed": true
        },
        {
            "id": "markdownVA6agA",
            "type": "markdown",
            "body": [
                "The feature space is generated by creating a list of analytical expressions (the derived features), obtained by combining the primary features and arithmetic operations. We put all $m$ derived features into a descriptor matrix $\\mathbf{D} \\in \\mathbb{R}^{82 \\times m}$ where each column stands for a derived feature and each row for a compound. An $\\ell_0$-regularization ",
                "",
                "$\\text{argmin}_{\\mathbf{c} \\in \\mathbb{R}^{m}} \\{\\|\\mathbf{P} - \\mathbf{D}\\mathbf{c}\\|^2_2 +\\lambda \\|\\mathbf{c}\\|_0\\}$",
                "",
                "determines those few feature columns which best approximate a property vector $\\mathbf{P} \\in \\mathbb{R}^{82}$ (i.e., the RS vs. ZB energy differences). The subscript 0 stands for the $\\ell_0$-quasinorm, which counts the number of non-zero elements of $\\mathbf{c}$, and $\\lambda > 0$ is called the regularization parameter. Performing the $\\ell_0$-regularization quickly becomes computationally infeasible, and approximations (e.g. LASSO, LASSO+L0, SIS+L0) are often needed, since in practice the $\\ell_0$-regularization needs to be solved combinatorially: all singletons, pairs, triplets, ... $n$-tuples (up to the selected maximum dimension of the descriptor) are listed and for each set a least-square regression is performed. The $n$-tuple that gives the lowest mean square error for the least-square regression fit is selected as the resulting $n$-dimensional descriptor."
            ],
            "evaluatorReader": false
        },
        {
            "id": "sectionS8rXSG",
            "type": "section",
            "title": "The LASSO+$\\ell_0$ method",
            "level": 2,
            "evaluatorReader": false,
            "collapsed": false
        },
        {
            "id": "markdown28rftM",
            "type": "markdown",
            "body": [
                " LASSO+$\\ell_0$  combines the Least Absolute Shrinkage and Selection Operator (LASSO)  and the $\\ell_0$-regularization. In the first step the LASSO minimization",
                "",
                "$\\text{argmin}_{\\mathbf{c} \\in \\mathbb{R}^{m}} \\{\\|\\mathbf{P} - \\mathbf{D}\\mathbf{c}\\|^2_2 +\\lambda \\|\\mathbf{c}\\|_1\\}$",
                "",
                "is performed repeatedly by decreasing $\\lambda$ in small steps, starting from the largest value that gives one non-zero element in $\\mathbf{c}$, until a desired number of features (e.g. 30) that have non-zero coefficients in $\\mathbf{c}$ are collected/saved. Using these collected features, the $\\ell_0$-regularization is performed subsequently.",
                ""
            ],
            "evaluatorReader": false
        },
        {
            "id": "sectionbYEx1l",
            "type": "section",
            "title": "The SISSO for regression",
            "level": 2,
            "evaluatorReader": false,
            "collapsed": false
        },
        {
            "id": "markdownyyqzw4",
            "type": "markdown",
            "body": [
                "SISSO works iteratively. In the first iteration, a number $k$ of features is collected that have the largest correlation (scalar product) with $\\mathbf{P}$. The feature with the largest correlation is simply the 1D descriptor. Next, a residual is constructed as the error made at the first iteration. A new set of $k$ features is now selected as those having the largest correlation with the residual. The 2D descriptor is the pair of features that yield the smallest fitting error upon least-square regression, among all possible pairs contained in the union of the sets selected in this and the first iteration. In each next iteration a new residual is constructed as the error made in the previous iteration, then a new set of $k$ features is extracted as those that have largest correlation with each new residual. The $n$D descriptor is the $n$-tuple of features that yield the smallest fitting error upon least square regression, among all possible $n$-tuples contained in the union of the sets obtained in each new iteration and all the previous iterations. If $k=1$ the method collapses to the so-called orthogonal matching pursuit."
            ],
            "evaluatorReader": false
        },
        {
            "id": "sectionH1x7Pk",
            "type": "section",
            "title": "The SISSO for classification",
            "level": 2,
            "evaluatorReader": false,
            "collapsed": false
        },
        {
            "id": "markdownNO8Ito",
            "type": "markdown",
            "body": [
                "For classification (categorical target), SISSO is applied with a different loss function. Given the convex hull of each target class, the number of data points located in overlapping convex hull regions is now minimized. "
            ],
            "evaluatorReader": false
        },
        {
            "id": "sectionB3Bqbw",
            "type": "section",
            "title": "Get the data from the NOMAD Archive",
            "level": 1,
            "evaluatorReader": false,
            "collapsed": true
        },
        {
            "id": "markdownOfpScv",
            "type": "markdown",
            "body": [
                "<summary>",
                "<li>See that the data from the repository can be accessed easily with simple Python scripts.</li>",
                "<li>Consider the specific case of 82 octet binary compounds in the RS and ZB structure. Process the data to build the target property $\\mathbf{P} \\in \\mathbb{R}^{82}$ of RS vs ZB energy differences.</li>",
                "<li>Construct the descriptor matrix $\\mathbf{D} \\in \\mathbb{R}^{82 \\times m}$ using primary features from the NOMAD atomic data collection.</li>",
                "</summary>"
            ],
            "evaluatorReader": false
214
215
216
217
218
        },
        {
            "id": "markdownocHinB",
            "type": "markdown",
            "body": [
219
                "In the following cell we declare the paths to the JSON files of the DFT calculations. The list provides 2 x 82 paths of single point calculations at (close to) the equilibrium of the RS and ZB crystal symmetry."
220
221
222
            ],
            "evaluatorReader": false
        },
223
224
225
226
227
228
        {
            "id": "codeOi5KIx",
            "type": "code",
            "evaluator": "IPython",
            "input": {
                "body": [
229
                    "json_list = [ ",
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
                    "'/parsed/prod-022/FhiAimsParser2.0.0-2-gf9335c4/RWApItBGtGUDsfMVlHKqrjUQ4rShT/PudDm0on_-EHhn0SHX20l2vdbSQ1x.json',",
                    "'/parsed/prod-022/FhiAimsParser2.0.0-2-gf9335c4/RWApItBGtGUDsfMVlHKqrjUQ4rShT/Pp4wUDDucIEdS9euDT89Y6xQA_JPq.json',",
                    "'/parsed/prod-022/FhiAimsParser2.0.0-2-gf9335c4/RWApItBGtGUDsfMVlHKqrjUQ4rShT/Pd5Tx2nPg7dFY-jys9XwKne6OQtKX.json',",
                    "'/parsed/prod-022/FhiAimsParser2.0.0-2-gf9335c4/RWApItBGtGUDsfMVlHKqrjUQ4rShT/PyukHM_doowQLr1Ipwa8feMxPVmI2.json',",
                    "'/parsed/prod-022/FhiAimsParser2.0.0-2-gf9335c4/RWApItBGtGUDsfMVlHKqrjUQ4rShT/PHiW0XWZCN8j4FL20b8tZzv7Vz59s.json',",
                    "'/parsed/prod-022/FhiAimsParser2.0.0-2-gf9335c4/RWApItBGtGUDsfMVlHKqrjUQ4rShT/PnhDURE4i9Q5yUaSUbEmarpPFd-oP.json',",
                    "'/parsed/prod-022/FhiAimsParser2.0.0-2-gf9335c4/RWApItBGtGUDsfMVlHKqrjUQ4rShT/Phw2RDlr8RJrjY8nb2PfCE6Bf--N0.json',",
                    "'/parsed/prod-022/FhiAimsParser2.0.0-2-gf9335c4/RWApItBGtGUDsfMVlHKqrjUQ4rShT/PxJnNtspUIcqGhneVuSJKposdVxH_.json',",
                    "'/parsed/prod-022/FhiAimsParser2.0.0-2-gf9335c4/RWApItBGtGUDsfMVlHKqrjUQ4rShT/Pl7aTuAjyxpsJM7vLAOVHYwJm-QE6.json',",
                    "'/parsed/prod-022/FhiAimsParser2.0.0-2-gf9335c4/RWApItBGtGUDsfMVlHKqrjUQ4rShT/PvkvzEExTn8uE2HYyp39OAr0XeTVs.json',",
                    "'/parsed/prod-022/FhiAimsParser2.0.0-2-gf9335c4/RWApItBGtGUDsfMVlHKqrjUQ4rShT/P5q6OPnbkCI9OZnxRMmigkwjECTEe.json',",
                    "'/parsed/prod-022/FhiAimsParser2.0.0-2-gf9335c4/RWApItBGtGUDsfMVlHKqrjUQ4rShT/Pq_S1pMWXyVEwLQtS_CRQLruINQc7.json',",
                    "'/parsed/prod-022/FhiAimsParser2.0.0-2-gf9335c4/RWApItBGtGUDsfMVlHKqrjUQ4rShT/PN0Q_OXA7e5yO6EkDKkOpGHM6hyCj.json',",
                    "'/parsed/prod-022/FhiAimsParser2.0.0-2-gf9335c4/RWApItBGtGUDsfMVlHKqrjUQ4rShT/P49a5P12dU5LYRyFCHIIWWy_T06lE.json',",
                    "'/parsed/prod-022/FhiAimsParser2.0.0-2-gf9335c4/RWApItBGtGUDsfMVlHKqrjUQ4rShT/Ph7OdXYR4ndMpTcR0zX4mqRoBZpP9.json',",
                    "'/parsed/prod-022/FhiAimsParser2.0.0-2-gf9335c4/RWApItBGtGUDsfMVlHKqrjUQ4rShT/PAlWHa4oJtvotPEJZkbrlNC_sn0h2.json',",
                    "'/parsed/prod-022/FhiAimsParser2.0.0-2-gf9335c4/RWApItBGtGUDsfMVlHKqrjUQ4rShT/P-R6dmrIaT8iFyy2ObACxIHGWnNOy.json',",
                    "'/parsed/prod-022/FhiAimsParser2.0.0-2-gf9335c4/RWApItBGtGUDsfMVlHKqrjUQ4rShT/PKhAdygZBTTTF8uvQjKv1RdaX-cR-.json',",
                    "'/parsed/prod-022/FhiAimsParser2.0.0-2-gf9335c4/RWApItBGtGUDsfMVlHKqrjUQ4rShT/PKu0vasdF5E6n3C3QydIjCtGOIla4.json',",
                    "'/parsed/prod-022/FhiAimsParser2.0.0-2-gf9335c4/RWApItBGtGUDsfMVlHKqrjUQ4rShT/PZpgpSkSltbUhJUCSbPqBKkIvQi8v.json',",
                    "'/parsed/prod-022/FhiAimsParser2.0.0-2-gf9335c4/RWApItBGtGUDsfMVlHKqrjUQ4rShT/PctpEiY3VdEmVYwH7UjqZpiDWZtpM.json',",
                    "'/parsed/prod-022/FhiAimsParser2.0.0-2-gf9335c4/RWApItBGtGUDsfMVlHKqrjUQ4rShT/PItWNNwGWlKJ12UjZAOfNetX3xlkd.json',",
                    "'/parsed/prod-022/FhiAimsParser2.0.0-2-gf9335c4/RWApItBGtGUDsfMVlHKqrjUQ4rShT/PrTcNbJ50u8bqAFWGjPJKqnuuEvY7.json',",
                    "'/parsed/prod-022/FhiAimsParser2.0.0-2-gf9335c4/RWApItBGtGUDsfMVlHKqrjUQ4rShT/PfFPGDGw6-cK5ARIXtsKl496E6A0G.json',",
                    "'/parsed/prod-022/FhiAimsParser2.0.0-2-gf9335c4/RWApItBGtGUDsfMVlHKqrjUQ4rShT/PeXgyF2iElVtNWSX9xZhroKK8nJJ4.json',",
                    "'/parsed/prod-022/FhiAimsParser2.0.0-2-gf9335c4/RWApItBGtGUDsfMVlHKqrjUQ4rShT/P7kHL_6prXXdx5_MzMVmwsmStNoc0.json',",
                    "'/parsed/prod-022/FhiAimsParser2.0.0-2-gf9335c4/RWApItBGtGUDsfMVlHKqrjUQ4rShT/PaMvDXJtsbfDgbdvrFdiFSKpXNYyC.json',",
                    "'/parsed/prod-022/FhiAimsParser2.0.0-2-gf9335c4/RWApItBGtGUDsfMVlHKqrjUQ4rShT/P9sheCSX6Gol5L-IsCvDlnmT_MEGG.json',",
                    "'/parsed/prod-022/FhiAimsParser2.0.0-2-gf9335c4/RWApItBGtGUDsfMVlHKqrjUQ4rShT/PSy2hA53Gi7wXfU9SCowqkmFWD2gp.json',",
                    "'/parsed/prod-022/FhiAimsParser2.0.0-2-gf9335c4/RWApItBGtGUDsfMVlHKqrjUQ4rShT/P51AKUeSNYXrRBK_-uc8y1-bCfUNg.json',",
                    "'/parsed/prod-022/FhiAimsParser2.0.0-2-gf9335c4/RWApItBGtGUDsfMVlHKqrjUQ4rShT/PaqU_UGMSvVN6niB3zqMYchjsLHRX.json',",
                    "'/parsed/prod-022/FhiAimsParser2.0.0-2-gf9335c4/RWApItBGtGUDsfMVlHKqrjUQ4rShT/PcyDh6nCotXyohIHh5k1dx5L5D5X9.json',",
                    "'/parsed/prod-022/FhiAimsParser2.0.0-2-gf9335c4/RWApItBGtGUDsfMVlHKqrjUQ4rShT/PudM2fYFckG7O5R4BJqg04tK_l1bd.json',",
                    "'/parsed/prod-022/FhiAimsParser2.0.0-2-gf9335c4/RWApItBGtGUDsfMVlHKqrjUQ4rShT/P9Yuhn2S6hqpJ0cf9E9uw5G5bJlzV.json',",
                    "'/parsed/prod-022/FhiAimsParser2.0.0-2-gf9335c4/RWApItBGtGUDsfMVlHKqrjUQ4rShT/PqmzGiDuYJ-Q8j-KfLDlQBvWb2Gjt.json',",
                    "'/parsed/prod-022/FhiAimsParser2.0.0-2-gf9335c4/RWApItBGtGUDsfMVlHKqrjUQ4rShT/PCeh79N53GyBSPmZQQJ97G0eAHaDT.json',",
                    "'/parsed/prod-022/FhiAimsParser2.0.0-2-gf9335c4/RWApItBGtGUDsfMVlHKqrjUQ4rShT/PfgDGZGQLelhbTh9ZtsKqWGFxvhJ9.json',",
                    "'/parsed/prod-022/FhiAimsParser2.0.0-2-gf9335c4/RWApItBGtGUDsfMVlHKqrjUQ4rShT/PB0yzgD_PWA0LKTKGJ8ZZuD33YEUG.json',",
                    "'/parsed/prod-022/FhiAimsParser2.0.0-2-gf9335c4/RWApItBGtGUDsfMVlHKqrjUQ4rShT/PAy8gmVjhXEQxzCJ8LOWZakyvudO8.json',",
                    "'/parsed/prod-022/FhiAimsParser2.0.0-2-gf9335c4/RWApItBGtGUDsfMVlHKqrjUQ4rShT/PxHkLq1KNzXy50hbEPelPan0cCrsH.json',",
                    "'/parsed/prod-022/FhiAimsParser2.0.0-2-gf9335c4/RWApItBGtGUDsfMVlHKqrjUQ4rShT/PWkXOKoiw0iAE585QkElUZNCYOqYI.json',",
                    "'/parsed/prod-022/FhiAimsParser2.0.0-2-gf9335c4/RWApItBGtGUDsfMVlHKqrjUQ4rShT/PgemHvDiNrY7gsuhCx0VTL8kPb7AM.json',",
                    "'/parsed/prod-022/FhiAimsParser2.0.0-2-gf9335c4/RWApItBGtGUDsfMVlHKqrjUQ4rShT/PlIiyctCzbm5lbDOxpwEi3GbORHRD.json',",
                    "'/parsed/prod-022/FhiAimsParser2.0.0-2-gf9335c4/RWApItBGtGUDsfMVlHKqrjUQ4rShT/PGRvHpDj8bRbzvIL0c9yfOmeZjfah.json',",
                    "'/parsed/prod-022/FhiAimsParser2.0.0-2-gf9335c4/RWApItBGtGUDsfMVlHKqrjUQ4rShT/PHG50HtPjrXvkxeCITsjzFtq9N4hK.json',",
                    "'/parsed/prod-022/FhiAimsParser2.0.0-2-gf9335c4/RWApItBGtGUDsfMVlHKqrjUQ4rShT/PZuBrsUzsdX__rAeKn_JQgfX-YGoo.json',",
                    "'/parsed/prod-022/FhiAimsParser2.0.0-2-gf9335c4/RWApItBGtGUDsfMVlHKqrjUQ4rShT/PshguFKUbvULqOUN80QC-M3xQvrmJ.json',",
                    "'/parsed/prod-022/FhiAimsParser2.0.0-2-gf9335c4/RWApItBGtGUDsfMVlHKqrjUQ4rShT/PXoBZDFv4BhXvWTO29YBcGBXXu1vS.json',",
                    "'/parsed/prod-022/FhiAimsParser2.0.0-2-gf9335c4/RWApItBGtGUDsfMVlHKqrjUQ4rShT/PsmYUa8-6qr40jG7XJhUIynL1Ue8b.json',",
                    "'/parsed/prod-022/FhiAimsParser2.0.0-2-gf9335c4/RWApItBGtGUDsfMVlHKqrjUQ4rShT/P-M1B6jU_t-kPPKkoFU9kZkEbx332.json',",
                    "'/parsed/prod-022/FhiAimsParser2.0.0-2-gf9335c4/RWApItBGtGUDsfMVlHKqrjUQ4rShT/Pq1Tvh6kPeJ-PO77jXvOsp92PMK4P.json',",
                    "'/parsed/prod-022/FhiAimsParser2.0.0-2-gf9335c4/RWApItBGtGUDsfMVlHKqrjUQ4rShT/PfA960McVCueQzY_t-TVT0wgbZCC5.json',",
                    "'/parsed/prod-022/FhiAimsParser2.0.0-2-gf9335c4/RWApItBGtGUDsfMVlHKqrjUQ4rShT/P-eZyP4BB8uo0pdmQIIrat2mhXQBN.json',",
                    "'/parsed/prod-022/FhiAimsParser2.0.0-2-gf9335c4/RWApItBGtGUDsfMVlHKqrjUQ4rShT/PmenrFglDQWoTWLNvVVobyI3dmkIe.json',",
                    "'/parsed/prod-022/FhiAimsParser2.0.0-2-gf9335c4/RWApItBGtGUDsfMVlHKqrjUQ4rShT/PoEQrdbxDvcCldS5_cpSOcAS57svB.json',",
                    "'/parsed/prod-022/FhiAimsParser2.0.0-2-gf9335c4/RWApItBGtGUDsfMVlHKqrjUQ4rShT/PQUYoBu1tULTvysw2jz8XwBnIeewS.json',",
                    "'/parsed/prod-022/FhiAimsParser2.0.0-2-gf9335c4/RWApItBGtGUDsfMVlHKqrjUQ4rShT/PYOw5h3ttt0tMyUPOvqDPc6yArPTy.json',",
                    "'/parsed/prod-022/FhiAimsParser2.0.0-2-gf9335c4/RWApItBGtGUDsfMVlHKqrjUQ4rShT/Pzl8jOEFAC45VXxnxMJ7_nf2xS6v2.json',",
                    "'/parsed/prod-022/FhiAimsParser2.0.0-2-gf9335c4/RWApItBGtGUDsfMVlHKqrjUQ4rShT/PmkSmszyXIzY3yIzTUnkvOCwqNFFg.json',",
                    "'/parsed/prod-022/FhiAimsParser2.0.0-2-gf9335c4/RWApItBGtGUDsfMVlHKqrjUQ4rShT/P7JW4GQVa_xQ4YKM88F9LFzyVoXke.json',",
                    "'/parsed/prod-022/FhiAimsParser2.0.0-2-gf9335c4/RWApItBGtGUDsfMVlHKqrjUQ4rShT/PsL8fyWYvrq6V0pE3zTfoNsWAVUd1.json',",
                    "'/parsed/prod-022/FhiAimsParser2.0.0-2-gf9335c4/RWApItBGtGUDsfMVlHKqrjUQ4rShT/PbVMALFnpGdoEyabKhI_3DtbUX6W7.json',",
                    "'/parsed/prod-022/FhiAimsParser2.0.0-2-gf9335c4/RWApItBGtGUDsfMVlHKqrjUQ4rShT/PME2sPwrfVW7U0veuObWai6ryPqou.json',",
                    "'/parsed/prod-022/FhiAimsParser2.0.0-2-gf9335c4/RWApItBGtGUDsfMVlHKqrjUQ4rShT/PSzD2TkeF0Gg9fnlc1cNvyK7NL24E.json',",
                    "'/parsed/prod-022/FhiAimsParser2.0.0-2-gf9335c4/RWApItBGtGUDsfMVlHKqrjUQ4rShT/PQB49fu8BN3kua7uLKQLlT5dWdHi0.json',",
                    "'/parsed/prod-022/FhiAimsParser2.0.0-2-gf9335c4/RWApItBGtGUDsfMVlHKqrjUQ4rShT/PzkPWSKWCQ14F1io7eGkOhK7h0O_Q.json',",
                    "'/parsed/prod-022/FhiAimsParser2.0.0-2-gf9335c4/RWApItBGtGUDsfMVlHKqrjUQ4rShT/P_18Q5thpEagvBD-4tUdeTopJwCTV.json',",
                    "'/parsed/prod-022/FhiAimsParser2.0.0-2-gf9335c4/RWApItBGtGUDsfMVlHKqrjUQ4rShT/PKmJzuNjx3TVhnGoqyqoWSnwEgjGm.json',",
                    "'/parsed/prod-022/FhiAimsParser2.0.0-2-gf9335c4/RWApItBGtGUDsfMVlHKqrjUQ4rShT/P7oSkZrV7zs3KG6S8IZyTSE_FmwBQ.json',",
                    "'/parsed/prod-022/FhiAimsParser2.0.0-2-gf9335c4/RWApItBGtGUDsfMVlHKqrjUQ4rShT/PIwvZmuUIIrtn2HcPLoozMo73I4uz.json',",
                    "'/parsed/prod-022/FhiAimsParser2.0.0-2-gf9335c4/RWApItBGtGUDsfMVlHKqrjUQ4rShT/P-70iigSqFlM9BO7d8xReToc2yoJL.json',",
                    "'/parsed/prod-022/FhiAimsParser2.0.0-2-gf9335c4/RWApItBGtGUDsfMVlHKqrjUQ4rShT/PS3i4SxrlnWE9AUgOmy00D3f5dMgE.json',",
                    "'/parsed/prod-022/FhiAimsParser2.0.0-2-gf9335c4/RWApItBGtGUDsfMVlHKqrjUQ4rShT/PKSpxMXqdSstTt6Es26kroYBYENnq.json',",
                    "'/parsed/prod-022/FhiAimsParser2.0.0-2-gf9335c4/RWApItBGtGUDsfMVlHKqrjUQ4rShT/P_DFu-YobOdcOb1mfdI22vrtaSQAh.json',",
                    "'/parsed/prod-022/FhiAimsParser2.0.0-2-gf9335c4/RWApItBGtGUDsfMVlHKqrjUQ4rShT/PZQkSWOat-bIQV5IVle0tBtpUg_u-.json',",
                    "'/parsed/prod-022/FhiAimsParser2.0.0-2-gf9335c4/RWApItBGtGUDsfMVlHKqrjUQ4rShT/Po4TFqVuLhanZRORPTA7dDA2sdbrg.json',",
                    "'/parsed/prod-022/FhiAimsParser2.0.0-2-gf9335c4/RWApItBGtGUDsfMVlHKqrjUQ4rShT/PvVLlR_Pq2Ibks3hWK2HOSJ1GgVRY.json',",
                    "'/parsed/prod-022/FhiAimsParser2.0.0-2-gf9335c4/RWApItBGtGUDsfMVlHKqrjUQ4rShT/PmmsPJ6ouZjFnoIdGfis_3AHs9clP.json',",
                    "'/parsed/prod-022/FhiAimsParser2.0.0-2-gf9335c4/RWApItBGtGUDsfMVlHKqrjUQ4rShT/PQESlzgesuFywpq09x-vZ0gikcjPf.json',",
                    "'/parsed/prod-022/FhiAimsParser2.0.0-2-gf9335c4/RWApItBGtGUDsfMVlHKqrjUQ4rShT/PKfJHS4WQGppgde2dACUjMuVoL2sB.json',",
                    "'/parsed/prod-022/FhiAimsParser2.0.0-2-gf9335c4/RWApItBGtGUDsfMVlHKqrjUQ4rShT/PcEzW227FYLGI2t3jk-gLCgcEZWZe.json',",
311
                    "'/parsed/prod-022/FhiAimsParser2.0.0-2-gf9335c4/RWApItBGtGUDsfMVlHKqrjUQ4rShT/P36pL30yblwhze_vHZYZ_cybqeH4V.json',",
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
                    "'/parsed/prod-022/FhiAimsParser2.0.0-2-gf9335c4/RWApItBGtGUDsfMVlHKqrjUQ4rShT/PjGykEyzLOFynTPTNDcycF0GYg1PE.json',",
                    "'/parsed/prod-022/FhiAimsParser2.0.0-2-gf9335c4/RWApItBGtGUDsfMVlHKqrjUQ4rShT/PRzzJL7OHYeejsIvgfG1ph6BAeS_q.json',",
                    "'/parsed/prod-022/FhiAimsParser2.0.0-2-gf9335c4/RWApItBGtGUDsfMVlHKqrjUQ4rShT/PNavIaZhgwAeZM0-QhWHe_38iUgEF.json',",
                    "'/parsed/prod-022/FhiAimsParser2.0.0-2-gf9335c4/RWApItBGtGUDsfMVlHKqrjUQ4rShT/PpxTrTc6NkExiq1nzQkhd3cJ1-yRY.json',",
                    "'/parsed/prod-022/FhiAimsParser2.0.0-2-gf9335c4/RWApItBGtGUDsfMVlHKqrjUQ4rShT/PVYIDqiD6OslrGDcpUvuqvc0bD8Jr.json',",
                    "'/parsed/prod-022/FhiAimsParser2.0.0-2-gf9335c4/RWApItBGtGUDsfMVlHKqrjUQ4rShT/PiZTl1-v3bdCUDjxt-w2VxKMGW3-6.json',",
                    "'/parsed/prod-022/FhiAimsParser2.0.0-2-gf9335c4/RWApItBGtGUDsfMVlHKqrjUQ4rShT/PomJQS9nQ4WsUIr718n6H4YbM0Fi7.json',",
                    "'/parsed/prod-022/FhiAimsParser2.0.0-2-gf9335c4/RWApItBGtGUDsfMVlHKqrjUQ4rShT/PEC0GOHh7MviqeJkG1qukjk4bALIS.json',",
                    "'/parsed/prod-022/FhiAimsParser2.0.0-2-gf9335c4/RWApItBGtGUDsfMVlHKqrjUQ4rShT/PNXyczNslCGZT642R9ZFYGvidFvua.json',",
                    "'/parsed/prod-022/FhiAimsParser2.0.0-2-gf9335c4/RWApItBGtGUDsfMVlHKqrjUQ4rShT/PpyJZtNPyNqX3ofwfNMpvJU_9PEKI.json',",
                    "'/parsed/prod-022/FhiAimsParser2.0.0-2-gf9335c4/RWApItBGtGUDsfMVlHKqrjUQ4rShT/PA3s37bS9VLUzI5wYL_ntZ6RIM6IJ.json',",
                    "'/parsed/prod-022/FhiAimsParser2.0.0-2-gf9335c4/RWApItBGtGUDsfMVlHKqrjUQ4rShT/PvEU0OOwFN7eFqiwt9m9S_SmhCJUm.json',",
                    "'/parsed/prod-022/FhiAimsParser2.0.0-2-gf9335c4/RWApItBGtGUDsfMVlHKqrjUQ4rShT/PsJUIeSLEotoIZk6R54H-G2JWPnQG.json',",
                    "'/parsed/prod-022/FhiAimsParser2.0.0-2-gf9335c4/RWApItBGtGUDsfMVlHKqrjUQ4rShT/PUT_umVDXGUamLH1R7nkazwKz95dz.json',",
                    "'/parsed/prod-022/FhiAimsParser2.0.0-2-gf9335c4/RWApItBGtGUDsfMVlHKqrjUQ4rShT/P_rlThO8Jv0C2YIgYKLbCTM1rvfW-.json',",
                    "'/parsed/prod-022/FhiAimsParser2.0.0-2-gf9335c4/RWApItBGtGUDsfMVlHKqrjUQ4rShT/PyizrsR40QyxopYKKk2jUtl7nElXJ.json',",
                    "'/parsed/prod-022/FhiAimsParser2.0.0-2-gf9335c4/RWApItBGtGUDsfMVlHKqrjUQ4rShT/PQhb3_h4Bo9e5xjhhTUBY_8uOEtTM.json',",
                    "'/parsed/prod-022/FhiAimsParser2.0.0-2-gf9335c4/RWApItBGtGUDsfMVlHKqrjUQ4rShT/P0tj3NYHfrit7NB0ewfG-fIjRWuJD.json',",
                    "'/parsed/prod-022/FhiAimsParser2.0.0-2-gf9335c4/RWApItBGtGUDsfMVlHKqrjUQ4rShT/PlrW3wNN03bq-G8TLkXHpALwSTUUg.json',",
                    "'/parsed/prod-022/FhiAimsParser2.0.0-2-gf9335c4/RWApItBGtGUDsfMVlHKqrjUQ4rShT/P3hSpXydSB6z3p79OEIOQK6llto1K.json',",
                    "'/parsed/prod-022/FhiAimsParser2.0.0-2-gf9335c4/RWApItBGtGUDsfMVlHKqrjUQ4rShT/Pa7ltCNBQsk7Owlu0bJnsE8iY-Rmw.json',",
                    "'/parsed/prod-022/FhiAimsParser2.0.0-2-gf9335c4/RWApItBGtGUDsfMVlHKqrjUQ4rShT/PYeYlDJb7j4qJ9ol38GSM_eYJsiSe.json',",
                    "'/parsed/prod-022/FhiAimsParser2.0.0-2-gf9335c4/RWApItBGtGUDsfMVlHKqrjUQ4rShT/PX39jEfgLeDPddrkPuTvUfVv4_thl.json',",
                    "'/parsed/prod-022/FhiAimsParser2.0.0-2-gf9335c4/RWApItBGtGUDsfMVlHKqrjUQ4rShT/PWNXU92VwL7KkuoxItglRiuifcOnk.json',",
                    "'/parsed/prod-022/FhiAimsParser2.0.0-2-gf9335c4/RWApItBGtGUDsfMVlHKqrjUQ4rShT/PoWlbXJuGJ4-22DclM15L_g44LN3P.json',",
                    "'/parsed/prod-022/FhiAimsParser2.0.0-2-gf9335c4/RWApItBGtGUDsfMVlHKqrjUQ4rShT/PEYkIqgUpWfoq4Tcsy8_bFVUs9mko.json',",
                    "'/parsed/prod-022/FhiAimsParser2.0.0-2-gf9335c4/RWApItBGtGUDsfMVlHKqrjUQ4rShT/Pst86UVhV07OfKDhwlp0PNxiMtXki.json',",
                    "'/parsed/prod-022/FhiAimsParser2.0.0-2-gf9335c4/RWApItBGtGUDsfMVlHKqrjUQ4rShT/PVc9Pn-w-6MEpu41jV4p_keLeM-Yy.json',",
                    "'/parsed/prod-022/FhiAimsParser2.0.0-2-gf9335c4/RWApItBGtGUDsfMVlHKqrjUQ4rShT/P81vIYtJtOEx4n865B86z-KvUb6hA.json',",
                    "'/parsed/prod-022/FhiAimsParser2.0.0-2-gf9335c4/RWApItBGtGUDsfMVlHKqrjUQ4rShT/P8BMwwn-g_0Xezs6oK3ay6ZRXIRR-.json',",
                    "'/parsed/prod-022/FhiAimsParser2.0.0-2-gf9335c4/RWApItBGtGUDsfMVlHKqrjUQ4rShT/P8nQ1bSGP4pyRMOa4i5Uhjpb1Mord.json',",
                    "'/parsed/prod-022/FhiAimsParser2.0.0-2-gf9335c4/RWApItBGtGUDsfMVlHKqrjUQ4rShT/PCmzvvjhPSzkp_bhkAuDWK375Fs9g.json',",
                    "'/parsed/prod-022/FhiAimsParser2.0.0-2-gf9335c4/RWApItBGtGUDsfMVlHKqrjUQ4rShT/PhaEAJ72mzGm65KpjGcnVVlFax_l7.json',",
                    "'/parsed/prod-022/FhiAimsParser2.0.0-2-gf9335c4/RWApItBGtGUDsfMVlHKqrjUQ4rShT/PvXy3VrpadhZLQAwphJE6GVB_0OUp.json',",
                    "'/parsed/prod-022/FhiAimsParser2.0.0-2-gf9335c4/RWApItBGtGUDsfMVlHKqrjUQ4rShT/P3486jK16L4wlXG0B4v1-csMQ6oJ3.json',",
                    "'/parsed/prod-022/FhiAimsParser2.0.0-2-gf9335c4/RWApItBGtGUDsfMVlHKqrjUQ4rShT/POL8lsjGueHjkLb9hMpptti6c8Phg.json',",
                    "'/parsed/prod-022/FhiAimsParser2.0.0-2-gf9335c4/RWApItBGtGUDsfMVlHKqrjUQ4rShT/PULp85M-UCJ52e4UaGcyeLDmUeq2Z.json',",
                    "'/parsed/prod-022/FhiAimsParser2.0.0-2-gf9335c4/RWApItBGtGUDsfMVlHKqrjUQ4rShT/Pr6E85ezTMa4WX-GTFoms6w0Rb0hT.json',",
                    "'/parsed/prod-022/FhiAimsParser2.0.0-2-gf9335c4/RWApItBGtGUDsfMVlHKqrjUQ4rShT/P0e8gRRxOvcJquPDa7SeYFk2OCiFS.json',",
                    "'/parsed/prod-022/FhiAimsParser2.0.0-2-gf9335c4/RWApItBGtGUDsfMVlHKqrjUQ4rShT/Px8VpfH0LzX99ht06ME-0EsmmrSqe.json',",
                    "'/parsed/prod-022/FhiAimsParser2.0.0-2-gf9335c4/RWApItBGtGUDsfMVlHKqrjUQ4rShT/PM3KjHYJjTA26va4uYXD8homH7pUm.json',",
                    "'/parsed/prod-022/FhiAimsParser2.0.0-2-gf9335c4/RWApItBGtGUDsfMVlHKqrjUQ4rShT/P6GMfPCT_Fa40hXkVKpqnygEBt4PG.json',",
                    "'/parsed/prod-022/FhiAimsParser2.0.0-2-gf9335c4/RWApItBGtGUDsfMVlHKqrjUQ4rShT/Pu9uI6ldU3ZVgwfmy-Um0D7IBxTCK.json',",
                    "'/parsed/prod-022/FhiAimsParser2.0.0-2-gf9335c4/RWApItBGtGUDsfMVlHKqrjUQ4rShT/PN0ekL4w0k3A5OXacRBScDJ9KlGMf.json',",
                    "'/parsed/prod-022/FhiAimsParser2.0.0-2-gf9335c4/RWApItBGtGUDsfMVlHKqrjUQ4rShT/P39GmzBY478BzuXrZM-0i2Z-njyb9.json',",
                    "'/parsed/prod-022/FhiAimsParser2.0.0-2-gf9335c4/RWApItBGtGUDsfMVlHKqrjUQ4rShT/PEhAEeu8aSPA_d3_dHCjTlAi1y09j.json',",
                    "'/parsed/prod-022/FhiAimsParser2.0.0-2-gf9335c4/RWApItBGtGUDsfMVlHKqrjUQ4rShT/PVshjrYqjAg_8QtgfGW2ABnR-mlIP.json',",
                    "'/parsed/prod-022/FhiAimsParser2.0.0-2-gf9335c4/RWApItBGtGUDsfMVlHKqrjUQ4rShT/PDjDFM8OWbY0TtHCY-DBYRJmRQ8fO.json',",
                    "'/parsed/prod-022/FhiAimsParser2.0.0-2-gf9335c4/RWApItBGtGUDsfMVlHKqrjUQ4rShT/PxkyksXrwYuxnJ_wD7qN890rycd67.json',",
                    "'/parsed/prod-022/FhiAimsParser2.0.0-2-gf9335c4/RWApItBGtGUDsfMVlHKqrjUQ4rShT/PUDiBRLe2pg5Hjvd3kc_20wMbinMI.json',",
                    "'/parsed/prod-022/FhiAimsParser2.0.0-2-gf9335c4/RWApItBGtGUDsfMVlHKqrjUQ4rShT/PpikjM2BVj1atNlsbkcJzK9TkUIox.json',",
                    "'/parsed/prod-022/FhiAimsParser2.0.0-2-gf9335c4/RWApItBGtGUDsfMVlHKqrjUQ4rShT/PcYC-NeMnx_goUeYg8PmaNVo0chDc.json',",
                    "'/parsed/prod-022/FhiAimsParser2.0.0-2-gf9335c4/RWApItBGtGUDsfMVlHKqrjUQ4rShT/PQFhJGR6USZg0MrTttQvXm1IiHIdq.json',",
                    "'/parsed/prod-022/FhiAimsParser2.0.0-2-gf9335c4/RWApItBGtGUDsfMVlHKqrjUQ4rShT/PGvuYKpzVCcWnf33I4uy8fGyJVxXq.json',",
                    "'/parsed/prod-022/FhiAimsParser2.0.0-2-gf9335c4/RWApItBGtGUDsfMVlHKqrjUQ4rShT/PV--hzM8rvSS8a6LZBuuW6IPbqvY6.json',",
                    "'/parsed/prod-022/FhiAimsParser2.0.0-2-gf9335c4/RWApItBGtGUDsfMVlHKqrjUQ4rShT/PHQ9WIyu3N7whYM_ykFZunv5to3l2.json',",
                    "'/parsed/prod-022/FhiAimsParser2.0.0-2-gf9335c4/RWApItBGtGUDsfMVlHKqrjUQ4rShT/P2_H4gO4T7T7jknE474jzrc1Y4Tjm.json',",
                    "'/parsed/prod-022/FhiAimsParser2.0.0-2-gf9335c4/RWApItBGtGUDsfMVlHKqrjUQ4rShT/PFs1SxWG79Zqj0jssdOIqUicZk9aH.json',",
                    "'/parsed/prod-022/FhiAimsParser2.0.0-2-gf9335c4/RWApItBGtGUDsfMVlHKqrjUQ4rShT/PiOIHShEKCjdganj-Sd0MkJaLglGr.json',",
                    "'/parsed/prod-022/FhiAimsParser2.0.0-2-gf9335c4/RWApItBGtGUDsfMVlHKqrjUQ4rShT/Pn-jKNaG1IM7sKsxBh-ekfl3M3hIa.json',",
                    "'/parsed/prod-022/FhiAimsParser2.0.0-2-gf9335c4/RWApItBGtGUDsfMVlHKqrjUQ4rShT/P_80H6vlBefw1U3rKFDPPtpJAX1GH.json',",
                    "'/parsed/prod-022/FhiAimsParser2.0.0-2-gf9335c4/RWApItBGtGUDsfMVlHKqrjUQ4rShT/PdHoVKHCES7XtBpTVk0eihbo0kqmR.json',",
                    "'/parsed/prod-022/FhiAimsParser2.0.0-2-gf9335c4/RWApItBGtGUDsfMVlHKqrjUQ4rShT/PAzsSjYMU1-CdGulNpG_KzgFlfRrK.json',",
                    "'/parsed/prod-022/FhiAimsParser2.0.0-2-gf9335c4/RWApItBGtGUDsfMVlHKqrjUQ4rShT/PVlh5JoGn6jHWlE96SKt6eRMYTIVK.json',",
                    "'/parsed/prod-022/FhiAimsParser2.0.0-2-gf9335c4/RWApItBGtGUDsfMVlHKqrjUQ4rShT/PHfNgOoPEHjzs9iOh900vIUv-GVJl.json',",
                    "'/parsed/prod-022/FhiAimsParser2.0.0-2-gf9335c4/RWApItBGtGUDsfMVlHKqrjUQ4rShT/PVTUPQyCTvrAWV0DEN_xnPgrBPmM2.json',",
                    "'/parsed/prod-022/FhiAimsParser2.0.0-2-gf9335c4/RWApItBGtGUDsfMVlHKqrjUQ4rShT/Pws96oc5f7jIltD9Vvqc3svzL4mcW.json',",
                    "'/parsed/prod-022/FhiAimsParser2.0.0-2-gf9335c4/RWApItBGtGUDsfMVlHKqrjUQ4rShT/PLJBz0uY-AywnUhGMCXMounM-_Af3.json',",
                    "'/parsed/prod-022/FhiAimsParser2.0.0-2-gf9335c4/RWApItBGtGUDsfMVlHKqrjUQ4rShT/Pkole11VWAOiu91qHeq6lOzIM2Y1Y.json',",
                    "'/parsed/prod-022/FhiAimsParser2.0.0-2-gf9335c4/RWApItBGtGUDsfMVlHKqrjUQ4rShT/P2R4Ds9DFm8USF_AgHtQnWK1TkQiR.json',",
                    "'/parsed/prod-022/FhiAimsParser2.0.0-2-gf9335c4/RWApItBGtGUDsfMVlHKqrjUQ4rShT/PxrF4NRKjX9jsmVIocs7uQuLwD_cS.json',",
                    "'/parsed/prod-022/FhiAimsParser2.0.0-2-gf9335c4/RWApItBGtGUDsfMVlHKqrjUQ4rShT/PMxYGoRCMDXQWrNytWJHc-vUgRKTT.json',",
                    "'/parsed/prod-022/FhiAimsParser2.0.0-2-gf9335c4/RWApItBGtGUDsfMVlHKqrjUQ4rShT/PK3-3e-av7nkv5AOEwjZyyjkI9Hgy.json',",
                    "'/parsed/prod-022/FhiAimsParser2.0.0-2-gf9335c4/RWApItBGtGUDsfMVlHKqrjUQ4rShT/PC8N-y0PPPHeAwhkYGyYYI9H1UUHy.json',",
                    "'/parsed/prod-022/FhiAimsParser2.0.0-2-gf9335c4/RWApItBGtGUDsfMVlHKqrjUQ4rShT/PqWPF7Pn3u9LPGyrxipPfrpfm31zz.json',",
                    "'/parsed/prod-022/FhiAimsParser2.0.0-2-gf9335c4/RWApItBGtGUDsfMVlHKqrjUQ4rShT/PM_ADyGOaL4e2biSXvxQWrEDM78Z3.json',",
                    "'/parsed/prod-022/FhiAimsParser2.0.0-2-gf9335c4/RWApItBGtGUDsfMVlHKqrjUQ4rShT/PfGXdJkORwLQ-aX-d9bla7obqtnkt.json',",
                    "'/parsed/prod-022/FhiAimsParser2.0.0-2-gf9335c4/RWApItBGtGUDsfMVlHKqrjUQ4rShT/PmILc9BsSYjJ9OKH4MkPr0D4LGYGC.json',",
                    "'/parsed/prod-022/FhiAimsParser2.0.0-2-gf9335c4/RWApItBGtGUDsfMVlHKqrjUQ4rShT/Py-J0ezaQ_Fdsh_196hT-XgsYNQAs.json',",
                    "'/parsed/prod-022/FhiAimsParser2.0.0-2-gf9335c4/RWApItBGtGUDsfMVlHKqrjUQ4rShT/PUJUPZHk2jrE1KVUS7H13mKBH4oVR.json',",
                    "'/parsed/prod-022/FhiAimsParser2.0.0-2-gf9335c4/RWApItBGtGUDsfMVlHKqrjUQ4rShT/POIYfYCEIron9yzowfHWhVea-VEFW.json',",
                    "'/parsed/prod-022/FhiAimsParser2.0.0-2-gf9335c4/RWApItBGtGUDsfMVlHKqrjUQ4rShT/P_1mfRE8eDZ7zCLQwGT_3n8YC34dE.json'",
Emre Ahmetcik's avatar
Emre Ahmetcik committed
394
395
                    "]",
                    "print \"Done\""
396
                ]
397
398
399
            },
            "output": {
                "state": {},
Emre Ahmetcik's avatar
Emre Ahmetcik committed
400
                "selectedType": "Results",
401
                "pluginName": "IPython",
Emre Ahmetcik's avatar
Emre Ahmetcik committed
402
403
                "shellId": "CFDBBC64C18149AA8C1F32F183239C1A",
                "height": 0
404
405
            },
            "evaluatorReader": true,
Emre Ahmetcik's avatar
Emre Ahmetcik committed
406
            "lineCount": 167,
407
408
409
410
411
412
            "tags": "json_list"
        },
        {
            "id": "markdownjLPoKc",
            "type": "markdown",
            "body": [
413
                "Now, import and use the class 'NOMADStructure'  to get the chemical formula, the total energy and the space group from each json file. You can type 'help(NOMADStructure)' to find out, what else can be extracted from a structure."
414
415
416
417
418
419
420
421
422
423
            ],
            "evaluatorReader": false
        },
        {
            "id": "code7GfA9p",
            "type": "code",
            "evaluator": "IPython",
            "input": {
                "body": [
                    "from nomad_sim.nomad_structures import NOMADStructure",
Emre Ahmetcik's avatar
Emre Ahmetcik committed
424
425
                    "import warnings",
                    "warnings.filterwarnings('ignore')",
426
                    "",
427
                    "nomad_structure_list = [NOMADStructure(in_file=json_path, file_format='NOMAD', take_first='False') for json_path in json_list]",
428
                    "",
429
430
431
432
433
434
435
                    "# Consider first element of nomad_structure_list as an example",
                    "first_structure = nomad_structure_list[0]",
                    "print first_structure.chemical_symbols[0,0]",
                    "print first_structure.chemical_formula[0,0]",
                    "print first_structure.spacegroup_analyzer[0,0].get_space_group_number() ",
                    "print first_structure.energy_total__eV[0,0] # energy per unit cell",
                    "print first_structure.energy_total__eV[0,0]/ len(first_structure.atoms[0,0]) # energy per atom"
436
437
438
439
440
441
                ]
            },
            "output": {
                "state": {},
                "selectedType": "Results",
                "pluginName": "IPython",
Emre Ahmetcik's avatar
Emre Ahmetcik committed
442
443
                "shellId": "CFDBBC64C18149AA8C1F32F183239C1A",
                "height": 0
444
445
            },
            "evaluatorReader": true,
Emre Ahmetcik's avatar
Emre Ahmetcik committed
446
            "lineCount": 13,
447
448
            "tags": "nomad_structure_list"
        },
449
450
451
452
453
454
455
456
        {
            "id": "markdownI6KjT2",
            "type": "markdown",
            "body": [
                "Find the function 'get_energies'  in the following cell which returns a data frame of  2 x 82 rows containing the chemical formula, the total energy per atom in eV, the space group and the json path."
            ],
            "evaluatorReader": false
        },
457
458
459
460
461
462
463
        {
            "id": "codeE3BU1L",
            "type": "code",
            "evaluator": "IPython",
            "input": {
                "body": [
                    "import pandas as pd",
464
                    "",
465
466
467
468
469
470
471
                    "def get_energies(nomad_structure_list):",
                    "    chemical_formula_list = [nomad_structure.chemical_formula[0,0] for nomad_structure in nomad_structure_list]",
                    "    energy_list = [nomad_structure.energy_total__eV[0,0] / len(nomad_structure.atoms[0,0]) for nomad_structure in nomad_structure_list ]",
                    "    space_group_list = [nomad_structure.spacegroup_analyzer[0,0].get_space_group_number() for nomad_structure in nomad_structure_list]",
                    "    ",
                    "    data = zip(chemical_formula_list, energy_list, space_group_list, json_list)",
                    "    df_out = pd.DataFrame(data, columns=['chemical_formula', 'energy', 'space_group', 'json_path'])",
Emre Ahmetcik's avatar
Emre Ahmetcik committed
472
473
                    "    return df_out",
                    "print \"Done\""
474
475
476
477
                ]
            },
            "output": {
                "state": {},
Emre Ahmetcik's avatar
Emre Ahmetcik committed
478
                "selectedType": "Results",
479
                "pluginName": "IPython",
Emre Ahmetcik's avatar
Emre Ahmetcik committed
480
481
                "shellId": "CFDBBC64C18149AA8C1F32F183239C1A",
                "height": 0
482
483
            },
            "evaluatorReader": true,
Emre Ahmetcik's avatar
Emre Ahmetcik committed
484
            "lineCount": 11,
485
486
            "tags": "get_energies"
        },
487
488
489
490
491
492
493
494
        {
            "id": "markdownbEHiV5",
            "type": "markdown",
            "body": [
                "Call 'get_energies' and print the data frame sorted by the chemical formula."
            ],
            "evaluatorReader": false
        },
495
496
497
498
499
500
        {
            "id": "codeheXP3D",
            "type": "code",
            "evaluator": "IPython",
            "input": {
                "body": [
501
                    "# set pandas option to display whole data frame",
502
503
                    "pd.set_option('display.max_rows', 200)",
                    "pd.set_option('display.expand_frame_repr', False)",
504
505
506
                    "",
                    "df_energies = get_energies(nomad_structure_list)",
                    "print df_energies.sort_values('chemical_formula')"
507
508
509
510
511
512
                ]
            },
            "output": {
                "state": {},
                "selectedType": "Results",
                "pluginName": "IPython",
Emre Ahmetcik's avatar
Emre Ahmetcik committed
513
514
                "shellId": "CFDBBC64C18149AA8C1F32F183239C1A",
                "height": 0
515
516
517
518
519
520
521
522
            },
            "evaluatorReader": true,
            "lineCount": 6
        },
        {
            "id": "markdownnT2K2b",
            "type": "markdown",
            "body": [
523
                "In the following cell, a function to construct the property vector $\\mathbf{P}$ of RS vs ZB energy differences is defined using the just introduced function 'get_energies'. The structures are separated by their space groups: 225 and 221 for RS and 216 and 227 for ZB (the second space group of each structure takes account of the elemental solids). Furthermore the json path of the minimum structure (RS or ZB) is returned."
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
            ],
            "evaluatorReader": false
        },
        {
            "id": "code2z75Py",
            "type": "code",
            "evaluator": "IPython",
            "input": {
                "body": [
                    "def get_energy_diffs(nomad_structure_list):    ",
                    "    df = get_energies(nomad_structure_list)",
                    "    ",
                    "    spacegroup_tuples = [(225, 221), (216, 227)] # [(RS), (ZB)]",
                    "    selected_space_groups, spacegroups_to_be_replace = zip(*spacegroup_tuples)",
                    "    ",
539
                    "    # replace all 221 by 225 and all 227 by 216 to get one RS and one ZB column",
540
                    "    df['space_group'] = df['space_group'].replace( spacegroups_to_be_replace , selected_space_groups )   ",
541
                    "",
542
543
544
545
546
547
548
549
550
                    "    # df with json_path of minimum energy per chemical_formula",
                    "    df_path_of_min = df.sort_values(by='energy').groupby(['chemical_formula'], as_index=True).first()['json_path']",
                    "    ",
                    "    # transform space_group values to column names",
                    "    df = df.pivot_table('energy', 'chemical_formula', 'space_group')",
                    "    df.columns = [216, 225]",
                    "    ",
                    "    df['energy_diff'] = df[225] - df[216]",
                    "    df['json_path'] = df_path_of_min   ",
Emre Ahmetcik's avatar
Emre Ahmetcik committed
551
552
                    "    return df[['energy_diff', 'json_path']]",
                    "print \"Done\""
553
                ]
554
555
556
            },
            "output": {
                "state": {},
Emre Ahmetcik's avatar
Emre Ahmetcik committed
557
                "selectedType": "Results",
558
                "pluginName": "IPython",
Emre Ahmetcik's avatar
Emre Ahmetcik committed
559
560
                "shellId": "CFDBBC64C18149AA8C1F32F183239C1A",
                "height": 0
561
562
            },
            "evaluatorReader": true,
Emre Ahmetcik's avatar
Emre Ahmetcik committed
563
            "lineCount": 20,
564
            "tags": "get_energy_diffs"
565
        },
Emre Ahmetcik's avatar
Emre Ahmetcik committed
566
567
568
569
570
571
572
573
574
575
576
        {
            "id": "codexaiFel",
            "type": "code",
            "evaluator": "IPython",
            "input": {
                "body": [
                    "P_df = get_energy_diffs(nomad_structure_list)",
                    "print P_df"
                ]
            },
            "output": {
Emre Ahmetcik's avatar
Emre Ahmetcik committed
577
578
579
580
581
                "state": {},
                "selectedType": "Results",
                "pluginName": "IPython",
                "shellId": "CFDBBC64C18149AA8C1F32F183239C1A",
                "height": 0
Emre Ahmetcik's avatar
Emre Ahmetcik committed
582
583
584
585
            },
            "evaluatorReader": true,
            "lineCount": 2
        },
586
        {
Emre Ahmetcik's avatar
Emre Ahmetcik committed
587
588
589
590
591
592
593
594
595
            "id": "markdownOBfktj",
            "type": "markdown",
            "body": [
                "Now let's consider a function get_descriptors which creates the descriptor matrix $\\mathbf{D}$ . This will use a materials-to-descritpors dictionary (cell below) and a function combine_features (defined two cells below) to create new derived descriptors with arithmetic operations. The functions are rather technical, so you can ignore the definitions in the next three cells. But you need to run them!"
            ],
            "evaluatorReader": false
        },
        {
            "id": "codebV3mYa",
596
            "type": "code",
597
            "evaluator": "IPython",
598
599
            "input": {
                "body": [
Emre Ahmetcik's avatar
Emre Ahmetcik committed
600
601
                    "features_dict = {u'EA(B)': {'SeZn': -2.750999927520752, 'InSb': -1.8466999530792236, 'SZn': -2.844899892807007, 'BN': -1.8674999475479126, 'OSr': -3.0058999061584473, 'BrRb': -3.739300012588501, 'BaTe': -2.6659998893737793, 'BeSe': -2.750999927520752, 'MgS': -2.844899892807007, 'ClRb': -3.9707999229431152, 'BrNa': -3.739300012588501, 'BP': -1.9199999570846558, 'MgSe': -2.750999927520752, 'FK': -4.273499965667725, 'BrLi': -3.739300012588501, 'BSb': -1.8466999530792236, 'AsB': -1.8392000198364258, 'GeSn': -0.9490000009536743, 'GeSi': -0.9929999709129333, 'CaTe': -2.6659998893737793, 'ClK': -3.9707999229431152, 'CsI': -3.5134999752044678, 'MgO': -3.0058999061584473, 'BrCs': -3.739300012588501, 'CsF': -4.273499965667725, 'BrCu': -3.739300012588501, 'ILi': -3.5134999752044678, 'FLi': -4.273499965667725, 'CuF': -4.273499965667725, 'INa': -3.5134999752044678, 'Ge2': -0.9490000009536743, 'FNa': -4.273499965667725, 'C2': -0.8723999857902527, 'AgBr': -3.739300012588501, 'AsGa': -1.8392000198364258, 'CuI': -3.5134999752044678, 'AlN': -1.8674999475479126, 'Si2': -0.9929999709129333, 'SiSn': -0.9929999709129333, 'ClLi': -3.9707999229431152, 'ClNa': -3.9707999229431152, 'AsIn': -1.8392000198364258, 'OZn': -3.0058999061584473, 'CGe': -0.8723999857902527, 'CdO': -3.0058999061584473, 'InP': -1.9199999570846558, 'SSr': -2.844899892807007, 'InN': -1.8674999475479126, 'BaSe': -2.750999927520752, 'BrK': -3.739300012588501, 'BeTe': -2.6659998893737793, 'CdS': -2.844899892807007, 'CdTe': -2.6659998893737793, 'TeZn': -2.6659998893737793, 'GaP': -1.9199999570846558, 'CdSe': -2.750999927520752, 'MgTe': -2.6659998893737793, 'AlP': -1.9199999570846558, 'BeO': -3.0058999061584473, 'CaSe': -2.750999927520752, 'FRb': -4.273499965667725, 'SeSr': -2.750999927520752, 'CSi': -0.8723999857902527, 'AgCl': -3.9707999229431152, 'AgI': -3.5134999752044678, 'GaN': -1.8674999475479126, 'CaS': -2.844899892807007, 'AgF': -4.273499965667725, 'GaSb': -1.8466999530792236, 'IK': 
-3.5134999752044678, 'IRb': -3.5134999752044678, 'BaS': -2.844899892807007, 'CaO': -3.0058999061584473, 'AlAs': -1.8392000198364258, 'Sn2': -1.039199948310852, 'ClCu': -3.9707999229431152, 'CSn': -0.8723999857902527, 'BaO': -3.0058999061584473, 'ClCs': -3.9707999229431152, 'AlSb': -1.8466999530792236, 'SrTe': -2.6659998893737793, 'BeS': -2.844899892807007}, u'r_p(A)': {'SeZn': 1.5499999523162842, 'InSb': 1.5, 'SZn': 1.5499999523162842, 'BN': 0.8299999833106995, 'OSr': 2.549999952316284, 'BrRb': 3.200000047683716, 'BaTe': 2.630000114440918, 'BeSe': 1.2100000381469727, 'MgS': 1.899999976158142, 'ClRb': 3.200000047683716, 'BrNa': 2.5999999046325684, 'BP': 0.8299999833106995, 'MgSe': 1.899999976158142, 'FK': 2.440000057220459, 'BrLi': 2.0, 'BSb': 0.8299999833106995, 'AsB': 0.8299999833106995, 'GeSn': 1.340000033378601, 'GeSi': 1.159999966621399, 'CaTe': 2.319999933242798, 'ClK': 2.440000057220459, 'CsI': 3.1600000858306885, 'MgO': 1.899999976158142, 'BrCs': 3.1600000858306885, 'CsF': 3.1600000858306885, 'BrCu': 1.6799999475479126, 'ILi': 2.0, 'FLi': 2.0, 'CuF': 1.6799999475479126, 'INa': 2.5999999046325684, 'Ge2': 1.159999966621399, 'FNa': 2.5999999046325684, 'C2': 0.6299999952316284, 'AgBr': 1.8799999952316284, 'AsGa': 1.3300000429153442, 'CuI': 1.6799999475479126, 'AlN': 1.3899999856948853, 'Si2': 1.1299999952316284, 'SiSn': 1.340000033378601, 'ClLi': 2.0, 'ClNa': 2.5999999046325684, 'AsIn': 1.5, 'OZn': 1.5499999523162842, 'CGe': 1.159999966621399, 'CdO': 1.7400000095367432, 'InP': 1.5, 'SSr': 2.549999952316284, 'InN': 1.5, 'BaSe': 2.630000114440918, 'BrK': 2.440000057220459, 'BeTe': 1.2100000381469727, 'CdS': 1.7400000095367432, 'CdTe': 1.7400000095367432, 'TeZn': 1.5499999523162842, 'GaP': 1.3300000429153442, 'CdSe': 1.7400000095367432, 'MgTe': 1.899999976158142, 'AlP': 1.3899999856948853, 'BeO': 1.2100000381469727, 'CaSe': 2.319999933242798, 'FRb': 3.200000047683716, 'SeSr': 2.549999952316284, 'CSi': 1.1299999952316284, 'AgCl': 1.8799999952316284, 'AgI': 
1.8799999952316284, 'GaN': 1.3300000429153442, 'CaS': 2.319999933242798, 'AgF': 1.8799999952316284, 'GaSb': 1.3300000429153442, 'IK': 2.440000057220459, 'IRb': 3.200000047683716, 'BaS': 2.630000114440918, 'CaO': 2.319999933242798, 'AlAs': 1.3899999856948853, 'Sn2': 1.340000033378601, 'ClCu': 1.6799999475479126, 'CSn': 1.340000033378601, 'BaO': 2.630000114440918, 'ClCs': 3.1600000858306885, 'AlSb': 1.3899999856948853, 'SrTe': 2.549999952316284, 'BeS': 1.2100000381469727}, u'IP(A)': {'SeZn': -10.135499954223633, 'InSb': -5.537399768829346, 'SZn': -10.135499954223633, 'BN': -8.1899995803833, 'OSr': -6.031599998474121, 'BrRb': -4.288899898529053, 'BaTe': -5.515699863433838, 'BeSe': -9.459400177001953, 'MgS': -8.037099838256836, 'ClRb': -4.288899898529053, 'BrNa': -5.223100185394287, 'BP': -8.1899995803833, 'MgSe': -8.037099838256836, 'FK': -4.433199882507324, 'BrLi': -5.329100131988525, 'BSb': -8.1899995803833, 'AsB': -8.1899995803833, 'GeSn': -7.042799949645996, 'GeSi': -7.566999912261963, 'CaTe': -6.427999973297119, 'ClK': -4.433199882507324, 'CsI': -4.006199836730957, 'MgO': -8.037099838256836, 'BrCs': -4.006199836730957, 'CsF': -4.006199836730957, 'BrCu': -8.388799667358398, 'ILi': -5.329100131988525, 'FLi': -5.329100131988525, 'CuF': -8.388799667358398, 'INa': -5.223100185394287, 'Ge2': -7.566999912261963, 'FNa': -5.223100185394287, 'C2': -10.851699829101562, 'AgBr': -8.058099746704102, 'AsGa': -5.81820011138916, 'CuI': -8.388799667358398, 'AlN': -5.7804999351501465, 'Si2': -7.757699966430664, 'SiSn': -7.042799949645996, 'ClLi': -5.329100131988525, 'ClNa': -5.223100185394287, 'AsIn': -5.537399768829346, 'OZn': -10.135499954223633, 'CGe': -7.566999912261963, 'CdO': -9.581399917602539, 'InP': -5.537399768829346, 'SSr': -6.031599998474121, 'InN': -5.537399768829346, 'BaSe': -5.515699863433838, 'BrK': -4.433199882507324, 'BeTe': -9.459400177001953, 'CdS': -9.581399917602539, 'CdTe': -9.581399917602539, 'TeZn': -10.135499954223633, 'GaP': -5.81820011138916, 'CdSe': 
-9.581399917602539, 'MgTe': -8.037099838256836, 'AlP': -5.7804999351501465, 'BeO': -9.459400177001953, 'CaSe': -6.427999973297119, 'FRb': -4.288899898529053, 'SeSr': -6.031599998474121, 'CSi': -7.757699966430664, 'AgCl': -8.058099746704102, 'AgI': -8.058099746704102, 'GaN': -5.81820011138916, 'CaS': -6.427999973297119, 'AgF': -8.058099746704102, 'GaSb': -5.81820011138916, 'IK': -4.433199882507324, 'IRb': -4.288899898529053, 'BaS': -5.515699863433838, 'CaO': -6.427999973297119, 'AlAs': -5.7804999351501465, 'Sn2': -7.042799949645996, 'ClCu': -8.388799667358398, 'CSn': -7.042799949645996, 'BaO': -5.515699863433838, 'ClCs': -4.006199836730957, 'AlSb': -5.7804999351501465, 'SrTe': -6.031599998474121, 'BeS': -9.459400177001953}, u'r_p(B)': {'SeZn': 0.949999988079071, 'InSb': 1.2300000190734863, 'SZn': 0.8500000238418579, 'BN': 0.5099999904632568, 'OSr': 0.4300000071525574, 'BrRb': 0.8799999952316284, 'BaTe': 1.1399999856948853, 'BeSe': 0.949999988079071, 'MgS': 0.8500000238418579, 'ClRb': 0.7599999904632568, 'BrNa': 0.8799999952316284, 'BP': 0.9700000286102295, 'MgSe': 0.949999988079071, 'FK': 0.3700000047683716, 'BrLi': 0.8799999952316284, 'BSb': 1.2300000190734863, 'AsB': 1.0399999618530273, 'GeSn': 1.159999966621399, 'GeSi': 1.1299999952316284, 'CaTe': 1.1399999856948853, 'ClK': 0.7599999904632568, 'CsI': 1.0700000524520874, 'MgO': 0.4300000071525574, 'BrCs': 0.8799999952316284, 'CsF': 0.3700000047683716, 'BrCu': 0.8799999952316284, 'ILi': 1.0700000524520874, 'FLi': 0.3700000047683716, 'CuF': 0.3700000047683716, 'INa': 1.0700000524520874, 'Ge2': 1.159999966621399, 'FNa': 0.3700000047683716, 'C2': 0.6299999952316284, 'AgBr': 0.8799999952316284, 'AsGa': 1.0399999618530273, 'CuI': 1.0700000524520874, 'AlN': 0.5099999904632568, 'Si2': 1.1299999952316284, 'SiSn': 1.1299999952316284, 'ClLi': 0.7599999904632568, 'ClNa': 0.7599999904632568, 'AsIn': 1.0399999618530273, 'OZn': 0.4300000071525574, 'CGe': 0.6299999952316284, 'CdO': 0.4300000071525574, 'InP': 0.9700000286102295, 
'SSr': 0.8500000238418579, 'InN': 0.5099999904632568, 'BaSe': 0.949999988079071, 'BrK': 0.8799999952316284, 'BeTe': 1.1399999856948853, 'CdS': 0.8500000238418579, 'CdTe': 1.1399999856948853, 'TeZn': 1.1399999856948853, 'GaP': 0.9700000286102295, 'CdSe': 0.949999988079071, 'MgTe': 1.1399999856948853, 'AlP': 0.9700000286102295, 'BeO': 0.4300000071525574, 'CaSe': 0.949999988079071, 'FRb': 0.3700000047683716, 'SeSr': 0.949999988079071, 'CSi': 0.6299999952316284, 'AgCl': 0.7599999904632568, 'AgI': 1.0700000524520874, 'GaN': 0.5099999904632568, 'CaS': 0.8500000238418579, 'AgF': 0.3700000047683716, 'GaSb': 1.2300000190734863, 'IK': 1.0700000524520874, 'IRb': 1.0700000524520874, 'BaS': 0.8500000238418579, 'CaO': 0.4300000071525574, 'AlAs': 1.0399999618530273, 'Sn2': 1.340000033378601, 'ClCu': 0.7599999904632568, 'CSn': 0.6299999952316284, 'BaO': 0.4300000071525574, 'ClCs': 0.7599999904632568, 'AlSb': 1.2300000190734863, 'SrTe': 1.1399999856948853, 'BeS': 0.8500000238418579}, u'E_LUMO(B)': {'SeZn': 1.315999984741211, 'InSb': 0.10499999672174454, 'SZn': 0.6420000195503235, 'BN': 3.056999921798706, 'OSr': 2.5409998893737793, 'BrRb': 0.7080000042915344, 'BaTe': 0.0989999994635582, 'BeSe': 1.315999984741211, 'MgS': 0.6420000195503235, 'ClRb': 0.5740000009536743, 'BrNa': 0.7080000042915344, 'BP': 0.18299999833106995, 'MgSe': 1.315999984741211, 'FK': 1.2510000467300415, 'BrLi': 0.7080000042915344, 'BSb': 0.10499999672174454, 'AsB': 0.06400000303983688, 'GeSn': 2.174999952316284, 'GeSi': 0.4399999976158142, 'CaTe': 0.0989999994635582, 'ClK': 0.5740000009536743, 'CsI': 0.21299999952316284, 'MgO': 2.5409998893737793, 'BrCs': 0.7080000042915344, 'CsF': 1.2510000467300415, 'BrCu': 0.7080000042915344, 'ILi': 0.21299999952316284, 'FLi': 1.2510000467300415, 'CuF': 1.2510000467300415, 'INa': 0.21299999952316284, 'Ge2': 2.174999952316284, 'FNa': 1.2510000467300415, 'C2': 1.9919999837875366, 'AgBr': 0.7080000042915344, 'AsGa': 0.06400000303983688, 'CuI': 0.21299999952316284, 'AlN': 
3.056999921798706, 'Si2': 0.4399999976158142, 'SiSn': 0.4399999976158142, 'ClLi': 0.5740000009536743, 'ClNa': 0.5740000009536743, 'AsIn': 0.06400000303983688, 'OZn': 2.5409998893737793, 'CGe': 1.9919999837875366, 'CdO': 2.5409998893737793, 'InP': 0.18299999833106995, 'SSr': 0.6420000195503235, 'InN': 3.056999921798706, 'BaSe': 1.315999984741211, 'BrK': 0.7080000042915344, 'BeTe': 0.0989999994635582, 'CdS': 0.6420000195503235, 'CdTe': 0.0989999994635582, 'TeZn': 0.0989999994635582, 'GaP': 0.18299999833106995, 'CdSe': 1.315999984741211, 'MgTe': 0.0989999994635582, 'AlP': 0.18299999833106995, 'BeO': 2.5409998893737793, 'CaSe': 1.315999984741211, 'FRb': 1.2510000467300415, 'SeSr': 1.315999984741211, 'CSi': 1.9919999837875366, 'AgCl': 0.5740000009536743, 'AgI': 0.21299999952316284, 'GaN': 3.056999921798706, 'CaS': 0.6420000195503235, 'AgF': 1.2510000467300415, 'GaSb': 0.10499999672174454, 'IK': 0.21299999952316284, 'IRb': 0.21299999952316284, 'BaS': 0.6420000195503235, 'CaO': 2.5409998893737793, 'AlAs': 0.06400000303983688, 'Sn2': 0.00800000037997961, 'ClCu': 0.5740000009536743, 'CSn': 1.9919999837875366, 'BaO': 2.5409998893737793, 'ClCs': 0.5740000009536743, 'AlSb': 0.10499999672174454, 'SrTe': 0.0989999994635582, 'BeS': 0.6420000195503235}, u'IP(B)': {'SeZn': -10.946000099182129, 'InSb': -8.468299865722656, 'SZn': -11.795100212097168, 'BN': -13.585200309753418, 'OSr': -16.43320083618164, 'BrRb': -12.6496000289917, 'BaTe': -9.866700172424316, 'BeSe': -10.946000099182129, 'MgS': -11.795100212097168, 'ClRb': -13.901800155639648, 'BrNa': -12.6496000289917, 'BP': -9.75059986114502, 'MgSe': -10.946000099182129, 'FK': -19.404300689697266, 'BrLi': -12.6496000289917, 'BSb': -8.468299865722656, 'AsB': -9.261899948120117, 'GeSn': -7.566999912261963, 'GeSi': -7.757699966430664, 'CaTe': -9.866700172424316, 'ClK': -13.901800155639648, 'CsI': -11.257100105285645, 'MgO': -16.43320083618164, 'BrCs': -12.6496000289917, 'CsF': -19.404300689697266, 'BrCu': -12.6496000289917, 'ILi': 
-11.257100105285645, 'FLi': -19.404300689697266, 'CuF': -19.404300689697266, 'INa': -11.257100105285645, 'Ge2': -7.566999912261963, 'FNa': -19.404300689697266, 'C2': -10.851699829101562, 'AgBr': -12.6496000289917, 'AsGa': -9.261899948120117, 'CuI': -11.257100105285645, 'AlN': -13.585200309753418, 'Si2': -7.757699966430664, 'SiSn': -7.757699966430664, 'ClLi': -13.901800155639648, 'ClNa': -13.901800155639648, 'AsIn': -9.261899948120117, 'OZn': -16.43320083618164, 'CGe': -10.851699829101562, 'CdO': -16.43320083618164, 'InP': -9.75059986114502, 'SSr': -11.795100212097168, 'InN': -13.585200309753418, 'BaSe': -10.946000099182129, 'BrK': -12.6496000289917, 'BeTe': -9.866700172424316, 'CdS': -11.795100212097168, 'CdTe': -9.866700172424316, 'TeZn': -9.866700172424316, 'GaP': -9.75059986114502, 'CdSe': -10.946000099182129, 'MgTe': -9.866700172424316, 'AlP': -9.75059986114502, 'BeO': -16.43320083618164, 'CaSe': -10.946000099182129, 'FRb': -19.404300689697266, 'SeSr': -10.946000099182129, 'CSi': -10.851699829101562, 'AgCl': -13.901800155639648, 'AgI': -11.257100105285645, 'GaN': -13.585200309753418, 'CaS': -11.795100212097168, 'AgF': -19.404300689697266, 'GaSb': -8.468299865722656, 'IK': -11.257100105285645, 'IRb': -11.257100105285645, 'BaS': -11.795100212097168, 'CaO': -16.43320083618164, 'AlAs': -9.261899948120117, 'Sn2': -7.042799949645996, 'ClCu': -13.901800155639648, 'CSn': -10.851699829101562, 'BaO': -16.43320083618164, 'ClCs': -13.901800155639648, 'AlSb': -8.468299865722656, 'SrTe': -9.866700172424316, 'BeS': -11.795100212097168}, u'r_s(A)': {'SeZn': 1.100000023841858, 'InSb': 1.1299999952316284, 'SZn': 1.100000023841858, 'BN': 0.8100000023841858, 'OSr': 1.909999966621399, 'BrRb': 2.240000009536743, 'BaTe': 2.1500000953674316, 'BeSe': 1.0800000429153442, 'MgS': 1.3300000429153442, 'ClRb': 2.240000009536743, 'BrNa': 1.7100000381469727, 'BP': 0.8100000023841858, 'MgSe': 1.3300000429153442, 'FK': 2.130000114440918, 'BrLi': 1.649999976158142, 'BSb': 0.8100000023841858, 
'AsB': 0.8100000023841858, 'GeSn': 1.059999942779541, 'GeSi': 0.9200000166893005, 'CaTe': 1.7599999904632568, 'ClK': 2.130000114440918, 'CsI': 2.4600000381469727, 'MgO': 1.3300000429153442, 'BrCs': 2.4600000381469727, 'CsF': 2.4600000381469727, 'BrCu': 1.2000000476837158, 'ILi': 1.649999976158142, 'FLi': 1.649999976158142, 'CuF': 1.2000000476837158, 'INa': 1.7100000381469727, 'Ge2': 0.9200000166893005, 'FNa': 1.7100000381469727, 'C2': 0.6399999856948853, 'AgBr': 1.3200000524520874, 'AsGa': 0.9900000095367432, 'CuI': 1.2000000476837158, 'AlN': 1.090000033378601, 'Si2': 0.9399999976158142, 'SiSn': 1.059999942779541, 'ClLi': 1.649999976158142, 'ClNa': 1.7100000381469727, 'AsIn': 1.1299999952316284, 'OZn': 1.100000023841858, 'CGe': 0.9200000166893005, 'CdO': 1.2300000190734863, 'InP': 1.1299999952316284, 'SSr': 1.909999966621399, 'InN': 1.1299999952316284, 'BaSe': 2.1500000953674316, 'BrK': 2.130000114440918, 'BeTe': 1.0800000429153442, 'CdS': 1.2300000190734863, 'CdTe': 1.2300000190734863, 'TeZn': 1.100000023841858, 'GaP': 0.9900000095367432, 'CdSe': 1.2300000190734863, 'MgTe': 1.3300000429153442, 'AlP': 1.090000033378601, 'BeO': 1.0800000429153442, 'CaSe': 1.7599999904632568, 'FRb': 2.240000009536743, 'SeSr': 1.909999966621399, 'CSi': 0.9399999976158142, 'AgCl': 1.3200000524520874, 'AgI': 1.3200000524520874, 'GaN': 0.9900000095367432, 'CaS': 1.7599999904632568, 'AgF': 1.3200000524520874, 'GaSb': 0.9900000095367432, 'IK': 2.130000114440918, 'IRb': 2.240000009536743, 'BaS': 2.1500000953674316, 'CaO': 1.7599999904632568, 'AlAs': 1.090000033378601, 'Sn2': 1.059999942779541, 'ClCu': 1.2000000476837158, 'CSn': 1.059999942779541, 'BaO': 2.1500000953674316, 'ClCs': 2.4600000381469727, 'AlSb': 1.090000033378601, 'SrTe': 1.909999966621399, 'BeS': 1.0800000429153442}, u'd(A)': {'SeZn': 1.409999966621399, 'InSb': 1.4800000190734863, 'SZn': 1.409999966621399, 'BN': 0.8100000023841858, 'OSr': 2.2200000286102295, 'BrRb': 2.0399999618530273, 'BaTe': 2.359999895095825, 'BeSe': 
1.2000000476837158, 'MgS': 1.7100000381469727, 'ClRb': 2.0399999618530273, 'BrNa': 1.4900000095367432, 'BP': 0.8100000023841858, 'MgSe': 1.7100000381469727, 'FK': 1.909999966621399, 'BrLi': 1.350000023841858, 'BSb': 0.8100000023841858, 'AsB': 0.8100000023841858, 'GeSn': 1.3700000047683716, 'GeSi': 1.159999966621399, 'CaTe': 1.9900000095367432, 'ClK': 1.909999966621399, 'CsI': 2.25, 'MgO': 1.7100000381469727, 'BrCs': 2.25, 'CsF': 2.25, 'BrCu': 1.0700000524520874, 'ILi': 1.350000023841858, 'FLi': 1.350000023841858, 'CuF': 1.0700000524520874, 'INa': 1.4900000095367432, 'Ge2': 1.159999966621399, 'FNa': 1.4900000095367432, 'C2': 0.6299999952316284, 'AgBr': 1.2400000095367432, 'AsGa': 1.2300000190734863, 'CuI': 1.0700000524520874, 'AlN': 1.2699999809265137, 'Si2': 1.090000033378601, 'SiSn': 1.3700000047683716, 'ClLi': 1.350000023841858, 'ClNa': 1.4900000095367432, 'AsIn': 1.4800000190734863, 'OZn': 1.409999966621399, 'CGe': 1.159999966621399, 'CdO': 1.5499999523162842, 'InP': 1.4800000190734863, 'SSr': 2.2200000286102295, 'InN': 1.4800000190734863, 'BaSe': 2.359999895095825, 'BrK': 1.909999966621399, 'BeTe': 1.2000000476837158, 'CdS': 1.5499999523162842, 'CdTe': 1.5499999523162842, 'TeZn': 1.409999966621399, 'GaP': 1.2300000190734863, 'CdSe': 1.5499999523162842, 'MgTe': 1.7100000381469727, 'AlP': 1.2699999809265137, 'BeO': 1.2000000476837158, 'CaSe': 1.9900000095367432, 'FRb': 2.0399999618530273, 'SeSr': 2.2200000286102295, 'CSi': 1.090000033378601, 'AgCl': 1.2400000095367432, 'AgI': 1.2400000095367432, 'GaN': 1.2300000190734863, 'CaS': 1.9900000095367432, 'AgF': 1.2400000095367432, 'GaSb': 1.2300000190734863, 'IK': 1.909999966621399, 'IRb': 2.0399999618530273, 'BaS': 2.359999895095825, 'CaO': 1.9900000095367432, 'AlAs': 1.2699999809265137, 'Sn2': 1.3700000047683716, 'ClCu': 1.0700000524520874, 'CSn': 1.3700000047683716, 'BaO': 2.359999895095825, 'ClCs': 2.25, 'AlSb': 1.2699999809265137, 'SrTe': 2.2200000286102295, 'BeS': 1.2000000476837158}, u'period(B)': {'SeZn': 4.0, 
'InSb': 5.0, 'SZn': 3.0, 'BN': 2.0, 'OSr': 2.0, 'BrRb': 4.0, 'BaTe': 5.0, 'BeSe': 4.0, 'MgS': 3.0, 'ClRb': 3.0, 'BrNa': 4.0, 'BP': 3.0, 'MgSe': 4.0, 'FK': 2.0, 'BrLi': 4.0, 'BSb': 5.0, 'AsB': 4.0, 'GeSn': 4.0, 'GeSi': 3.0, 'CaTe': 5.0, 'ClK': 3.0, 'CsI': 5.0, 'MgO': 2.0, 'BrCs': 4.0, 'CsF': 2.0, 'BrCu': 4.0, 'ILi': 5.0, 'FLi': 2.0, 'CuF': 2.0, 'INa': 5.0, 'Ge2': 4.0, 'FNa': 2.0, 'C2': 2.0, 'AgBr': 4.0, 'AsGa': 4.0, 'CuI': 5.0, 'AlN': 2.0, 'Si2': 3.0, 'SiSn': 3.0, 'ClLi': 3.0, 'ClNa': 3.0, 'AsIn': 4.0, 'OZn': 2.0, 'CGe': 2.0, 'CdO': 2.0, 'InP': 3.0, 'SSr': 3.0, 'InN': 2.0, 'BaSe': 4.0, 'BrK': 4.0, 'BeTe': 5.0, 'CdS': 3.0, 'CdTe': 5.0, 'TeZn': 5.0, 'GaP': 3.0, 'CdSe': 4.0, 'MgTe': 5.0, 'AlP': 3.0, 'BeO': 2.0, 'CaSe': 4.0, 'FRb': 2.0, 'SeSr': 4.0, 'CSi': 2.0, 'AgCl': 3.0, 'AgI': 5.0, 'GaN': 2.0, 'CaS': 3.0, 'AgF': 2.0, 'GaSb': 5.0, 'IK': 5.0, 'IRb': 5.0, 'BaS': 3.0, 'CaO': 2.0, 'AlAs': 4.0, 'Sn2': 5.0, 'ClCu': 3.0, 'CSn': 2.0, 'BaO': 2.0, 'ClCs': 3.0, 'AlSb': 5.0, 'SrTe': 5.0, 'BeS': 3.0}, u'E_HOMO(A)': {'SeZn': -6.2170000076293945, 'InSb': -2.697000026702881, 'SZn': -6.2170000076293945, 'BN': -3.7149999141693115, 'OSr': -3.6410000324249268, 'BrRb': -2.359999895095825, 'BaTe': -3.3459999561309814, 'BeSe': -5.599999904632568, 'MgS': -4.7820000648498535, 'ClRb': -2.359999895095825, 'BrNa': -2.819000005722046, 'BP': -3.7149999141693115, 'MgSe': -4.7820000648498535, 'FK': -2.4260001182556152, 'BrLi': -2.874000072479248, 'BSb': -3.7149999141693115, 'AsB': -3.7149999141693115, 'GeSn': -3.865999937057495, 'GeSi': -4.046000003814697, 'CaTe': -3.864000082015991, 'ClK': -2.4260001182556152, 'CsI': -2.2200000286102295, 'MgO': -4.7820000648498535, 'BrCs': -2.2200000286102295, 'CsF': -2.2200000286102295, 'BrCu': -4.855999946594238, 'ILi': -2.874000072479248, 'FLi': -2.874000072479248, 'CuF': -4.855999946594238, 'INa': -2.819000005722046, 'Ge2': -4.046000003814697, 'FNa': -2.819000005722046, 'C2': -5.415999889373779, 'AgBr': -4.710000038146973, 'AsGa': -2.7320001125335693, 'CuI': 
-4.855999946594238, 'AlN': -2.7839999198913574, 'Si2': -4.163000106811523, 'SiSn': -3.865999937057495, 'ClLi': -2.874000072479248, 'ClNa': -2.819000005722046, 'AsIn': -2.697000026702881, 'OZn': -6.2170000076293945, 'CGe': -4.046000003814697, 'CdO': -5.952000141143799, 'InP': -2.697000026702881, 'SSr': -3.6410000324249268, 'InN': -2.697000026702881, 'BaSe': -3.3459999561309814, 'BrK': -2.4260001182556152, 'BeTe': -5.599999904632568, 'CdS': -5.952000141143799, 'CdTe': -5.952000141143799, 'TeZn': -6.2170000076293945, 'GaP': -2.7320001125335693, 'CdSe': -5.952000141143799, 'MgTe': -4.7820000648498535, 'AlP': -2.7839999198913574, 'BeO': -5.599999904632568, 'CaSe': -3.864000082015991, 'FRb': -2.359999895095825, 'SeSr': -3.6410000324249268, 'CSi': -4.163000106811523, 'AgCl': -4.710000038146973, 'AgI': -4.710000038146973, 'GaN': -2.7320001125335693, 'CaS': -3.864000082015991, 'AgF': -4.710000038146973, 'GaSb': -2.7320001125335693, 'IK': -2.4260001182556152, 'IRb': -2.359999895095825, 'BaS': -3.3459999561309814, 'CaO': -3.864000082015991, 'AlAs': -2.7839999198913574, 'Sn2': -3.865999937057495, 'ClCu': -4.855999946594238, 'CSn': -3.865999937057495, 'BaO': -3.3459999561309814, 'ClCs': -2.2200000286102295, 'AlSb': -2.7839999198913574, 'SrTe': -3.6410000324249268, 'BeS': -5.599999904632568}, u'period(A)': {'SeZn': 4.0, 'InSb': 5.0, 'SZn': 4.0, 'BN': 2.0, 'OSr': 5.0, 'BrRb': 5.0, 'BaTe': 6.0, 'BeSe': 2.0, 'MgS': 3.0, 'ClRb': 5.0, 'BrNa': 3.0, 'BP': 2.0, 'MgSe': 3.0, 'FK': 4.0, 'BrLi': 2.0, 'BSb': 2.0, 'AsB': 2.0, 'GeSn': 5.0, 'GeSi': 4.0, 'CaTe': 4.0, 'ClK': 4.0, 'CsI': 6.0, 'MgO': 3.0, 'BrCs': 6.0, 'CsF': 6.0, 'BrCu': 4.0, 'ILi': 2.0, 'FLi': 2.0, 'CuF': 4.0, 'INa': 3.0, 'Ge2': 4.0, 'FNa': 3.0, 'C2': 2.0, 'AgBr': 5.0, 'AsGa': 4.0, 'CuI': 4.0, 'AlN': 3.0, 'Si2': 3.0, 'SiSn': 5.0, 'ClLi': 2.0, 'ClNa': 3.0, 'AsIn': 5.0, 'OZn': 4.0, 'CGe': 4.0, 'CdO': 5.0, 'InP': 5.0, 'SSr': 5.0, 'InN': 5.0, 'BaSe': 6.0, 'BrK': 4.0, 'BeTe': 2.0, 'CdS': 5.0, 'CdTe': 5.0, 'TeZn': 4.0, 'GaP': 4.0, 
'CdSe': 5.0, 'MgTe': 3.0, 'AlP': 3.0, 'BeO': 2.0, 'CaSe': 4.0, 'FRb': 5.0, 'SeSr': 5.0, 'CSi': 3.0, 'AgCl': 5.0, 'AgI': 5.0, 'GaN': 4.0, 'CaS': 4.0, 'AgF': 5.0, 'GaSb': 4.0, 'IK': 4.0, 'IRb': 5.0, 'BaS': 6.0, 'CaO': 4.0, 'AlAs': 3.0, 'Sn2': 5.0, 'ClCu': 4.0, 'CSn': 5.0, 'BaO': 6.0, 'ClCs': 6.0, 'AlSb': 3.0, 'SrTe': 5.0, 'BeS': 2.0}, u'd(B)': {'SeZn': 1.0800000429153442, 'InSb': 1.2300000190734863, 'SZn': 0.949999988079071, 'BN': 0.550000011920929, 'OSr': 0.6000000238418579, 'BrRb': 1.1399999856948853, 'BaTe': 1.2699999809265137, 'BeSe': 1.0800000429153442, 'MgS': 0.949999988079071, 'ClRb': 0.9900000095367432, 'BrNa': 1.1399999856948853, 'BP': 0.9399999976158142, 'MgSe': 1.0800000429153442, 'FK': 0.6899999976158142, 'BrLi': 1.1399999856948853, 'BSb': 1.2300000190734863, 'AsB': 1.0399999618530273, 'GeSn': 1.159999966621399, 'GeSi': 1.090000033378601, 'CaTe': 1.2699999809265137, 'ClK': 0.9900000095367432, 'CsI': 1.3200000524520874, 'MgO': 0.6000000238418579, 'BrCs': 1.1399999856948853, 'CsF': 0.6899999976158142, 'BrCu': 1.1399999856948853, 'ILi': 1.3200000524520874, 'FLi': 0.6899999976158142, 'CuF': 0.6899999976158142, 'INa': 1.3200000524520874, 'Ge2': 1.159999966621399, 'FNa': 0.6899999976158142, 'C2': 0.6299999952316284, 'AgBr': 1.1399999856948853, 'AsGa': 1.0399999618530273, 'CuI': 1.3200000524520874, 'AlN': 0.550000011920929, 'Si2': 1.090000033378601, 'SiSn': 1.090000033378601, 'ClLi': 0.9900000095367432, 'ClNa': 0.9900000095367432, 'AsIn': 1.0399999618530273, 'OZn': 0.6000000238418579, 'CGe': 0.6299999952316284, 'CdO': 0.6000000238418579, 'InP': 0.9399999976158142, 'SSr': 0.949999988079071, 'InN': 0.550000011920929, 'BaSe': 1.0800000429153442, 'BrK': 1.1399999856948853, 'BeTe': 1.2699999809265137, 'CdS': 0.949999988079071, 'CdTe': 1.2699999809265137, 'TeZn': 1.2699999809265137, 'GaP': 0.9399999976158142, 'CdSe': 1.0800000429153442, 'MgTe': 1.2699999809265137, 'AlP': 0.9399999976158142, 'BeO': 0.6000000238418579, 'CaSe': 1.0800000429153442, 'FRb': 
0.6899999976158142, 'SeSr': 1.0800000429153442, 'CSi': 0.6299999952316284, 'AgCl': 0.9900000095367432, 'AgI': 1.3200000524520874, 'GaN': 0.550000011920929, 'CaS': 0.949999988079071, 'AgF': 0.6899999976158142, 'GaSb': 1.2300000190734863, 'IK': 1.3200000524520874, 'IRb': 1.3200000524520874, 'BaS': 0.949999988079071, 'CaO': 0.6000000238418579, 'AlAs': 1.0399999618530273, 'Sn2': 1.3700000047683716, 'ClCu': 0.9900000095367432, 'CSn': 0.6299999952316284, 'BaO': 0.6000000238418579, 'ClCs': 0.9900000095367432, 'AlSb': 1.2300000190734863, 'SrTe': 1.2699999809265137, 'BeS': 0.949999988079071}, u'Z(A)': {'SeZn': 30.0, 'InSb': 49.0, 'SZn': 30.0, 'BN': 5.0, 'OSr': 38.0, 'BrRb': 37.0, 'BaTe': 56.0, 'BeSe': 4.0, 'MgS': 12.0, 'ClRb': 37.0, 'BrNa': 11.0, 'BP': 5.0, 'MgSe': 12.0, 'FK': 19.0, 'BrLi': 3.0, 'BSb': 5.0, 'AsB': 5.0, 'GeSn': 50.0, 'GeSi': 32.0, 'CaTe': 20.0, 'ClK': 19.0, 'CsI': 55.0, 'MgO': 12.0, 'BrCs': 55.0, 'CsF': 55.0, 'BrCu': 29.0, 'ILi': 3.0, 'FLi': 3.0, 'CuF': 29.0, 'INa': 11.0, 'Ge2': 32.0, 'FNa': 11.0, 'C2': 6.0, 'AgBr': 47.0, 'AsGa': 31.0, 'CuI': 29.0, 'AlN': 13.0, 'Si2': 14.0, 'SiSn': 50.0, 'ClLi': 3.0, 'ClNa': 11.0, 'AsIn': 49.0, 'OZn': 30.0, 'CGe': 32.0, 'CdO': 48.0, 'InP': 49.0, 'SSr': 38.0, 'InN': 49.0, 'BaSe': 56.0, 'BrK': 19.0, 'BeTe': 4.0, 'CdS': 48.0, 'CdTe': 48.0, 'TeZn': 30.0, 'GaP': 31.0, 'CdSe': 48.0, 'MgTe': 12.0, 'AlP': 13.0, 'BeO': 4.0, 'CaSe': 20.0, 'FRb': 37.0, 'SeSr': 38.0, 'CSi': 14.0, 'AgCl': 47.0, 'AgI': 47.0, 'GaN': 31.0, 'CaS': 20.0, 'AgF': 47.0, 'GaSb': 31.0, 'IK': 19.0, 'IRb': 37.0, 'BaS': 56.0, 'CaO': 20.0, 'AlAs': 13.0, 'Sn2': 50.0, 'ClCu': 29.0, 'CSn': 50.0, 'BaO': 56.0, 'ClCs': 55.0, 'AlSb': 13.0, 'SrTe': 38.0, 'BeS': 4.0}, u'r_d(A)': {'SeZn': 2.25, 'InSb': 3.109999895095825, 'SZn': 2.25, 'BN': 1.9500000476837158, 'OSr': 1.2000000476837158, 'BrRb': 1.9600000381469727, 'BaTe': 1.350000023841858, 'BeSe': 2.880000114440918, 'MgS': 3.1700000762939453, 'ClRb': 1.9600000381469727, 'BrNa': 6.570000171661377, 'BP': 1.9500000476837158, 
'MgSe': 3.1700000762939453, 'FK': 1.7899999618530273, 'BrLi': 6.929999828338623, 'BSb': 1.9500000476837158, 'AsB': 1.9500000476837158, 'GeSn': 2.0299999713897705, 'GeSi': 2.369999885559082, 'CaTe': 0.6800000071525574, 'ClK': 1.7899999618530273, 'CsI': 1.9700000286102295, 'MgO': 3.1700000762939453, 'BrCs': 1.9700000286102295, 'CsF': 1.9700000286102295, 'BrCu': 2.5799999237060547, 'ILi': 6.929999828338623, 'FLi': 6.929999828338623, 'CuF': 2.5799999237060547, 'INa': 6.570000171661377, 'Ge2': 2.369999885559082, 'FNa': 6.570000171661377, 'C2': 1.6299999952316284, 'AgBr': 2.9700000286102295, 'AsGa': 2.1600000858306885, 'CuI': 2.5799999237060547, 'AlN': 1.940000057220459, 'Si2': 1.8899999856948853, 'SiSn': 2.0299999713897705, 'ClLi': 6.929999828338623, 'ClNa': 6.570000171661377, 'AsIn': 3.109999895095825, 'OZn': 2.25, 'CGe': 2.369999885559082, 'CdO': 2.5999999046325684, 'InP': 3.109999895095825, 'SSr': 1.2000000476837158, 'InN': 3.109999895095825, 'BaSe': 1.350000023841858, 'BrK': 1.7899999618530273, 'BeTe': 2.880000114440918, 'CdS': 2.5999999046325684, 'CdTe': 2.5999999046325684, 'TeZn': 2.25, 'GaP': 2.1600000858306885, 'CdSe': 2.5999999046325684, 'MgTe': 3.1700000762939453, 'AlP': 1.940000057220459, 'BeO': 2.880000114440918, 'CaSe': 0.6800000071525574, 'FRb': 1.9600000381469727, 'SeSr': 1.2000000476837158, 'CSi': 1.8899999856948853, 'AgCl': 2.9700000286102295, 'AgI': 2.9700000286102295, 'GaN': 2.1600000858306885, 'CaS': 0.6800000071525574, 'AgF': 2.9700000286102295, 'GaSb': 2.1600000858306885, 'IK': 1.7899999618530273, 'IRb': 1.9600000381469727, 'BaS': 1.350000023841858, 'CaO': 0.6800000071525574, 'AlAs': 1.940000057220459, 'Sn2': 2.0299999713897705, 'ClCu': 2.5799999237060547, 'CSn': 2.0299999713897705, 'BaO': 1.350000023841858, 'ClCs': 1.9700000286102295, 'AlSb': 1.940000057220459, 'SrTe': 1.2000000476837158, 'BeS': 2.880000114440918}, u'r_s(B)': {'SeZn': 0.800000011920929, 'InSb': 1.0, 'SZn': 0.7400000095367432, 'BN': 0.5400000214576721, 'OSr': 0.46000000834465027, 
'BrRb': 0.75, 'BaTe': 0.9399999976158142, 'BeSe': 0.800000011920929, 'MgS': 0.7400000095367432, 'ClRb': 0.6800000071525574, 'BrNa': 0.75, 'BP': 0.8299999833106995, 'MgSe': 0.800000011920929, 'FK': 0.4099999964237213, 'BrLi': 0.75, 'BSb': 1.0, 'AsB': 0.8500000238418579, 'GeSn': 0.9200000166893005, 'GeSi': 0.9399999976158142, 'CaTe': 0.9399999976158142, 'ClK': 0.6800000071525574, 'CsI': 0.8999999761581421, 'MgO': 0.46000000834465027, 'BrCs': 0.75, 'CsF': 0.4099999964237213, 'BrCu': 0.75, 'ILi': 0.8999999761581421, 'FLi': 0.4099999964237213, 'CuF': 0.4099999964237213, 'INa': 0.8999999761581421, 'Ge2': 0.9200000166893005, 'FNa': 0.4099999964237213, 'C2': 0.6399999856948853, 'AgBr': 0.75, 'AsGa': 0.8500000238418579, 'CuI': 0.8999999761581421, 'AlN': 0.5400000214576721, 'Si2': 0.9399999976158142, 'SiSn': 0.9399999976158142, 'ClLi': 0.6800000071525574, 'ClNa': 0.6800000071525574, 'AsIn': 0.8500000238418579, 'OZn': 0.46000000834465027, 'CGe': 0.6399999856948853, 'CdO': 0.46000000834465027, 'InP': 0.8299999833106995, 'SSr': 0.7400000095367432, 'InN': 0.5400000214576721, 'BaSe': 0.800000011920929, 'BrK': 0.75, 'BeTe': 0.9399999976158142, 'CdS': 0.7400000095367432, 'CdTe': 0.9399999976158142, 'TeZn': 0.9399999976158142, 'GaP': 0.8299999833106995, 'CdSe': 0.800000011920929, 'MgTe': 0.9399999976158142, 'AlP': 0.8299999833106995, 'BeO': 0.46000000834465027, 'CaSe': 0.800000011920929, 'FRb': 0.4099999964237213, 'SeSr': 0.800000011920929, 'CSi': 0.6399999856948853, 'AgCl': 0.6800000071525574, 'AgI': 0.8999999761581421, 'GaN': 0.5400000214576721, 'CaS': 0.7400000095367432, 'AgF': 0.4099999964237213, 'GaSb': 1.0, 'IK': 0.8999999761581421, 'IRb': 0.8999999761581421, 'BaS': 0.7400000095367432, 'CaO': 0.46000000834465027, 'AlAs': 0.8500000238418579, 'Sn2': 1.059999942779541, 'ClCu': 0.6800000071525574, 'CSn': 0.6399999856948853, 'BaO': 0.46000000834465027, 'ClCs': 0.6800000071525574, 'AlSb': 1.0, 'SrTe': 0.9399999976158142, 'BeS': 0.7400000095367432}, u'r_d(B)': {'SeZn': 
2.180000066757202, 'InSb': 2.059999942779541, 'SZn': 2.369999885559082, 'BN': 1.5399999618530273, 'OSr': 2.2200000286102295, 'BrRb': 1.8700000047683716, 'BaTe': 1.8300000429153442, 'BeSe': 2.180000066757202, 'MgS': 2.369999885559082, 'ClRb': 1.6699999570846558, 'BrNa': 1.8700000047683716, 'BP': 1.7699999809265137, 'MgSe': 2.180000066757202, 'FK': 1.4299999475479126, 'BrLi': 1.8700000047683716, 'BSb': 2.059999942779541, 'AsB': 2.0199999809265137, 'GeSn': 2.369999885559082, 'GeSi': 1.8899999856948853, 'CaTe': 1.8300000429153442, 'ClK': 1.6699999570846558, 'CsI': 1.7200000286102295, 'MgO': 2.2200000286102295, 'BrCs': 1.8700000047683716, 'CsF': 1.4299999475479126, 'BrCu': 1.8700000047683716, 'ILi': 1.7200000286102295, 'FLi': 1.4299999475479126, 'CuF': 1.4299999475479126, 'INa': 1.7200000286102295, 'Ge2': 2.369999885559082, 'FNa': 1.4299999475479126, 'C2': 1.6299999952316284, 'AgBr': 1.8700000047683716, 'AsGa': 2.0199999809265137, 'CuI': 1.7200000286102295, 'AlN': 1.5399999618530273, 'Si2': 1.8899999856948853, 'SiSn': 1.8899999856948853, 'ClLi': 1.6699999570846558, 'ClNa': 1.6699999570846558, 'AsIn': 2.0199999809265137, 'OZn': 2.2200000286102295, 'CGe': 1.6299999952316284, 'CdO': 2.2200000286102295, 'InP': 1.7699999809265137, 'SSr': 2.369999885559082, 'InN': 1.5399999618530273, 'BaSe': 2.180000066757202, 'BrK': 1.8700000047683716, 'BeTe': 1.8300000429153442, 'CdS': 2.369999885559082, 'CdTe': 1.8300000429153442, 'TeZn': 1.8300000429153442, 'GaP': 1.7699999809265137, 'CdSe': 2.180000066757202, 'MgTe': 1.8300000429153442, 'AlP': 1.7699999809265137, 'BeO': 2.2200000286102295, 'CaSe': 2.180000066757202, 'FRb': 1.4299999475479126, 'SeSr': 2.180000066757202, 'CSi': 1.6299999952316284, 'AgCl': 1.6699999570846558, 'AgI': 1.7200000286102295, 'GaN': 1.5399999618530273, 'CaS': 2.369999885559082, 'AgF': 1.4299999475479126, 'GaSb': 2.059999942779541, 'IK': 1.7200000286102295, 'IRb': 1.7200000286102295, 'BaS': 2.369999885559082, 'CaO': 2.2200000286102295, 'AlAs': 2.0199999809265137, 
'Sn2': 2.0299999713897705, 'ClCu': 1.6699999570846558, 'CSn': 1.6299999952316284, 'BaO': 2.2200000286102295, 'ClCs': 1.6699999570846558, 'AlSb': 2.059999942779541, 'SrTe': 1.8300000429153442, 'BeS': 2.369999885559082}, u'EA(A)': {'SeZn': 1.0807000398635864, 'InSb': -0.2563000023365021, 'SZn': 1.0807000398635864, 'BN': -0.10740000009536743, 'OSr': 0.34310001134872437, 'BrRb': -0.590399980545044, 'BaTe': 0.27799999713897705, 'BeSe': 0.6305000185966492, 'MgS': 0.6924999952316284, 'ClRb': -0.590399980545044, 'BrNa': -0.7156999707221985, 'BP': -0.10740000009536743, 'MgSe': 0.6924999952316284, 'FK': -0.6212999820709229, 'BrLi': -0.6980999708175659, 'BSb': -0.10740000009536743, 'AsB': -0.10740000009536743, 'GeSn': -1.039199948310852, 'GeSi': -0.9490000009536743, 'CaTe': 0.30390000343322754, 'ClK': -0.6212999820709229, 'CsI': -0.569599986076355, 'MgO': 0.6924999952316284, 'BrCs': -0.569599986076355, 'CsF': -0.569599986076355, 'BrCu': -1.6384999752044678, 'ILi': -0.6980999708175659, 'FLi': -0.6980999708175659, 'CuF': -1.6384999752044678, 'INa': -0.7156999707221985, 'Ge2': -0.9490000009536743, 'FNa': -0.7156999707221985, 'C2': -0.8723999857902527, 'AgBr': -1.666599988937378, 'AsGa': -0.10809999704360962, 'CuI': -1.6384999752044678, 'AlN': -0.3125, 'Si2': -0.9929999709129333, 'SiSn': -1.039199948310852, 'ClLi': -0.6980999708175659, 'ClNa': -0.7156999707221985, 'AsIn': -0.2563000023365021, 'OZn': 1.0807000398635864, 'CGe': -0.9490000009536743, 'CdO': 0.838699996471405, 'InP': -0.2563000023365021, 'SSr': 0.34310001134872437, 'InN': -0.2563000023365021, 'BaSe': 0.27799999713897705, 'BrK': -0.6212999820709229, 'BeTe': 0.6305000185966492, 'CdS': 0.838699996471405, 'CdTe': 0.838699996471405, 'TeZn': 1.0807000398635864, 'GaP': -0.10809999704360962, 'CdSe': 0.838699996471405, 'MgTe': 0.6924999952316284, 'AlP': -0.3125, 'BeO': 0.6305000185966492, 'CaSe': 0.30390000343322754, 'FRb': -0.590399980545044, 'SeSr': 0.34310001134872437, 'CSi': -0.9929999709129333, 'AgCl': -1.666599988937378, 
'AgI': -1.666599988937378, 'GaN': -0.10809999704360962, 'CaS': 0.30390000343322754, 'AgF': -1.666599988937378, 'GaSb': -0.10809999704360962, 'IK': -0.6212999820709229, 'IRb': -0.590399980545044, 'BaS': 0.27799999713897705, 'CaO': 0.30390000343322754, 'AlAs': -0.3125, 'Sn2': -1.039199948310852, 'ClCu': -1.6384999752044678, 'CSn': -1.039199948310852, 'BaO': 0.27799999713897705, 'ClCs': -0.569599986076355, 'AlSb': -0.3125, 'SrTe': 0.34310001134872437, 'BeS': 0.6305000185966492}, u'E_LUMO(A)': {'SeZn': -1.194000005722046, 'InSb': 0.36800000071525574, 'SZn': -1.194000005722046, 'BN': 2.247999906539917, 'OSr': -1.378999948501587, 'BrRb': -0.7049999833106995, 'BaTe': -2.128999948501587, 'BeSe': -2.0980000495910645, 'MgS': -1.3580000400543213, 'ClRb': -0.7049999833106995, 'BrNa': -0.7179999947547913, 'BP': 2.247999906539917, 'MgSe': -1.3580000400543213, 'FK': -0.6970000267028809, 'BrLi': -0.9779999852180481, 'BSb': 2.247999906539917, 'AsB': 2.247999906539917, 'GeSn': 0.00800000037997961, 'GeSi': 2.174999952316284, 'CaTe': -2.132999897003174, 'ClK': -0.6970000267028809, 'CsI': -0.5479999780654907, 'MgO': -1.3580000400543213, 'BrCs': -0.5479999780654907, 'CsF': -0.5479999780654907, 'BrCu': -0.640999972820282, 'ILi': -0.9779999852180481, 'FLi': -0.9779999852180481, 'CuF': -0.640999972820282, 'INa': -0.7179999947547913, 'Ge2': 2.174999952316284, 'FNa': -0.7179999947547913, 'C2': 1.9919999837875366, 'AgBr': -0.4790000021457672, 'AsGa': 0.12999999523162842, 'CuI': -0.640999972820282, 'AlN': 0.6949999928474426, 'Si2': 0.4399999976158142, 'SiSn': 0.00800000037997961, 'ClLi': -0.9779999852180481, 'ClNa': -0.7179999947547913, 'AsIn': 0.36800000071525574, 'OZn': -1.194000005722046, 'CGe': 2.174999952316284, 'CdO': -1.309000015258789, 'InP': 0.36800000071525574, 'SSr': -1.378999948501587, 'InN': 0.36800000071525574, 'BaSe': -2.128999948501587, 'BrK': -0.6970000267028809, 'BeTe': -2.0980000495910645, 'CdS': -1.309000015258789, 'CdTe': -1.309000015258789, 'TeZn': -1.194000005722046, 
'GaP': 0.12999999523162842, 'CdSe': -1.309000015258789, 'MgTe': -1.3580000400543213, 'AlP': 0.6949999928474426, 'BeO': -2.0980000495910645, 'CaSe': -2.132999897003174, 'FRb': -0.7049999833106995, 'SeSr': -1.378999948501587, 'CSi': 0.4399999976158142, 'AgCl': -0.4790000021457672, 'AgI': -0.4790000021457672, 'GaN': 0.12999999523162842, 'CaS': -2.132999897003174, 'AgF': -0.4790000021457672, 'GaSb': 0.12999999523162842, 'IK': -0.6970000267028809, 'IRb': -0.7049999833106995, 'BaS': -2.128999948501587, 'CaO': -2.132999897003174, 'AlAs': 0.6949999928474426, 'Sn2': 0.00800000037997961, 'ClCu': -0.640999972820282, 'CSn': 0.00800000037997961, 'BaO': -2.128999948501587, 'ClCs': -0.5479999780654907, 'AlSb': 0.6949999928474426, 'SrTe': -1.378999948501587, 'BeS': -2.0980000495910645}, u'Z(B)': {'SeZn': 34.0, 'InSb': 51.0, 'SZn': 16.0, 'BN': 7.0, 'OSr': 8.0, 'BrRb': 35.0, 'BaTe': 52.0, 'BeSe': 34.0, 'MgS': 16.0, 'ClRb': 17.0, 'BrNa': 35.0, 'BP': 15.0, 'MgSe': 34.0, 'FK': 9.0, 'BrLi': 35.0, 'BSb': 51.0, 'AsB': 33.0, 'GeSn': 32.0, 'GeSi': 14.0, 'CaTe': 52.0, 'ClK': 17.0, 'CsI': 53.0, 'MgO': 8.0, 'BrCs': 35.0, 'CsF': 9.0, 'BrCu': 35.0, 'ILi': 53.0, 'FLi': 9.0, 'CuF': 9.0, 'INa': 53.0, 'Ge2': 32.0, 'FNa': 9.0, 'C2': 6.0, 'AgBr': 35.0, 'AsGa': 33.0, 'CuI': 53.0, 'AlN': 7.0, 'Si2': 14.0, 'SiSn': 14.0, 'ClLi': 17.0, 'ClNa': 17.0, 'AsIn': 33.0, 'OZn': 8.0, 'CGe': 6.0, 'CdO': 8.0, 'InP': 15.0, 'SSr': 16.0, 'InN': 7.0, 'BaSe': 34.0, 'BrK': 35.0, 'BeTe': 52.0, 'CdS': 16.0, 'CdTe': 52.0, 'TeZn': 52.0, 'GaP': 15.0, 'CdSe': 34.0, 'MgTe': 52.0, 'AlP': 15.0, 'BeO': 8.0, 'CaSe': 34.0, 'FRb': 9.0, 'SeSr': 34.0, 'CSi': 6.0, 'AgCl': 17.0, 'AgI': 53.0, 'GaN': 7.0, 'CaS': 16.0, 'AgF': 9.0, 'GaSb': 51.0, 'IK': 53.0, 'IRb': 53.0, 'BaS': 16.0, 'CaO': 8.0, 'AlAs': 33.0, 'Sn2': 50.0, 'ClCu': 17.0, 'CSn': 6.0, 'BaO': 8.0, 'ClCs': 17.0, 'AlSb': 51.0, 'SrTe': 52.0, 'BeS': 16.0}, u'E_HOMO(B)': {'SeZn': -6.6539998054504395, 'InSb': -4.991000175476074, 'SZn': -7.105999946594238, 'BN': -7.238999843597412, 'OSr': 
-9.196999549865723, 'BrRb': -8.00100040435791, 'BaTe': -6.109000205993652, 'BeSe': -6.6539998054504395, 'MgS': -7.105999946594238, 'ClRb': -8.699999809265137, 'BrNa': -8.00100040435791, 'BP': -5.5960001945495605, 'MgSe': -6.6539998054504395, 'FK': -11.293999671936035, 'BrLi': -8.00100040435791, 'BSb': -4.991000175476074, 'AsB': -5.341000080108643, 'GeSn': -4.046000003814697, 'GeSi': -4.163000106811523, 'CaTe': -6.109000205993652, 'ClK': -8.699999809265137, 'CsI': -7.236000061035156, 'MgO': -9.196999549865723, 'BrCs': -8.00100040435791, 'CsF': -11.293999671936035, 'BrCu': -8.00100040435791, 'ILi': -7.236000061035156, 'FLi': -11.293999671936035, 'CuF': -11.293999671936035, 'INa': -7.236000061035156, 'Ge2': -4.046000003814697, 'FNa': -11.293999671936035, 'C2': -5.415999889373779, 'AgBr': -8.00100040435791, 'AsGa': -5.341000080108643, 'CuI': -7.236000061035156, 'AlN': -7.238999843597412, 'Si2': -4.163000106811523, 'SiSn': -4.163000106811523, 'ClLi': -8.699999809265137, 'ClNa': -8.699999809265137, 'AsIn': -5.341000080108643, 'OZn': -9.196999549865723, 'CGe': -5.415999889373779, 'CdO': -9.196999549865723, 'InP': -5.5960001945495605, 'SSr': -7.105999946594238, 'InN': -7.238999843597412, 'BaSe': -6.6539998054504395, 'BrK': -8.00100040435791, 'BeTe': -6.109000205993652, 'CdS': -7.105999946594238, 'CdTe': -6.109000205993652, 'TeZn': -6.109000205993652, 'GaP': -5.5960001945495605, 'CdSe': -6.6539998054504395, 'MgTe': -6.109000205993652, 'AlP': -5.5960001945495605, 'BeO': -9.196999549865723, 'CaSe': -6.6539998054504395, 'FRb': -11.293999671936035, 'SeSr': -6.6539998054504395, 'CSi': -5.415999889373779, 'AgCl': -8.699999809265137, 'AgI': -7.236000061035156, 'GaN': -7.238999843597412, 'CaS': -7.105999946594238, 'AgF': -11.293999671936035, 'GaSb': -4.991000175476074, 'IK': -7.236000061035156, 'IRb': -7.236000061035156, 'BaS': -7.105999946594238, 'CaO': -9.196999549865723, 'AlAs': -5.341000080108643, 'Sn2': -3.865999937057495, 'ClCu': -8.699999809265137, 'CSn': -5.415999889373779, 
'BaO': -9.196999549865723, 'ClCs': -8.699999809265137, 'AlSb': -4.991000175476074, 'SrTe': -6.109000205993652, 'BeS': -7.105999946594238}}",
                    "print \"Done\""
Emre Ahmetcik's avatar
Emre Ahmetcik committed
602
603
                ],
                "hidden": true
604
605
606
            },
            "output": {
                "state": {},
Emre Ahmetcik's avatar
Emre Ahmetcik committed
607
                "selectedType": "Results",
608
                "pluginName": "IPython",
Emre Ahmetcik's avatar
Emre Ahmetcik committed
609
610
                "shellId": "CFDBBC64C18149AA8C1F32F183239C1A",
                "height": 0
611
612
            },
            "evaluatorReader": true,
Emre Ahmetcik's avatar
Emre Ahmetcik committed
613
            "lineCount": 2,
Emre Ahmetcik's avatar
Emre Ahmetcik committed
614
            "tags": "get_descriptors"
615
616
        },
        {
Emre Ahmetcik's avatar
Emre Ahmetcik committed
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
            "id": "codeNlZOhl",
            "type": "code",
            "evaluator": "IPython",
            "input": {
                "body": [
                    "import itertools",
                    "from math import exp, sqrt",
                    "import math",
                    "",
                    "def _my_power_2(row):",
                    "    return pow(row[0], 2)         ",
                    "",
                    "def _my_power_3(row):",
                    "    return pow(row[0], 3)    ",
                    "",
                    "def _my_power_m1(row):",
                    "    return pow(row[0],-1)",
                    "",
                    "def _my_power_m2(row):",
                    "    return pow(row[0],-2)",
                    "",
                    "def _my_power_m3(row):",
                    "    return pow(row[0],-3)",
                    "",
                    "def _my_abs_sqrt(row):",
                    "    return math.sqrt(abs(row[0]))",
                    "    ",
                    "def _my_exp(row):",
                    "    return exp(row[0])",
                    "",
                    "def _my_exp_power_2(row):",
                    "    return exp(pow(row[0], 2))",
                    "",
                    "def _my_exp_power_3(row):",
                    "    return exp(pow(row[0], 3))",
                    "",
                    "def _my_sum(row):",
                    "    return row[0] + row[1]",
                    "    ",
                    "def _my_abs_sum(row):",
                    "    return abs(row[0] + row[1])",
                    "",
                    "def _my_abs_diff(row):",
                    "    return abs(row[0] - row[1])   ",
                    "",
                    "def _my_diff(row):",
                    "    return row[0] - row[1] ",
                    "",
                    "def _my_div(row):",
                    "    return row[0]/row[1]",
                    "    ",
                    "def _my_sum_power_2(row):",
                    "    return pow((row[0] + row[1]), 2)",
                    "",
                    "def _my_sum_power_3(row):",
                    "    return pow((row[0] + row[1]), 3)",
                    "    ",
                    "def _my_sum_exp(row):",
                    "    return exp(row[0] + row[1])",
                    "",
                    "def _my_sum_exp_power_2(row):",
                    "    return exp(pow(row[0] + row[1], 2))",
                    "",
                    "def _my_sum_exp_power_3(row):",
                    "    return exp(pow(row[0] + row[1], 3))",
                    "  ",
                    "def combine_features(df=None, allowed_operations=None):",
                    "    \"\"\"Generate combination of features given a dataframe and a list of allowed operations.",
                    "    ",
                    "    For the exponentials, we introduce a characteristic energy/length",
                    "    converting the ",
                    "    .. todo:: Fix under/overflow errors, and introduce handling of exceptions.",
                    "",
                    "    \"\"\"",
                    "        ",
                    "    if allowed_operations:",
                    "        print('Selected operations:\\n {0}'.format(allowed_operations)) ",
                    "    else:",
                    "        print('No allowed operations selected.') ",
                    "        ",
                    "    columns_ = df.columns.tolist()    ",
                    "    ",
                    "    dict_features = {",
                    "        'period':'a0', ",
                    "        'Z': 'a0', ",
                    "        'group': 'a0', ",
                    "",
                    "        'IP': 'a1', ",
                    "        'EA': 'a1', ",
                    "",
                    "        'E_HOMO': 'a2', ",
                    "        'E_LUMO': 'a2', ",
                    "",
                    "",
                    "        'r_s': 'a3',",
                    "        'r_p': 'a3',",
                    "        'r_d': 'a3',",
                    "        'd': 'a3', ",
                    "   ",
                    "        }",
                    "        ",
                    "",
                    "    df_a0 = df[[col for col in columns_ if dict_features.get(col.split('(', 1)[0])=='a0']].astype('float32')    ",
                    "    df_a1 = df[[col for col in columns_ if dict_features.get(col.split('(', 1)[0])=='a1']].astype('float32')    ",
                    "    df_a2 = df[[col for col in columns_ if dict_features.get(col.split('(', 1)[0])=='a2']].astype('float32')    ",
                    "    df_a3 = df[[col for col in columns_ if dict_features.get(col.split('(', 1)[0])=='a3']].astype('float32')   ",
                    "",
                    "    ",
                    "    col_a0 = df_a0.columns.tolist()",
                    "    col_a1 = df_a1.columns.tolist()",
                    "    col_a2 = df_a2.columns.tolist()",
                    "    col_a3 = df_a3.columns.tolist()",
                    "",
                    "    #  this list will at the end all the dataframes created",
                    "    df_list = []",
                    "",
                    "    df_b0_list = []    ",
                    "    df_b1_list = []",
                    "    df_b2_list = []",
                    "    df_b3_list = []",
                    "    df_c3_list = []",
                    "    df_d3_list = []",
                    "    df_e3_list = []",
                    "    df_f1_list = []",
                    "    df_f2_list = []",
                    "    df_f3_list = []",
                    "    df_x1_list = []",
                    "    df_x2_list = []",
                    "    df_x_list = []",
                    "",
                    "",
                    "    # create b0: absolute differences and sums of a0   ",
                    "    # this is not in the PRL. ",
                    "    for subset in itertools.combinations(col_a0, 2):",
                    "        if '+' in allowed_operations:",
                    "            cols = ['('+subset[0]+'+'+subset[1]+')']        ",
                    "            data = df_a0[list(subset)].apply(_my_sum, axis=1)            ",
                    "            df_b0_list.append(pd.DataFrame(data, columns=cols))         ",
                    "            ",
                    "        if '-' in allowed_operations:",
                    "            cols = ['('+subset[0]+'-'+subset[1]+')']        ",
                    "            data = df_a0[list(subset)].apply(_my_diff, axis=1)            ",
                    "            df_b0_list.append(pd.DataFrame(data, columns=cols))   ",
                    "            ",
                    "            cols = ['('+subset[1]+'-'+subset[0]+')']        ",
                    "            data = df_a0[list(subset)].apply(_my_diff, axis=1)            ",
                    "            df_b0_list.append(pd.DataFrame(data, columns=cols))  ",
                    "        ",
                    "        if '|+|' in allowed_operations:",
                    "            cols = ['|'+subset[0]+'+'+subset[1]+'|']        ",
                    "            data = df_a0[list(subset)].apply(_my_abs_sum, axis=1)            ",
                    "            df_b0_list.append(pd.DataFrame(data, columns=cols))     ",
                    "        ",
                    "        if '|-|' in allowed_operations:",
                    "            cols = ['|'+subset[0]+'-'+subset[1]+'|']        ",
                    "            data = df_a0[list(subset)].apply(_my_abs_diff, axis=1)            ",
                    "            df_b0_list.append(pd.DataFrame(data, columns=cols))  ",
                    "            ",
                    "        if '/' in allowed_operations:",
                    "            cols = [subset[0]+'/'+subset[1]]        ",
                    "            data = df_a0[list(subset)].apply(_my_div, axis=1)            ",
                    "            df_b0_list.append(pd.DataFrame(data, columns=cols))  ",
                    "",
                    "            cols = [subset[1]+'/'+subset[0]]        ",
                    "            data = df_a0[list(subset)].apply(_my_div, axis=1)            ",
                    "            df_b0_list.append(pd.DataFrame(data, columns=cols))  ",
                    "",
                    "    ",
                    "    # we kept itertools.combinations to make the code more uniform with the binary operations",
                    "    for subset in itertools.combinations(col_a0, 1):",
                    "        if '^2' in allowed_operations:",
                    "            cols = [subset[0]+'^2']        ",
                    "            data = df_a0[list(subset)].apply(_my_power_2, axis=1)            ",
                    "            df_b0_list.append(pd.DataFrame(data, columns=cols))    ",
                    "            ",
                    "        if '^3' in allowed_operations:",
                    "            cols = [subset[0]+'^3']   ",
                    "            data = df_a0[list(subset)].apply(_my_power_3, axis=1)            ",
                    "            df_b0_list.append(pd.DataFrame(data, columns=cols)) ",
                    "",
                    "        if 'exp' in allowed_operations:",
                    "            cols = ['exp('+subset[0]+')']       ",
                    "            data = df_a0[list(subset)].apply(_my_exp, axis=1)            ",
                    "            df_b0_list.append(pd.DataFrame(data, columns=cols))        ",
                    "        ",
                    "        ",
                    "    # create b1: absolute differences and sums of a1    ",
                    "    for subset in itertools.combinations(col_a1, 2):",
                    "        if '+' in allowed_operations:",
                    "            cols = ['('+subset[0]+'+'+subset[1]+')']        ",
                    "            data = df_a1[list(subset)].apply(_my_sum, axis=1)            ",
                    "            df_b1_list.append(pd.DataFrame(data, columns=cols))         ",
                    "            ",
                    "        if '-' in allowed_operations:",
                    "            cols = ['('+subset[0]+'-'+subset[1]+')']        ",
                    "            data = df_a1[list(subset)].apply(_my_diff, axis=1)            ",
                    "            df_b1_list.append(pd.DataFrame(data, columns=cols))   ",
                    "",
                    "        if '|+|' in allowed_operations:",
                    "            cols = ['|'+subset[0]+'+'+subset[1]+'|']        ",
                    "            data = df_a1[list(subset)].apply(_my_abs_sum, axis=1)            ",
                    "            df_b1_list.append(pd.DataFrame(data, columns=cols))     ",
                    "            ",
                    "        if '|-|' in allowed_operations:",
                    "            cols = ['|'+subset[0]+'-'+subset[1]+'|']        ",
                    "            data = df_a1[list(subset)].apply(_my_abs_diff, axis=1)            ",
                    "            df_b1_list.append(pd.DataFrame(data, columns=cols))  ",
                    "",
                    "    # create b2: absolute differences and sums of a2    ",
                    "    for subset in itertools.combinations(col_a2, 2):",
                    "        if '+' in allowed_operations:",
                    "            cols = ['('+subset[0]+'+'+subset[1]+')']        ",
                    "            data = df_a2[list(subset)].apply(_my_sum, axis=1)            ",
                    "            df_b2_list.append(pd.DataFrame(data, columns=cols))         ",
                    "            ",
                    "        if '-' in allowed_operations:",
                    "            cols = ['('+subset[0]+'-'+subset[1]+')']        ",
                    "            data = df_a2[list(subset)].apply(_my_diff, axis=1)            ",
                    "            df_b2_list.append(pd.DataFrame(data, columns=cols))   ",
                    "",
                    "        if '|+|' in allowed_operations:",
                    "            cols = ['|'+subset[0]+'+'+subset[1]+'|']        ",
                    "            data = df_a2[list(subset)].apply(_my_abs_sum, axis=1)            ",
                    "            df_b2_list.append(pd.DataFrame(data, columns=cols))         ",
                    "            ",
                    "        if '|-|' in allowed_operations:",
                    "            cols = ['|'+subset[0]+'-'+subset[1]+'|']        ",
                    "            data = df_a2[list(subset)].apply(_my_abs_diff, axis=1)            ",
                    "            df_b2_list.append(pd.DataFrame(data, columns=cols))   ",
                    " ",
                    "    # create b3: absolute differences and sums of a3    ",
                    "    for subset in itertools.combinations(col_a3, 2):",
                    "        if '+' in allowed_operations:",
                    "            cols = ['('+subset[0]+'+'+subset[1]+')']        ",
                    "            data = df_a3[list(subset)].apply(_my_sum, axis=1)            ",
                    "            df_b3_list.append(pd.DataFrame(data, columns=cols))         ",
                    "            ",
                    "        if '-' in allowed_operations:",
                    "            cols = ['('+subset[0]+'-'+subset[1]+')']        ",
                    "            data = df_a3[list(subset)].apply(_my_diff, axis=1)            ",
                    "            df_b3_list.append(pd.DataFrame(data, columns=cols))              ",
                    "",
                    "        if '|+|' in allowed_operations:",
                    "            cols = ['|'+subset[0]+'+'+subset[1]+'|']        ",
                    "            data = df_a3[list(subset)].apply(_my_abs_sum, axis=1)            ",
                    "            df_b3_list.append(pd.DataFrame(data, columns=cols))  ",
                    "            ",
                    "        if '|-|' in allowed_operations:",
                    "            cols = ['|'+subset[0]+'-'+subset[1]+'|']        ",
                    "            data = df_a3[list(subset)].apply(_my_abs_diff, axis=1)            ",
                    "            df_b3_list.append(pd.DataFrame(data, columns=cols))              ",
                    "",
                    "    # create c3: two steps:",
                    "    # 1) squares of a3 - unary operations ",
                    "    # we kept itertools.combinations to make the code more uniform with the binary operations",
                    "    for subset in itertools.combinations(col_a3, 1):",
                    "        if '^2' in allowed_operations:",
                    "            cols = [subset[0]+'^2']        ",
                    "            data = df_a3[list(subset)].apply(_my_power_2, axis=1)            ",
                    "            df_c3_list.append(pd.DataFrame(data, columns=cols))    ",
                    "        if '^3' in allowed_operations:",
                    "            cols = [subset[0]+'^3']   ",
                    "            data = df_a3[list(subset)].apply(_my_power_3, axis=1)            ",
                    "            df_c3_list.append(pd.DataFrame(data, columns=cols)) ",
                    "",
                    "            ",
                    "    # 2) squares of b3 (only sums) --> sum squared of a3",
                    "    for subset in itertools.combinations(col_a3, 2):",
                    "        if '^2' in allowed_operations:",
                    "            cols = ['('+subset[0]+'+'+subset[1]+')^2']   ",
                    "            data = df_a3[list(subset)].apply(_my_sum_power_2, axis=1)            ",
                    "            df_c3_list.append(pd.DataFrame(data, columns=cols))        ",
                    "            ",
                    "        if '^3' in allowed_operations:",
                    "            cols = ['('+subset[0]+'+'+subset[1]+')^3']        ",
                    "            data = df_a3[list(subset)].apply(_my_sum_power_3, axis=1)            ",
                    "            df_c3_list.append(pd.DataFrame(data, columns=cols))",
                    "",
                    "    # create d3: two steps:",
                    "    # 1) exponentials of a3 - unary operations ",
                    "    # we kept itertools.combinations to make the code more uniform with the binary operations",
                    "    for subset in itertools.combinations(col_a3, 1):",
                    "        if 'exp' in allowed_operations:",
                    "            cols = ['exp('+subset[0]+')']      ",
                    "            df_subset = df_a3[list(subset)]",
                    "            data = df_subset.apply(_my_exp, axis=1)            ",
                    "            df_d3_list.append(pd.DataFrame(data, columns=cols))    ",
                    "            ",
                    "    # 2) exponentials of b3 (only sums) --> exponential of sum of a3",
                    "    for subset in itertools.combinations(col_a3, 2):",
                    "        if 'exp' in allowed_operations:",
                    "            cols = ['exp('+subset[0]+'+'+subset[1]+')']    ",
                    "            df_subset = df_a3[list(subset)]",
                    "            data = df_subset.apply(_my_sum_exp, axis=1)               ",
                    "            df_d3_list.append(pd.DataFrame(data, columns=cols))        ",
                    "",
                    "    # create e3: two steps:",
                    "    # 1) exponentials of squared a3 - unary operations ",
                    "    # we kept itertools.combinations to make the code more uniform with the binary operations",
                    "    for subset in itertools.combinations(col_a3, 1):",
                    "        operations={'exp', '^2'}",
                    "        if operations <= set(allowed_operations):",
                    "            cols = ['exp('+subset[0]+'^2)']",
                    "            df_subset = df_a3[list(subset)]",
                    "            data = df_subset.apply(_my_exp_power_2, axis=1)            ",
                    "            df_e3_list.append(pd.DataFrame(data, columns=cols))    ",
                    "            ",
                    "        operations={'exp', '^3'}",
                    "        if operations <= set(allowed_operations):",
                    "            try:",
                    "                cols = ['exp('+subset[0]+'^3)']",
                    "                df_subset = df_a3[list(subset)]",
                    "                data = df_subset.apply(_my_exp_power_3, axis=1)            ",
                    "                df_e3_list.append(pd.DataFrame(data, columns=cols)) ",
                    "            except OverflowError as e:",
                    "                print('Dropping feature combination that caused under/overflow.\\n')",
                    "",
                    "            ",
                    "    # 2) exponentials of b3 (only sums) --> exponential of sum of a3",
                    "    for subset in itertools.combinations(col_a3, 2):",
                    "        operations={'exp', '^2'}",
                    "        if operations <= set(allowed_operations):",
                    "            cols = ['exp(('+subset[0]+'+'+subset[1]+')^2)']",
                    "            df_subset = df_a3[list(subset)]",
                    "            data = df_subset.apply(_my_sum_exp_power_2, axis=1)            ",
                    "            df_e3_list.append(pd.DataFrame(data, columns=cols))        ",
                    "",
                    "        operations={'exp', '^3'}",
                    "        if operations <= set(allowed_operations):",
                    "            try:",
                    "                cols = ['exp(('+subset[0]+'+'+subset[1]+')^3)']",
                    "                df_subset = df_a3[list(subset)]",
                    "                data = df_subset.apply(_my_sum_exp_power_3, axis=1)            ",
                    "                df_e3_list.append(pd.DataFrame(data, columns=cols))   ",
                    "            except OverflowError as e:",
                    "                print('Dropping feature combination that caused under/overflow.\\n')",
                    "",
                    "    # make dataframes from lists, check if they are not empty",
                    "    # we make there here because they are going to be used to further",
                    "    # combine the features",
                    "    if not df_a0.empty: ",
                    "        df_list.append(df_a0)",
                    "        ",
                    "    if not df_a1.empty: ",
                    "        df_x1_list.append(df_a1)",
                    "        df_list.append(df_a1)",
                    "",
                    "    if not df_a2.empty: ",
                    "        df_x1_list.append(df_a2)",
                    "        df_list.append(df_a2)",
                    "        ",
                    "    if not df_a3.empty: ",
                    "        df_x1_list.append(df_a3)",
                    "        df_list.append(df_a3)",
                    "",
                    "",
                    "",
                    "    if df_b0_list: ",
                    "        df_b0 = pd.concat(df_b0_list, axis=1)",
                    "        col_b0 = df_b0.columns.tolist()",
                    "        df_b0.to_csv('./df_b0.csv', index=True)",
                    "        df_list.append(df_b0)",
                    "        ",
                    "    if df_b1_list: ",
                    "        df_b1 = pd.concat(df_b1_list, axis=1)",
                    "        col_b1 = df_b1.columns.tolist()",
                    "        df_x1_list.append(df_b1)",
                    "        df_list.append(df_b1)",
                    "",
                    "    if df_b2_list: ",
                    "        df_b2 = pd.concat(df_b2_list, axis=1)",
                    "        col_b2 = df_b2.columns.tolist()",
                    "        df_x1_list.append(df_b2)",
                    "        df_list.append(df_b2)",
                    "        ",
                    "    if df_b3_list: ",
                    "        df_b3 = pd.concat(df_b3_list, axis=1)",
                    "        col_b3 = df_b3.columns.tolist()        ",
                    "        df_x1_list.append(df_b3)",
                    "        df_list.append(df_b3)",
                    "    ",
                    "    if df_c3_list:",
                    "        df_c3 = pd.concat(df_c3_list, axis=1)",
                    "        col_c3 = df_c3.columns.tolist()",
                    "        df_x2_list.append(df_c3)",
                    "        df_list.append(df_c3)",
                    "",
                    "    if df_d3_list:",
                    "        df_d3 = pd.concat(df_d3_list, axis=1)",
                    "        col_d3 = df_d3.columns.tolist()",
                    "        df_x2_list.append(df_d3)",
                    "        df_list.append(df_d3)",
                    "",
                    "    if df_e3_list:",
                    "        df_e3 = pd.concat(df_e3_list, axis=1)",
                    "        col_e3 = df_e3.columns.tolist()",
                    "        df_x2_list.append(df_e3)",
                    "        df_list.append(df_e3)",
                    "",
                    "    if df_x1_list:",
                    "        df_x1 = pd.concat(df_x1_list, axis=1)",
                    "        col_x1 = df_x1.columns.tolist()",
                    "                ",
                    "    if df_x2_list:",
                    "        df_x2 = pd.concat(df_x2_list, axis=1)",
                    "        col_x2 = df_x2.columns.tolist()",
                    "",
                    "    if df_x1_list and df_x2_list:",
                    "        for el_x1 in col_x1:",
                    "            for el_x2 in col_x2:",
                    "                if '/' in allowed_operations:",
                    "                    cols = [el_x1+'/'+el_x2] ",
                    "                    #now the operation is between two dataframes",
                    "                    data = df_x1[el_x1].divide(df_x2[el_x2])     ",
                    "                    df_x_list.append(pd.DataFrame(data, columns=cols))   ",
                    "     ",
                    "",
                    "    if df_f1_list:",
                    "        df_f1 = pd.concat(df_f1_list, axis=1)",
                    "        col_f1 = df_f1.columns.tolist()",
                    "        df_list.append(df_f1)",
                    "",
                    "                ",
                    "    if df_x_list:",
                    "        df_x = pd.concat(df_x_list, axis=1)",
                    "        col_x = df_x.columns.tolist()",
                    "        df_list.append(df_x)",
                    "",
                    "",
                    "",
                    "",
                    "    if df_list:",
                    "        df_combined_features = pd.concat(df_list, axis=1)",
                    "    else:",
                    "        print('No features selected. Please select at least two primary features.')",
                    "        ",
                    "",
                    "    ",
                    "    print('Number of total features generated: {0}'.format(df_combined_features.shape[1]))",
                    "    ",
                    "    return df_combined_features",
Emre Ahmetcik's avatar
Emre Ahmetcik committed
1058
                    "print \"Done\""
Emre Ahmetcik's avatar
Emre Ahmetcik committed
1059
1060
1061
1062
1063
                ],
                "hidden": true
            },
            "output": {
                "state": {},
Emre Ahmetcik's avatar
Emre Ahmetcik committed
1064
                "selectedType": "Results",
Emre Ahmetcik's avatar
Emre Ahmetcik committed
1065
                "pluginName": "IPython",
Emre Ahmetcik's avatar
Emre Ahmetcik committed
1066
1067
                "shellId": "CFDBBC64C18149AA8C1F32F183239C1A",
                "height": 0
Emre Ahmetcik's avatar
Emre Ahmetcik committed
1068
1069
1070
1071
            },
            "evaluatorReader": true,
            "lineCount": 437,
            "tags": "get_descriptors"
1072
1073
        },
        {
Emre Ahmetcik's avatar
Emre Ahmetcik committed
1074
            "id": "codeWJJ3l4",
1075
1076
1077
1078
            "type": "code",
            "evaluator": "IPython",
            "input": {
                "body": [
1079
                    "from nomad_sim.utils_binaries import get_chemical_formula_binaries",
1080
                    "",
1081
                    "def get_descriptors(nomad_structure_list=[] ,selected_feature_list=[], allowed_operations=[], **kwargs):",
                    "    \"\"\"Return the dataframe of primary and combined features for the given structures.",
                    "",
                    "    The primary features are read from the global ``features_dict``, reduced to",
                    "    the '(A)' and '(B)' columns of ``selected_feature_list`` and to the chemical",
                    "    formulas of ``nomad_structure_list``, converted to float and passed to",
                    "    ``combine_features`` together with ``allowed_operations``.",
                    "    Additional keyword arguments are accepted but not used.",
                    "    \"\"\"",
                    "    # structures with the same chemical formula share their descriptors in our case,",
                    "    # so every chemical formula is kept only once",
                    "    formulas = list(set([get_chemical_formula_binaries(structure.atoms[0,0])",
                    "                         for structure in nomad_structure_list]))",
                    "",
                    "    # every selected feature exists once for atom A and once for atom B",
                    "    columns_AB = []",
                    "    for feature in selected_feature_list:",
                    "        columns_AB.extend([feature + '(A)', feature + '(B)'])",
                    "",
                    "    # table of all features, cut down to the selected columns and compounds",
                    "    table = pd.DataFrame(features_dict)",
                    "    table = table[columns_AB].loc[formulas]",
                    "",
                    "    # the index holds the compounds",
                    "    table.index.name = 'chemical_formula'",
                    "",
                    "    # make sure that every column is numerical",
                    "    for column in table.columns.tolist():",
                    "        table[column] = table[column].astype(float)",
                    "",
                    "    # arithmetic combinations of the primary features",
                    "    return combine_features(df=table, allowed_operations=allowed_operations)",
                    "print \"Done\""
1110
                ]
1111
1112
1113
            },
            "output": {
                "state": {},
Emre Ahmetcik's avatar
Emre Ahmetcik committed
1114
                "selectedType": "Results",
1115
                "pluginName": "IPython",
Emre Ahmetcik's avatar
Emre Ahmetcik committed
1116
1117
                "shellId": "CFDBBC64C18149AA8C1F32F183239C1A",
                "height": 0
1118
1119
            },
            "evaluatorReader": true,
Emre Ahmetcik's avatar
Emre Ahmetcik committed
1120
            "lineCount": 31,
1121
            "tags": "get_descriptors"
1122
        },
1123
1124
1125
1126
1127
1128
1129
1130
        {
            "id": "markdownS5NEVZ",
            "type": "markdown",
            "body": [
                "Use 'get_descriptors' to obtain the descriptor matrix $\\mathbf{D}$ of the radii at the maximum value of the s, p, and d valence radial probability density: $r_s(A), r_s(B), r_p(A), r_p(B), r_d(A), r_d(B)$. If the list of allowed (arithmetic) operations is empty, only the six primary features are returned. If strings of arithmetic operations are included (e.g. \"$+$\", \"$-$\", \"$/$\", \"$\\exp$\"), derived features are added to the matrix as well."
            ],
            "evaluatorReader": false
        },
1131
        {
1132
            "id": "codeWjXSr9",
1133
1134
1135
1136
            "type": "code",
            "evaluator": "IPython",
            "input": {
                "body": [
1137
1138
                    "kwargs = {",
                    "          'nomad_structure_list': nomad_structure_list,",
Emre Ahmetcik's avatar
Emre Ahmetcik committed
1139
1140
                    "          'selected_feature_list': ['r_s', 'r_p', 'r_d'],",
                    "          'allowed_operations': [],                ",
1141
1142
                    "         }",
                    "",
Emre Ahmetcik's avatar
Emre Ahmetcik committed
1143
1144
1145
                    "#kwargs['selected_feature_list'] = ['IP', 'EA', 'E_HOMO', 'E_LUMO', 'r_s', 'r_p', 'r_d', 'Z', 'period', 'd']",
                    "#kwargs['allowed_operations'] = ['+', '-', '|-|', '*', '/', '^2', '^3', 'exp']",
                    "",
1146
1147
                    "df_desc = get_descriptors(**kwargs)",
                    "print df_desc"
1148
1149
1150
1151
1152
1153
                ]
            },
            "output": {
                "state": {},
                "selectedType": "Results",
                "pluginName": "IPython",
Emre Ahmetcik's avatar
Emre Ahmetcik committed
1154
1155
                "shellId": "CFDBBC64C18149AA8C1F32F183239C1A",
                "height": 0
1156
1157
            },
            "evaluatorReader": true,
Emre Ahmetcik's avatar
Emre Ahmetcik committed
1158
            "lineCount": 11
1159
1160
1161
1162
1163
        },
        {
            "id": "markdownjES4yc",
            "type": "markdown",
            "body": [
1164
                "Let's combine the functions declared so far into a new function 'get_data', which returns the numpy arrays $\\mathbf{P}$ and $\\mathbf{D}$, the list of feature name strings, the list of compound strings (in the same order as the rows of both $\\mathbf{P}$ and $\\mathbf{D}$), and the list of JSON paths of the minimum-energy structures (also in the same order). We will need this function in the next chapters."
1165
1166
            ],
            "evaluatorReader": false
1167
1168
        },
        {
1169
            "id": "code57Huu9",
1170
            "type": "code",
1171
            "evaluator": "IPython",
1172
1173
            "input": {
                "body": [
1174
1175
1176
1177
                    "def get_data(selected_feature_list, allowed_operations):",
                    "    \"\"\"Collect the data of the global ``nomad_structure_list`` for the selected features.",
                    "",
                    "    Returns the tuple (values of the 'energy_diff' column, values of the",
                    "    descriptor dataframe, feature names, compound names, entries of the",
                    "    'json_path' column); the energy differences and json paths are reindexed",
                    "    to the row order of the descriptor dataframe.",
                    "    \"\"\"",
                    "    descriptors = get_descriptors(",
                    "        nomad_structure_list=nomad_structure_list,",
                    "        selected_feature_list=selected_feature_list,",
                    "        allowed_operations=allowed_operations)",
                    "    targets = get_energy_diffs(nomad_structure_list)",
                    "",
                    "    feature_list = descriptors.columns.tolist()",
                    "    compounds_list = descriptors.index.tolist()",
                    "",
                    "    # bring the targets into the row order of the descriptors",
                    "    targets = targets.reindex(compounds_list)",
                    "    json_paths = targets['json_path'].tolist()",
                    "    energy_diffs = targets['energy_diff']",
                    "",
                    "    return energy_diffs.values, descriptors.values, feature_list, compounds_list, json_paths",
                    "print \"Done\""
1192
                ]
1193
1194
1195
            },
            "output": {
                "state": {},
Emre Ahmetcik's avatar
Emre Ahmetcik committed
1196
                "selectedType": "Results",
1197
                "pluginName": "IPython",
Emre Ahmetcik's avatar
Emre Ahmetcik committed
1198
1199
                "shellId": "CFDBBC64C18149AA8C1F32F183239C1A",
                "height": 0
1200
1201
            },
            "evaluatorReader": true,
Emre Ahmetcik's avatar
Emre Ahmetcik committed
1202
            "lineCount": 18,
1203
            "tags": "get_data"
1204
1205
        },
        {
1206
            "id": "code6YvnAC",
1207
1208
1209
1210
            "type": "code",
            "evaluator": "IPython",
            "input": {
                "body": [
Emre Ahmetcik's avatar
Emre Ahmetcik committed
1211
1212
                    "# check that get_data works",
                    "selected_feature_list = ['r_s', 'r_p', 'r_d']",
1213
1214
                    "allowed_operations = []",
                    "P, D, feature_list, compounds_list, json_paths = get_data(selected_feature_list, allowed_operations)",
1215
                    "print P.shape, D.shape, len(feature_list), len(json_paths)"
1216
1217
1218
1219
1220
1221
                ]
            },
            "output": {
                "state": {},
                "selectedType": "Results",
                "pluginName": "IPython",
Emre Ahmetcik's avatar
Emre Ahmetcik committed
1222
1223
                "shellId": "CFDBBC64C18149AA8C1F32F183239C1A",
                "height": 0
1224
1225
            },
            "evaluatorReader": true,
Emre Ahmetcik's avatar
Emre Ahmetcik committed
1226
            "lineCount": 5
1227
1228
        },
        {
1229
1230
1231
1232
1233
1234
1235
1236
1237
1238
1239
1240
1241
1242
1243
1244
1245
1246
1247
1248
1249
1250
1251
1252
1253
1254
            "id": "sectionCs89Kj",
            "type": "section",
            "title": "Determining low-dimensional descriptors with the $\\ell_0$ method",
            "level": 1,
            "evaluatorReader": false,
            "collapsed": true
        },
        {
            "id": "markdownUK5ZfK",
            "type": "markdown",
            "body": [
                "<summary>",
                "<li> Perform an $\\ell_0$-regularization to identify the best low dimensional descriptors using the primary features.</li>",
                "<li> Show that non-linear functions of the primary features improve the models significantly. </li>",
                "<li> See that the $\\ell_0$-regularization can rapidly become computational infeasible.</li>",
                "</summary>"
            ],
            "evaluatorReader": false
        },
        {
            "id": "markdownJaNdzs",
            "type": "markdown",
            "body": [
                "For the case you have skipped a chapter, at the beginning of each chapter a JavaScript cell is provided which runs the relevant cells from chapters before with functions we will need in this chapter."
            ],
            "evaluatorReader": false
1255
1256
        },
        {
1257
            "id": "coderkkthv",
1258
            "type": "code",
1259
            "evaluator": "JavaScript",
1260
1261
            "input": {
                "body": [
1262
                    "// 'Import' relevant functions from chapter before",
1263
1264
1265
1266
1267
1268
1269
                    "var functions_list = [\"json_list\", \"nomad_structure_list\", \"get_energies\", \"get_energy_diffs\", \"get_descriptors\", \"get_data\"];",
                    "var n_functions = functions_list.length",
                    "var i;",
                    "for (i = 0; i < n_functions; i++) {",
                    "    beaker.evaluate(functions_list[i]);",
                    "    beaker.print('import '+functions_list[i]);",
                    "}"
1270
                ]
1271
1272
1273
            },
            "output": {
                "state": {},
1274
1275
                "selectedType": "Results",
                "pluginName": "JavaScript",
1276
                "height": 0
1277
1278
            },
            "evaluatorReader": true,
1279
            "lineCount": 8
1280
1281
        },
        {
1282
1283
1284
            "id": "markdownxZfCEL",
            "type": "markdown",
            "body": [
Emre Ahmetcik's avatar
Emre Ahmetcik committed
1285
                "Our target is to find the best low dimensional descriptor for a linear model. The $\\ell_0$ regularization",
1286
1287
1288
1289
                "",
                "$\\text{argmin}_{\\mathbf{c} \\in \\mathbb{R}^{m}} \\{\\|\\mathbf{P} - \\mathbf{D}\\mathbf{c}\\|^2_2 +\\lambda \\|\\mathbf{c}\\|_0\\}$",
                "",
                "provides exactly what we want. It is defined in the follwing and solved combinatorial:"
1290
1291
1292
1293
            ],
            "evaluatorReader": false
        },
        {
1294
            "id": "codeVTzLtQ",
1295
1296
1297
1298
            "type": "code",
            "evaluator": "IPython",
            "input": {
                "body": [
1299
                    "from itertools import combinations",
1300
                    "",
1301
1302
1303
1304
1305
1306
1307
1308
                    "def L0(P, D, dimension):",
                    "    n_rows, n_columns = D.shape",
                    "    D = np.column_stack((D,np.ones(n_rows)))",
                    "    SE_min = np.inner(P,P)",
                    "    coef_min, permu_min = None, None",
                    "    for permu in combinations(range(n_columns),dimension):",
                    "        D_ls = D[:,permu+(-1,)]",
                    "        coef, SE, __1, __2 = np.linalg.lstsq(D_ls,P)",
1309
1310
1311
1312
1313
1314
                    "        try:",
                    "            if SE[0] < SE_min: ",
                    "                SE_min = SE[0]",
                    "                coef_min, permu_min = coef, permu",
                    "        except:",
                    "            pass",
1315
                    "    RMSE = np.sqrt(SE_min/n_rows)",
Emre Ahmetcik's avatar
Emre Ahmetcik committed
1316
1317
                    "    return RMSE, coef_min, permu_min",
                    "print \"Done\""
1318
1319
1320
1321
                ]
            },
            "output": {
                "state": {},
Emre Ahmetcik's avatar
Emre Ahmetcik committed
1322
                "selectedType": "Results",
1323
                "pluginName": "IPython",
Emre Ahmetcik's avatar
Emre Ahmetcik committed
1324
1325
                "shellId": "CFDBBC64C18149AA8C1F32F183239C1A",
                "height": 0
1326
1327
            },
            "evaluatorReader": true,
Emre Ahmetcik's avatar
Emre Ahmetcik committed
1328
            "lineCount": 19,
1329
1330
            "tags": "L0"
        },
1331
1332
1333
1334
1335
1336
1337
1338
        {
            "id": "markdownkIHqDn",
            "type": "markdown",
            "body": [
                "Perform the $\\ell_0$-regularization for different dimensions (numbers of non-zero coefficients in the model) and see the root mean square errors (RMSE) and the selected features."
            ],
            "evaluatorReader": false
        },
1339
1340
1341
1342
1343
1344
        {
            "id": "code5rtN3R",
            "type": "code",
            "evaluator": "IPython",
            "input": {
                "body": [
Emre Ahmetcik's avatar
Emre Ahmetcik committed
1345
                    "selected_feature_list = ['r_s', 'r_p', 'r_d']",
1346
1347
                    "allowed_operations = []",
                    "P, D, feature_list, compounds_list, json_paths = get_data(selected_feature_list, allowed_operations)",
1348
                    "",
1349
                    "print \"    RMSE           Best desriptor\"",
1350
1351
                    "for dim in range(1,7):",
                    "    RMSE, coefficients, selected_indices = L0(P,D,dim)",
1352
                    "    print '%sD:' %dim, RMSE, [feature_list[i] for i in selected_indices]"
1353
1354
1355
1356
1357
1358
                ]
            },
            "output": {
                "state": {},
                "selectedType": "Results",
                "pluginName": "IPython",
Emre Ahmetcik's avatar
Emre Ahmetcik committed
1359
                "shellId": "CFDBBC64C18149AA8C1F32F183239C1A",
1360
                "height": 0
1361
1362
            },
            "evaluatorReader": true,
1363
            "lineCount": 8
1364
1365
        },
        {
1366
1367
1368
1369
1370
1371
            "id": "markdown7wwBua",
            "type": "markdown",
            "body": [
                "Improvements can be obtained by increasing the feature space using more complex features (the derived features). Run the following script and plot the results afterwards. How does the accuracy of the models change? How does the feature space size and the dimension of the descriptors depend on the needed time to solve the $\\ell_0$-problem?"
            ],
            "evaluatorReader": false
1372
1373
        },
        {
1374
            "id": "codeLJovcN",
1375
1376
1377
1378
            "type": "code",
            "evaluator": "IPython",
            "input": {
                "body": [
1379
                    "from time import time",
Emre Ahmetcik's avatar
Emre Ahmetcik committed
1380
                    "selected_feature_list = ['r_s', 'r_p', 'r_d']",
1381
1382
1383
                    "op_lists = [[], ['+','|-|'], ['+','|-|','exp'], ['+','|-|','exp', '^2'] ]",
                    "X  = []",
                    "Errors, Time = np.empty([3,len(op_lists)]), np.empty([3,len(op_lists)])",
1384
                    "",
1385
1386
                    "for n_op, allowed_operations in enumerate(op_lists):",
                    "    P, D, feature_list, compounds_list, json_paths = get_data(selected_feature_list, allowed_operations)",
1387
1388
                    "    number_of_features = len(feature_list)",
                    "    X.append(number_of_features)",
1389
1390
1391
1392
1393
1394
1395
1396
1397
                    "    for dim in range(1,4):",
                    "        t1= time()",
                    "        RMSE, coefficients, selected_indices = L0(P,D,dim)",
                    "        t2 = time()-t1             ",
                    "        ",
                    "        Time [dim-1][n_op] = t2",
                    "        Errors[dim-1][n_op] = RMSE ",
                    "        ",
                    "        print \"features: %s; %sD  RMSE: %s  best features: %s\" %(len(feature_list), dim, RMSE, [feature_list[i] for i in selected_indices])"
1398
1399
1400
1401
1402
1403
                ]
            },
            "output": {
                "state": {},
                "selectedType": "Results",
                "pluginName": "IPython",
Emre Ahmetcik's avatar
Emre Ahmetcik committed
1404
                "shellId": "CFDBBC64C18149AA8C1F32F183239C1A",
1405
                "height": 0
1406
1407
            },
            "evaluatorReader": true,
1408
            "lineCount": 19
1409
1410
        },
        {
1411
            "id": "codeZtBOQi",
1412
            "type": "code",
1413
            "evaluator": "IPython",
1414
1415
            "input": {
                "body": [
1416
1417
1418
                    "#plot",
                    "f, (ax1, ax2) = plt.subplots(1,2, sharex=True, figsize=(12,8))",
                    "ax1.set_xlabel('Number of features')",
Emre Ahmetcik's avatar
Emre Ahmetcik committed
1419
1420
                    "ax2.set_xlabel('Number of features'",
                    ")",
1421
1422
1423
                    "ax1.set_ylabel('RMSE [eV]')",
                    "ax2.set_ylabel('Time [s]')",
                    "#ax2.set_yscale('log')",
1424
                    "",
1425
1426
1427
                    "for dim in range(1,4):",
                    "    ax1.plot(X, Errors[dim-1], 's-', label='%sD' %dim)",
                    "    ax2.plot(X, Time[dim-1], 's-', label='%sD' %dim)",
Emre Ahmetcik's avatar
Emre Ahmetcik committed
1428
1429
                    "ax2.legend(loc='best')",
                    "plt.show()"
1430
                ]
1431
1432
1433
            },
            "output": {
                "state": {},
Emre Ahmetcik's avatar
Emre Ahmetcik committed
1434
                "selectedType": "Html",
1435
                "pluginName": "IPython",
Emre Ahmetcik's avatar
Emre Ahmetcik committed
1436
                "shellId": "CFDBBC64C18149AA8C1F32F183239C1A",
1437
                "height": 0
1438
1439
            },
            "evaluatorReader": true,
Emre Ahmetcik's avatar
Emre Ahmetcik committed
1440
            "lineCount": 14
1441
1442
        },
        {
1443
1444
1445
1446
1447
1448
1449
1450
1451
1452
1453
1454
1455
1456
1457
1458
1459
1460
1461
1462
1463
1464
1465
1466
1467
1468
1469
1470
            "id": "markdownJAAraB",
            "type": "markdown",
            "body": [
                "Assume now that we would like to include thousands or millions of (more) complex features to obtain more accurate models..."
            ],
            "evaluatorReader": false
        },
        {
            "id": "sectionX7Z2F0",
            "type": "section",
            "title": "Approximations to the $\\ell_0$ method",
            "level": 1,
            "evaluatorReader": false,
            "collapsed": true
        },
        {
            "id": "markdowneXLmPW",
            "type": "markdown",
            "body": [
                "<summary>",
                "<li >Perform a LASSO minimization and the LASSO+$\\ell_0$ method.</li>",
                "<li >Compare the solutions with the ones from the $\\ell_0$ method.</li>",
                "</summary>"
            ],
            "evaluatorReader": false
        },
        {
            "id": "codeSGrV0g",
1471
            "type": "code",
1472
            "evaluator": "JavaScript",
1473
1474
            "input": {
                "body": [
1475
                    "// 'Import' relevant functions from chapters before",
1476
1477
1478
1479
1480
1481
1482
                    "var functions_list = [\"json_list\", \"nomad_structure_list\", \"get_energies\", \"get_energy_diffs\", \"get_descriptors\", \"get_data\", \"L0\"];",
                    "var n_functions = functions_list.length",
                    "var i;",
                    "for (i = 0; i < n_functions; i++) {",
                    "    beaker.evaluate(functions_list[i]);",
                    "    beaker.print('import '+functions_list[i]);",
                    "}"
1483
                ]
1484
1485
1486
1487
            },
            "output": {
                "state": {},
                "selectedType": "Results",
1488
                "pluginName": "JavaScript",
1489
                "height": 0
1490
1491
            },
            "evaluatorReader": true,
1492
1493
1494
1495
1496
1497
1498
1499
1500
            "lineCount": 8
        },
        {
            "id": "section96VfCa",
            "type": "section",
            "title": "The LASSO",
            "level": 2,
            "evaluatorReader": false,
            "collapsed": false
1501
1502
1503
1504
1505
1506
        },
        {
            "id": "markdownHjYp4E",
            "type": "markdown",
            "body": [
                "",
1507
1508
1509
1510
1511
1512
                "One state-of-the art approximation to the $\\ell_0$-method is the LASSO: ",
                "",
                "$\\text{argmin}_{\\mathbf{c} \\in \\mathbb{R}^{m}} \\{\\|\\mathbf{P} - \\mathbf{D}\\mathbf{c}\\|^2_2 +\\lambda \\|\\mathbf{c}\\|_1\\}$.",
                "",
                "Before performing the LASSO regression we standardize the data to have mean 0 and variance 1, since otherwise the $\\ell_2$-norm of a column would affect bias its contribution to the model. <br>",
                "Note that we can use the LASSO also only for feature selection. We can use then a least-square model with the selected features afterwards instead of the LASSO model directly."
1513
1514
            ],
            "evaluatorReader": false
1515
        },
1516
1517
1518
1519
1520
1521
1522
1523
1524
        {
            "id": "code3B4gWJ",
            "type": "code",
            "evaluator": "IPython",
            "input": {
                "body": [
                    "from sklearn.linear_model import Lasso",
                    "import scipy.stats as ss",
                    "",
1525
                    "def lasso_fit(lam, P, D, feature_list):",
1526
1527
1528
1529
1530
1531
                    "    #LASSO",
                    "    D_standardized = ss.zscore(D)",
                    "    lasso =  Lasso(alpha=lam)",
                    "    lasso.fit(D_standardized, P)",
                    "    coef =  lasso.coef_",
                    "    ",
1532
                    "    # get strings of selected features",
1533
1534
1535
                    "    selected_indices = coef.nonzero()[0]",
                    "    selected_features = [feature_list[i] for i in selected_indices]",
                    "    ",
1536
1537
1538
1539
1540
                    "    # get RMSE of LASSO model",
                    "    P_predict = lasso.predict(D_standardized)",
                    "    RMSE_LASSO = np.linalg.norm(P-P_predict) / np.sqrt(82.)",
                    "       ",
                    "    #get RMSE for least-square fit",
1541
1542
                    "    D_new = D[:, selected_indices]",
                    "    D_new = np.column_stack((D_new, np.ones(82)))",
1543
1544
                    "    RMSE_LS = np.sqrt(np.linalg.lstsq(D_new,P)[1][0]/82.)",
                    "        ",
Emre Ahmetcik's avatar
Emre Ahmetcik committed
1545
1546
                    "    return RMSE_LASSO, RMSE_LS, coef, selected_features",
                    "print \"Done\""
1547
1548
1549
1550
                ]
            },
            "output": {
                "state": {},
Emre Ahmetcik's avatar
Emre Ahmetcik committed
1551
                "selectedType": "Results",
1552
                "pluginName": "IPython",
Emre Ahmetcik's avatar
Emre Ahmetcik committed
1553
1554
                "shellId": "CFDBBC64C18149AA8C1F32F183239C1A",
                "height": 0
1555
1556
            },
            "evaluatorReader": true,
Emre Ahmetcik's avatar
Emre Ahmetcik committed
1557
            "lineCount": 25
1558
1559
1560
1561
1562
        },
        {
            "id": "markdown5p8ptN",
            "type": "markdown",
            "body": [
1563
                "$\\lambda$ regulates the sparsity of the coefficient vector of the model. Get the data and try different $\\lambda$ by shifting the lever along the range. How good does LASSO (directly or with a least square fit afterwards) approximate the L0-method (when the same feature space is used for both)?"
1564
1565
1566
1567
1568
1569
1570
1571
1572
1573
            ],
            "evaluatorReader": false
        },
        {
            "id": "codeQQDvNE",
            "type": "code",
            "evaluator": "IPython",
            "input": {
                "body": [
                    "#import Data",
Emre Ahmetcik's avatar
Emre Ahmetcik committed
1574
                    "selected_feature_list = ['r_s', 'r_p', 'r_d']",
1575
1576
1577
1578
                    "allowed_operations = ['+','|-|','exp', '^2']",
                    "P, D, feature_list, compounds_list, json_paths = get_data(selected_feature_list, allowed_operations)"
                ]
            },
1579
1580
1581
1582
            "output": {
                "state": {},
                "selectedType": "Results",
                "pluginName": "IPython",
Emre Ahmetcik's avatar
Emre Ahmetcik committed
1583
                "shellId": "CFDBBC64C18149AA8C1F32F183239C1A",
1584
                "height": 0
1585
1586
            },
            "evaluatorReader": true,
1587
            "lineCount": 4
1588
1589
        },
        {
1590
            "id": "codeQMp7hF",
1591
            "type": "code",
1592
            "evaluator": "HTML",
1593
1594
            "input": {
                "body": [
1595
1596
1597
1598
1599
1600
1601
1602
1603
1604
                    "<input id=\"valBox\" type=\"range\" min=\"0.02\" max=\"0.36\" step=\"0.01\" ",
                    "   oninput=\"showVal(this.value)\" >",
                    "<output for=value id=\"output\">lambda: </output>",
                    "<script>",
                    "function showVal(newVal){",
                    "  beaker.lam = newVal;",
                    "  beaker.evaluate(\"lambda_cell\")",
                    "  document.querySelector('#output').value = \"lambda: \"+newVal;;",
                    "}",
                    "</script>"
1605
1606
1607
1608
1609
                ],
                "hidden": true
            },
            "output": {
                "state": {},
1610
1611
                "selectedType": "BeakerDisplay",
                "height": 0,
1612
1613
1614
                "result": {
                    "type": "BeakerDisplay",
                    "innertype": "Html",
1615
                    "object": "<script>\nvar beaker = bkHelper.getBeakerObject().beakerObj;\n</script>\n<input id=\"valBox\" type=\"range\" min=\"0.02\" max=\"0.36\" step=\"0.01\" \n   oninput=\"showVal(this.value)\" >\n<output for=value id=\"output\">lambda: </output>\n<script>\nfunction showVal(newVal){\n  beaker.lam = newVal;\n  beaker.evaluate(\"lambda_cell\")\n  document.querySelector('#output').value = \"lambda: \"+newVal;;\n}\n</script>"