Unverified Commit b7b7ae75 authored by Jan Janssen's avatar Jan Janssen Committed by GitHub
Browse files

Merge pull request #39 from pyiron/binder

setup database
parents 41043960 f1bfa62e
......@@ -4,15 +4,11 @@ bash binder/postBuild
# exclude notebooks
rm -rf ./old_notebooks
# execute notebook to generate dataset first
i=0;
cd datasets
papermill ImportDatabase.ipynb ImportDatabase-out.ipynb -k "python3" || i=$((i+1));
cd ../
rm ./datasets/ImportDatabase.ipynb
# execute notebooks
current_dir=$(pwd)
i=0;
for f in $(find . -name *.ipynb | sort -n); do
cd $(dirname $f);
notebook=$(basename $f);
......
# install dataset
cp datasets/Cu_training_archive.tar.gz .
cp datasets/export.csv .
python << EOF
from pyiron import Project
Project("datasets").unpack("Cu_training_archive")
EOF
rm Cu_training_archive.tar.gz
rm export.csv
# ngl view for jupyter
jupyter nbextension install nglview --py --sys-prefix
jupyter nbextension enable nglview --py --sys-prefix
......
,id,status,chemicalformula,job,subjob,project,timestart,timestop,totalcputime,computer,hamilton,hamversion,parentid,masterid
0,0,finished,,df1_A1_A2_A3_EV_elast_phon,/df1_A1_A2_A3_EV_elast_phon,Cu_training_archive/Cu_database,2021-02-08 10:33:52.341472,,,zora@cmti001#1,GenericJob,0.4,,
1,1,finished,,df3_10k,/df3_10k,Cu_training_archive/Cu_database,2021-02-08 10:33:53.993230,,,zora@cmti001#1,GenericJob,0.4,,
2,2,finished,,df2_1k,/df2_1k,Cu_training_archive/Cu_database,2021-02-08 10:33:54.435308,,,zora@cmti001#1,GenericJob,0.4,,
......@@ -32,7 +32,7 @@
"metadata": {},
"outputs": [],
"source": [
"data_pr = Project(\"import_database\")\n",
"data_pr = Project(\"../../datasets\")\n",
"if len(data_pr.job_table()) == 0:\n",
" data_pr.unpack(\"Cu_training_archive\")"
]
......
%% Cell type:code id: tags:
``` python
%pylab inline
```
%%%% Output: stream
Populating the interactive namespace from numpy and matplotlib
%% Cell type:code id: tags:
``` python
from pyiron import Project
```
%% Cell type:code id: tags:
``` python
data_pr = Project("import_database")
data_pr = Project("../../datasets")
if len(data_pr.job_table()) == 0:
data_pr.unpack("Cu_training_archive")
```
%% Cell type:code id: tags:
``` python
data_pr.job_table()
```
%%%% Output: execute_result
id status chemicalformula job \
0 21 finished None df1_A1_A2_A3_EV_elast_phon
1 22 finished None df3_10k
2 23 finished None df2_1k
subjob projectpath \
0 /df1_A1_A2_A3_EV_elast_phon /home/yury/pyiron/projects/
1 /df3_10k /home/yury/pyiron/projects/
2 /df2_1k /home/yury/pyiron/projects/
project timestart \
0 pyiron-2021/import_database/Cu_database/ 2021-02-08 10:33:52.341472
1 pyiron-2021/import_database/Cu_database/ 2021-02-08 10:33:53.993230
2 pyiron-2021/import_database/Cu_database/ 2021-02-08 10:33:54.435308
timestop totalcputime computer hamilton hamversion parentid \
0 None None zora@cmti001#1 GenericJob 0.4 None
1 None None zora@cmti001#1 GenericJob 0.4 None
2 None None zora@cmti001#1 GenericJob 0.4 None
masterid
0 None
1 None
2 None
%% Cell type:code id: tags:
``` python
data_job = data_pr.load('df1_A1_A2_A3_EV_elast_phon')
```
%% Cell type:markdown id: tags:
# Fitting project
%% Cell type:code id: tags:
``` python
from pyiron_gpl.pacemaker.pacemaker import PaceMakerJob
```
%% Cell type:code id: tags:
``` python
fit_pr = Project("pacemaker_fit")
```
%% Cell type:code id: tags:
``` python
#fit_pr.remove_jobs_silently()#
```
%% Cell type:code id: tags:
``` python
job = fit_pr.create_job(job_type=PaceMakerJob, job_name="df1_cut5_pyace" ,delete_existing_job=True)
```
%% Cell type:markdown id: tags:
## Fit
%% Cell type:code id: tags:
``` python
cutoff = 5.0
```
%% Cell type:code id: tags:
``` python
job.input["potential"]= {
"deltaSplineBins": 0.001,
"element": "Cu",
"fs_parameters": [1, 1, 1, 0.5],
"npot": "FinnisSinclairShiftedScaled",
"NameOfCutoffFunction": "cos",
"rankmax": 3,
"nradmax": [7,2,1],
"lmax": [0,2,1],
"ndensity": 2,
"rcut": cutoff,
"dcut": 0.01,
"radparameters": [5.25],
"radbase": "ChebExpCos",
}
```
%% Cell type:code id: tags:
``` python
job.input["fit"]= {
'optimizer': 'BFGS',
'maxiter': 150,
'loss': {
'kappa': 0.5,
'L1_coeffs': 5e-7, # L1-regularization
'L2_coeffs': 5e-7, # L2-regularization
'w1_coeffs': 1,
'w2_coeffs': 1,
#radial smoothness regularization
'w0_rad': 1e-4,
'w1_rad': 1e-4,
'w2_rad': 1e-4,
},
}
```
%% Cell type:code id: tags:
``` python
```
%% Cell type:code id: tags:
``` python
job.input["cutoff"] = cutoff
```
%% Cell type:code id: tags:
``` python
job.structure_data=data_job
```
%% Cell type:code id: tags:
``` python
job.executable
```
%%%% Output: execute_result
'/home/yury/pyiron/resources/pacemaker/bin/run_pacemakerjob_pyace.sh'
%% Cell type:code id: tags:
``` python
#job.executable.version="tf_cpu" # select the CPU-forces tensorflow version
```
%% Cell type:code id: tags:
``` python
job.run()
```
%%%% Output: stream
2021-02-28 01:16:07,353 - root - INFO - structure_data is TrainingContainer
2021-02-28 01:16:07,363 - root - INFO - Saving training structures dataframe into /home/yury/pyiron/projects/pyiron-2021/pacemaker_fit/df1_cut5_pyace_hdf5/df1_cut5_pyace/df_fit.pckl.gzip with pickle protocol = 4, compression = gzip
%%%% Output: stream
The job df1_cut5_pyace was saved and received the ID: 256
%% Cell type:code id: tags:
``` python
```
%% Cell type:code id: tags:
``` python
fit_pr.job_table(full_table=True)
```
%%%% Output: execute_result
id status chemicalformula job subjob \
0 62 finished None df1_cut5 /df1_cut5
1 256 finished None df1_cut5_pyace /df1_cut5_pyace
projectpath project \
0 /home/yury/pyiron/projects/ pyiron-2021/pacemaker_fit/
1 /home/yury/pyiron/projects/ pyiron-2021/pacemaker_fit/
timestart timestop totalcputime \
0 2021-02-26 18:00:20.326459 2021-02-26 18:02:25.068815 124.0
1 2021-02-28 01:16:07.713672 2021-02-28 01:20:43.280739 275.0
computer hamilton hamversion parentid masterid
0 pyiron@dell-inspiron#1 PaceMakerJob 0.1 None None
1 pyiron@dell-inspiron#1 PaceMakerJob 0.1 None None
%% Cell type:code id: tags:
``` python
```
%% Cell type:code id: tags:
``` python
plt.plot(job["output/log/loss"])
plt.yscale('log')
plt.xlabel("# iter")
plt.ylabel("Loss")
```
%%%% Output: execute_result
Text(0, 0.5, 'Loss')
%%%% Output: display_data
%% Cell type:code id: tags:
``` python
plt.plot(job["output/log/rmse_energy"])
plt.yscale('log')
plt.xlabel("# iter")
plt.ylabel("Energy RMSE, meV/atom")
```
%%%% Output: execute_result
Text(0, 0.5, 'Energy RMSE, meV/atom')
%%%% Output: display_data
%% Cell type:code id: tags:
``` python
plt.plot(job["output/log/rmse_forces"])
plt.yscale('log')
plt.xlabel("# iter")
plt.ylabel("Forces RMSE, meV/Ang/structure")
```
%%%% Output: execute_result
Text(0, 0.5, 'Forces RMSE, meV/Ang/structure')
%%%% Output: display_data
%% Cell type:markdown id: tags:
# Overview of the fitted potential internals
%% Cell type:code id: tags:
``` python
from pyace import *
```
%% Cell type:code id: tags:
``` python
final_potential = job.get_final_potential()
final_basis_set = ACEBBasisSet(final_potential)
```
%% Cell type:markdown id: tags:
For single-species potential there is only one **species block** for *Cu*:
%% Cell type:code id: tags:
``` python
len(final_potential.funcspecs_blocks)
```
%%%% Output: execute_result
1
%% Cell type:code id: tags:
``` python
Cu_block = final_potential.funcspecs_blocks[0]
```
%% Cell type:markdown id: tags:
Basic definitions and notations:
* Radial functions: $R_{nl}(r) = \sum_k c_{nlk} g_k(r)$
* Spherical harmonics: $ Y_{lm}(\hat{\pmb{r}}_{ji}) $
* Basis function: $\phi_{\mu_j \mu_i nlm}(\pmb{r}_{ji}) = R_{nl}^{\mu_j \mu_i}(r_{ji}) Y_{lm}(\hat{\pmb{r}}_{ji}) $
* Atomic base (A-functions): $ A_{i \mu n l m} = \sum_j \delta_{\mu \mu_j} \phi_{\mu_j\mu_i nlm}(\pmb{r}_{ji}) $
* Product of atomic base: $ \pmb{A}_{i\pmb{\mu n l m}} = \prod_{t = 1}^{\nu} A_{i \mu_t n_t l_t m_t} $
* Equivariant basis (B-functions): $ {B}_{i\pmb{\mu n l L}} = \sum_{\pmb{m}} \left(
\begin{array}{c}
\pmb{l m} \\
\pmb{L M}
\end{array}
L_R
\right) \pmb{A}_{i\pmb{\mu n l m}} $ ,
where $ \left(\begin{array}{c} \pmb{l m} \\ \pmb{L M}\end{array} L_R\right) $ is *generalized Clebsh-Gordan coefficients*
* Atomic property (densities) $ \rho_i^{(p)} = \sum_{\pmb{\mu n l L}} {c}^{(p)}_{\mu_i\pmb{\mu n l L}} {B}_{i\pmb{\mu n l L}} $
* Atomic energy: $ E_i = F(\rho_i^{(1)}, \dots,\rho_i^{(P)} ) $
%% Cell type:code id: tags:
``` python
```
%% Cell type:markdown id: tags:
radial coefficients $c_{nlk}$:
%% Cell type:code id: tags:
``` python
np.shape(Cu_block.radcoefficients)
```
%%%% Output: execute_result
(2, 3, 7)
%% Cell type:code id: tags:
``` python
```
%% Cell type:markdown id: tags:
Visualize the radial basis functions ($g_k$) and radial functions ($R_{nl}$) and their derivatives:
%% Cell type:code id: tags:
``` python
RadialFunctionsVisualization(final_basis_set).plot()
```
%%%% Output: display_data
%% Cell type:markdown id: tags:
Total number of basis functions
%% Cell type:code id: tags:
``` python
len(Cu_block.funcspecs)
```
%%%% Output: execute_result
18
%% Cell type:markdown id: tags:
List of B-basis functions $ {B}_{i\pmb{\mu n l L}}$: $\pmb{\mu} = $ `elements`, $\pmb{n} = $ `ns`, $\pmb{l} = $ `ls`, $\pmb{L} = $ `LS`) and corresponding coefficients $ {c}^{(p)}_{\mu_i\pmb{\mu n l L}} =$ `coeffs` for two densities
%% Cell type:code id: tags:
``` python
Cu_block.funcspecs
```
%%%% Output: execute_result
[BBasisFunctionSpecification(elements=[Cu,Cu], ns=[1], ls=[0], coeffs=[-0.947664,-0.20673]),
BBasisFunctionSpecification(elements=[Cu,Cu], ns=[2], ls=[0], coeffs=[0.127744,0.0378252]),
BBasisFunctionSpecification(elements=[Cu,Cu], ns=[3], ls=[0], coeffs=[0.395046,0.128133]),
BBasisFunctionSpecification(elements=[Cu,Cu], ns=[4], ls=[0], coeffs=[0.490821,0.17195]),
BBasisFunctionSpecification(elements=[Cu,Cu], ns=[5], ls=[0], coeffs=[0.192457,0.100866]),
BBasisFunctionSpecification(elements=[Cu,Cu], ns=[6], ls=[0], coeffs=[-0.381239,-0.0435031]),
BBasisFunctionSpecification(elements=[Cu,Cu], ns=[7], ls=[0], coeffs=[-0.717072,-0.124286]),
BBasisFunctionSpecification(elements=[Cu,Cu,Cu], ns=[1,1], ls=[0,0], coeffs=[0.176241,0.135483]),
BBasisFunctionSpecification(elements=[Cu,Cu,Cu], ns=[1,1], ls=[1,1], coeffs=[0.0335965,0.00542081]),
BBasisFunctionSpecification(elements=[Cu,Cu,Cu], ns=[1,1], ls=[2,2], coeffs=[0.361527,0.101027]),
BBasisFunctionSpecification(elements=[Cu,Cu,Cu], ns=[2,1], ls=[0,0], coeffs=[0.953892,0.247374]),
BBasisFunctionSpecification(elements=[Cu,Cu,Cu], ns=[2,1], ls=[1,1], coeffs=[-0.000366411,-0.000581538]),
BBasisFunctionSpecification(elements=[Cu,Cu,Cu], ns=[2,1], ls=[2,2], coeffs=[0.0654394,0.0146316]),
BBasisFunctionSpecification(elements=[Cu,Cu,Cu], ns=[2,2], ls=[0,0], coeffs=[0.142427,0.0350483]),
BBasisFunctionSpecification(elements=[Cu,Cu,Cu], ns=[2,2], ls=[1,1], coeffs=[0.00472481,6.57905e-05]),
BBasisFunctionSpecification(elements=[Cu,Cu,Cu], ns=[2,2], ls=[2,2], coeffs=[0.00853967,-0.000426002]),
BBasisFunctionSpecification(elements=[Cu,Cu,Cu,Cu], ns=[1,1,1], ls=[0,0,0], LS=[0], coeffs=[-0.0106058,-0.0227657]),
BBasisFunctionSpecification(elements=[Cu,Cu,Cu,Cu], ns=[1,1,1], ls=[1,1,0], LS=[0], coeffs=[0.309698,0.0822887])]
%% Cell type:markdown id: tags:
# Test fitted potential
%% Cell type:code id: tags:
``` python
test_pr = Project("test_ace_potential")
```
%% Cell type:code id: tags:
``` python
test_pr.remove_jobs_silently()
```
%% Cell type:code id: tags:
``` python
test_pr.job_table()
```
%%%% Output: execute_result
Empty DataFrame
Columns: []
Index: []
%% Cell type:code id: tags:
``` python
cu_ace_potential = job.get_lammps_potential()
```
%% Cell type:code id: tags:
``` python
cu_ace_potential
```
%%%% Output: execute_result
Config \
0 [pair_style pace\n, pair_coeff * * /home/yury/pyiron/projects/pyiron-2021/pacemaker_fit/df1_cut5_pyace_hdf5/df1_cut5_pyace/df1_cut5_pyace.ace Cu\n]
Filename Model Name Species
0 ACE df1_cut5_pyace [Cu]
%% Cell type:markdown id: tags:
## Optimization
%% Cell type:code id: tags:
``` python
lammps_job = test_pr.create.job.Lammps("opt_lammps", delete_existing_job=True)
```
%% Cell type:code id: tags:
``` python
# lammps_job.executable.version="2020.12.24_pace"
```
%% Cell type:code id: tags:
``` python
lammps_job.potential = cu_ace_potential
```
%% Cell type:code id: tags:
``` python
lammps_job.structure = test_pr.create.structure.ase_bulk("Cu","fcc",cubic=True)
```
%% Cell type:code id: tags:
``` python
lammps_job.calc_minimize(pressure=0.0)
```
%% Cell type:code id: tags:
``` python
lammps_job.run()
```
%%%% Output: stream
The job opt_lammps was saved and received the ID: 257
%% Cell type:code id: tags:
``` python
test_pr.job_table()
```
%%%% Output: execute_result
id status chemicalformula job subjob \
0 257 finished Cu4 opt_lammps /opt_lammps
projectpath project \
0 /home/yury/pyiron/projects/ pyiron-2021/test_ace_potential/
timestart timestop totalcputime \
0 2021-02-28 01:20:51.247184 2021-02-28 01:20:52.024520 0.0
computer hamilton hamversion parentid masterid
0 pyiron@dell-inspiron#1 Lammps 0.1 None None
%% Cell type:markdown id: tags:
## Elastic matrix
%% Cell type:code id: tags:
``` python
elmat_job = test_pr.create.job.ElasticMatrixJob("elmat", delete_existing_job=True)
```
%% Cell type:code id: tags:
``` python
elmat_job.input["eps_range"] = 0.05
```
%% Cell type:code id: tags:
``` python
ref_job = test_pr.create.job.Lammps("ref_job", delete_existing_job=True)
```
%% Cell type:code id: tags:
``` python
# ref_job.executable.version="2020.12.24_pace"
```
%% Cell type:code id: tags:
``` python
ref_job.potential = cu_ace_potential
```
%% Cell type:code id: tags:
``` python
ref_job.structure = lammps_job.get_structure()
```
%% Cell type:code id: tags:
``` python
elmat_job.ref_job = ref_job
```
%% Cell type:code id: tags:
``` python
elmat_job.run()
```
%%%% Output: stream
The job elmat was saved and received the ID: 258
The job s_e_0 was saved and received the ID: 259
The job s_01_e_m0_05000 was saved and received the ID: 260
The job s_01_e_m0_02500 was saved and received the ID: 261
The job s_01_e_0_02500 was saved and received the ID: 262
The job s_01_e_0_05000 was saved and received the ID: 263
The job s_08_e_m0_05000 was saved and received the ID: 264
The job s_08_e_m0_02500 was saved and received the ID: 265
The job s_08_e_0_02500 was saved and received the ID: 266
The job s_08_e_0_05000 was saved and received the ID: 267
The job s_23_e_m0_05000 was saved and received the ID: 268
The job s_23_e_m0_02500 was saved and received the ID: 269
The job s_23_e_0_02500 was saved and received the ID: 270
The job s_23_e_0_05000 was saved and received the ID: 271
%% Cell type:code id: tags:
``` python
elast_val=elmat_job["output/elasticmatrix"]
```
%% Cell type:code id: tags:
``` python
ses = np.array(elast_val['strain_energy'])
for se in ses:
plt.plot(se[:,0], se[:,1])
plt.xlabel("Strain")
plt.ylabel("E, eV")
```
%%%% Output: execute_result
Text(0, 0.5, 'E, eV')
%%%% Output: display_data
%% Cell type:code id: tags:
``` python
C=elast_val["C"]
```
%% Cell type:code id: tags:
``` python
print("C11 = {:.1f} GPa".format(C[0,0]))
print("C12 = {:.1f} GPa".format(C[0,1]))
print("C44 = {:.1f} GPa".format(C[3,3]))
```
%%%% Output: stream
C11 = 210.4 GPa
C12 = 120.1 GPa
C44 = 83.0 GPa
%% Cell type:markdown id: tags:
## Phonons
%% Cell type:code id: tags:
``` python
phon_job = test_pr.create.job.PhonopyJob("phon_job", delete_existing_job=True)
ref_job = test_pr.create.job.Lammps("ref_job", delete_existing_job=True)
# ref_job.executable.version="2020.12.24_pace"
ref_job.potential = cu_ace_potential
ref_job.structure = lammps_job.get_structure()
phon_job.input['dos_mesh']=75
phon_job.ref_job = ref_job
phon_job.run()
```
%%%% Output: stream
The job phon_job was saved and received the ID: 272
The job ref_job_0 was saved and received the ID: 273
%% Cell type:markdown id: tags:
# Comparison with some DFT reference data
%% Cell type:code id: tags:
``` python
from structdbrest import StructDBLightRester
```
%% Cell type:code id: tags:
``` python
rest = StructDBLightRester(token="workshop2021")
```
%% Cell type:code id: tags:
``` python
fhi_calc = rest.query_calculator_types("FHI%aims%")[0]
```
%%%% Output: stream
Querying...done
Response successful, size = 7.20 kB, time = 0.27 s
1 entries received
%% Cell type:code id: tags:
``` python
dft_elast_prop = rest.query_properties(rest.PropertyTypes.ELASTIC_MATRIX, composition="Cu-%", strukturbericht="A1",
calculator_name=fhi_calc.NAME)[0]
```
%%%% Output: stream
Querying...done
Response successful, size = 37.78 kB, time = 0.45 s
1 entries received
%% Cell type:code id: tags:
``` python
C_dft=dft_elast_prop.value["C"]
```
%% Cell type:code id: tags:
``` python
print("C11 = {:.1f} GPa / DFT C11={:.1f} GPa".format(C[0,0], C_dft[0][0]))
print("C12 = {:.1f} GPa / DFT C11={:.1f} GPa".format(C[0,1], C_dft[0][1]))
print("C44 = {:.1f} GPa / DFT C11={:.1f} GPa".format(C[3,3], C_dft[3][3]))
```
%%%% Output: stream
C11 = 210.4 GPa / DFT C11=176.9 GPa
C12 = 120.1 GPa / DFT C11=131.7 GPa
C44 = 83.0 GPa / DFT C11=82.5 GPa
%% Cell type:code id: tags:
``` python
dft_phon_prop = rest.query_properties(rest.PropertyTypes.PHONONS, composition="Cu-%", strukturbericht="A1",
calculator_name=fhi_calc.NAME)[0]
```
%%%% Output: stream
Querying...done
Response successful, size = 22.50 kB, time = 0.24 s
1 entries received
%% Cell type:code id: tags:
``` python
plt.plot(phon_job["output/dos_energies"], phon_job["output/dos_total"]/4, label="ACE")
plt.plot(dft_phon_prop.value["dos_energies"], dft_phon_prop.value["dos_total"], label="DFT")
plt.legend()
```
%%%% Output: execute_result
<matplotlib.legend.Legend at 0x7f3664444f10>
%%%% Output: display_data
%% Cell type:code id: tags:
``` python
```
......
......@@ -145,7 +145,7 @@
}
],
"source": [
"data_pr = Project(\"import_database\")\n",
"data_pr = Project(\"../../datasets\")\n",
"if len(data_pr.job_table()) == 0:\n",
" data_pr.unpack(\"Cu_training_archive\")\n",
"data_pr.job_table()"
......
%% Cell type:code id:political-robinson tags:
``` python
import pandas as pd
import numpy as np
from pyiron import Project, ase_to_pyiron
from pyiron_contrib.atomistic.atomicrex.atomicrex_job import Atomicrex
```
%% Cell type:code id:tracked-postcard tags:
``` python
data_pr = Project("import_database")
data_pr = Project("../../datasets")
if len(data_pr.job_table()) == 0:
data_pr.unpack("Cu_training_archive")
data_pr.job_table()
```
%%%% Output: execute_result
id status chemicalformula job \
0 619 finished None df1_A1_A2_A3_EV_elast_phon
1 620 finished None df3_10k
2 621 finished None df2_1k
subjob projectpath \
0 /df1_A1_A2_A3_EV_elast_phon /home/niklas/pyiron/projects/
1 /df3_10k /home/niklas/pyiron/projects/
2 /df2_1k /home/niklas/pyiron/projects/
project timestart timestop \
0 import_database/Cu_database/ 2021-02-08 10:33:52.341472 None
1 import_database/Cu_database/ 2021-02-08 10:33:53.993230 None
2 import_database/Cu_database/ 2021-02-08 10:33:54.435308 None
totalcputime computer hamilton hamversion parentid masterid
0 None zora@cmti001#1 GenericJob 0.4 None None
1 None zora@cmti001#1 GenericJob 0.4 None None
2 None zora@cmti001#1 GenericJob 0.4 None None
%% Cell type:code id:authentic-substance tags:
``` python
data_job = data_pr.load("df1_A1_A2_A3_EV_elast_phon")
df = data_job.to_pandas()
```
%% Cell type:code id:vertical-simon tags:
``` python
pr = Project("WorkshopPotential")
```
%%%% Output: stream
Are you sure you want to delete all jobs from 'WorkshopPotential'? y/(n) y
%% Cell type:code id:finite-planner tags:
``` python
job = pr.create_job(Atomicrex, "PotentialDF1")
```
%% Cell type:markdown id:fiscal-advocate tags:
### Add the structures that should be fitted.
It is possible to assign different weights to certain structures or properties, depending on what should be investigated using the potential. Here every structure has the same weight, but the force vector with N*3 values is normalized to have the same total weight as the single value energy. Therefore it is divided by the number of atoms.
%% Cell type:code id:funded-offense tags:
``` python
for id, row in df.iterrows():
struct = ase_to_pyiron(row.atoms)
s = job.structures.add_structure(struct, f"id{id}", relative_weight=1)
s.fit_properties.add_FitProperty("atomic-energy", target_value=row.energy/row.number_of_atoms, relative_weight=1)
s.fit_properties.add_FitProperty("atomic-forces", target_value=row.forces, relative_weight=1/row.number_of_atoms)
```
%% Cell type:markdown id:simplified-withdrawal tags:
### Define the type of potential and necessary functions.
In this case an eam potential is fitted.
%% Cell type:code id:worth-electricity tags:
``` python
job.potential = job.factories.potentials.eam_potential()
```
%% Cell type:markdown id:earlier-arrow tags:
It is necessary to define a pair potential, an electronic density function and an embedding function.
For all of those it is possible to choose between different functional forms.
Classic pair potentials are physically motivated and have a very limited number of paramaters that are derived from a experimentally measured quantity.
Splines or polynomials offer more flexibility, but are not directly physically motivated and can lead to unphysical oscillations or overfitting.
In this case a generalized morse function is used for the pair interaction, while the electronic density and embedding function will be splines. Depending on the properties that are calculated other functional forms could give better results.
The parameters in D0=3.5 and r0=1.8 are the approximate cohesive energy and the equilibrium lattice constant. Beta and S can also be derived from physical quantities but are chosen randomly in a typical range in this case. Delta is a parameter that shifts the whole function up or down. The initial parameter choices should not matter too much as long as they are somewhat reasonable since they will be optimized in the fitting process anyway.
%% Cell type:code id:grave-settlement tags:
``` python
V = job.factories.functions.morse_B(identifier="V_CuCu", D0=3.5, r0=1.8, beta=2, S=2, delta=0)
```
%% Cell type:code id:sharp-photographer tags:
``` python
V.parameters.D0.min_val = 0
V.parameters.D0.max_val = 5
V.parameters.r0.min_val = 1
V.parameters.r0.max_val = 2.5
V.parameters.delta.min_val = -1
V.parameters.delta.max_val = 1
V.parameters.beta.min_val = 0.1
V.parameters.beta.max_val = 10
```
%% Cell type:markdown id:rough-purchase tags:
Additionally a screening function needs to be defined for the morse potential
%% Cell type:code id:practical-details tags:
``` python
V.screening = job.factories.functions.exp_A_screening(identifier="V_cutoff", cutoff=7)
```
%% Cell type:markdown id:external-ready tags:
The electron density is chosen to be a spline function. The cutoff has to be defined. Derivatives left and right are optional, they default to 0. For the right cutoff this is fine, since the forces should smoothly go to 0. For the left this is not necessarily the best choice, since the function value should increase at very close distances.
%% Cell type:code id:departmental-dynamics tags:
``` python
rho = job.factories.functions.spline(identifier="rho_CuCu", cutoff=7, derivative_left=-1)
```
%% Cell type:markdown id:latter-wright tags:
For a spline function it is necessary to define node points. They can be equally spaced or sampled with higher density around turning points, f.e. the first neighbor distance.
Too few nodepoints mean low flexibilty, too many lead to overfitting. This requires some testing to find an optimal choice.
%% Cell type:code id:double-engineering tags:
``` python
rho_nodes = np.linspace(0.5, 7.0, 7).round(2)
rho_nodes
```
%%%% Output: execute_result
array([0.5 , 1.58, 2.67, 3.75, 4.83, 5.92, 7. ])
%% Cell type:markdown id:handled-housing tags:
The nodes need initial values. The electron density should be proportional to $e^{-r}$, so this function is chosen to calculate them.
%% Cell type:code id:developing-apache tags:
``` python
decaying_exp = lambda r: np.exp(-r)
rho_initial = decaying_exp(rho_nodes)
```
%% Cell type:markdown id:separated-journal tags:
Additionally it is a good idea to define limits for the node points. This is optional for local minimizers, but the fit can quickly run away without limits. Global optimizers typically require them to constrain the sampled space.
A density can't be negative so the lower limit is set to 0. The upper limit is chosen to be 3 times the initial values. These choices aswell as the choice for $e^{-r}$ as initial values are somewhat arbitrary, but don't matter much. The electron density from single atoms does not directly influence the calculated energies and forces, instead the summed up density at some place is used in the embedding function, so the final numerical values are an interplay between electron density and embedding function. Since the latter will also be a spline function it can only be defined for a certain range of rho values as node points. Therefore it is better to limit the range of electron density values and define larger limits for the embedding function instead.
%% Cell type:code id:colored-discount tags:
``` python
rho_mins = np.zeros((len(rho_nodes)))
rho_maxs = 3*rho_initial.round(6)
rho.parameters.create_from_arrays(rho_nodes, rho_initial, min_vals=rho_mins, max_vals=rho_maxs)
```
%% Cell type:raw id:preliminary-peace tags:
Finally the last node point at the cutoff range is set to 0 and fitting is disabled to prevent a discontinuous change of energy at the cutoff.
%% Cell type:code id:lasting-reply tags:
``` python
rho.parameters["node_7.0"].start_val = 0
rho.parameters["node_7.0"].enabled = False
```
%% Cell type:markdown id:saving-vegetable tags:
$-\sqrt(\rho)$ can be used as initial guess for the embedding energy, which is taken from second moment approximation tight binding.
The node points have to be chosen in a range compatible to the electron density. This can be estimated by calculating it for a densely packed structure.
Alternatively atomicrex writes the maximum electron density of all structures to the output. This can be used as a hint for the node points for consequent fits.
Everything else is similar to the electron density.
%% Cell type:code id:overall-flower tags:
``` python
F = job.factories.functions.spline(identifier="F_CuCu", cutoff=5)
F_nodes = np.linspace(0.0, 5.0, 7).round(2) #9 is worse 11 is worse 7 is best
F_init = -np.sqrt(F_nodes)
F_maxs = np.zeros(len(F_nodes))
F_mins = -np.ones(len(F_nodes))*5
F.parameters.create_from_arrays(F_nodes, F_init, F_mins, F_maxs)
F.parameters["node_0.0"].enabled=False
F.parameters["node_0.0"].start_val = 0
F.parameters["node_5.0"].enabled=False
F.parameters["node_5.0"].start_val = 0
```
%% Cell type:markdown id:neutral-gateway tags:
The functions have to be assigned to the potential
%% Cell type:code id:major-capacity tags:
``` python
job.potential.pair_interactions[V.identifier] = V
job.potential.electron_densities[rho.identifier] = rho
job.potential.embedding_energies[F.identifier] = F
```
%% Cell type:markdown id:unusual-retirement tags:
### Define fitting procedure
Finally a few parameters need to be set that influence the fitting process.
The minimization can be done with different algorithms. Atomicrex itself implements the BFGS algorithm. Additionally the algorithms from the nlopt library can be used.
%% Cell type:code id:productive-spare tags:
``` python
## Define the atom types of the potential
job.input.atom_types.Cu = None
##
job.input.fit_algorithm = job.factories.algorithms.ar_lbfgs(max_iter=500)
job.run()
job.output
```
%%%% Output: stream
The job PotentialDF1 was saved and received the ID: 622
%%%% Output: execute_result
Output({'error': None, 'residual': 0.162222, 'iterations': 19588})
%% Cell type:markdown id:abandoned-reform tags:
### Same for the 1000 structures dataset
The final parameters of the 100 structure fit can be used for the 1000 Structure fit. This speeds up the fitting process and often leads to better results.
In general it is a good idea to start with few structures and try around with different functions and initial parameters. This is much faster than using all structures from the beginning and gives good guesses for the initial values of the parameters. It also allows to use global optimization with millions of steps in short time spans.
### This can take long and writes 1000 seperate POSCAR files, TAKE CARE
%% Cell type:code id:preceding-march tags:
``` python
j = pr.create_job(Atomicrex, "PotentialDF2")
j.potential = job.potential.copy()
## Use the final parameters as starting values for the new fit
j.potential.copy_final_to_initial_params()
j.input = job.input.copy()
```
%% Cell type:code id:ancient-september tags:
``` python
df = data_pr.load("df2_1k").to_pandas()
for id, row in df.iterrows():
struct = ase_to_pyiron(row.atoms)
s = j.structures.add_structure(struct, f"id{id}", relative_weight=1)
s.fit_properties.add_FitProperty("atomic-energy", target_value=row.energy/row.number_of_atoms, relative_weight=1)
s.fit_properties.add_FitProperty("atomic-forces", target_value=row.forces, relative_weight=1/row.number_of_atoms)
```
%% Cell type:code id:correct-israeli tags:
``` python
import time
```
%% Cell type:code id:multiple-harbor tags:
``` python
t1 = time.time()
j.run()
t2 = time.time()
```
%%%% Output: stream
The job PotentialDF2 was saved and received the ID: 623
%% Cell type:code id:eligible-soviet tags:
``` python
t2-t1
```
%%%% Output: execute_result
538.0122790336609
%% Cell type:code id:subsequent-diploma tags:
``` python
j.output
```
%%%% Output: execute_result
Output({'error': None, 'residual': 117.878, 'iterations': 3619})
%% Cell type:markdown id:collaborative-charleston tags:
This is the result if the initilly guessed values are taken instead of the fitted ones.
%% Cell type:code id:dutch-module tags:
``` python
j2 = pr.create_job(Atomicrex, "PotentialDF2_BadStartParams", delete_existing_job=True)
j2.potential = job.potential.copy()
j2.input = j.input.copy()
j2.structures = j.structures
j2.run()
```
%%%% Output: stream
The job PotentialDF2_BadStartParams was saved and received the ID: 624
%% Cell type:code id:innocent-pasta tags:
``` python
j2.output
```
%%%% Output: execute_result
Output({'error': None, 'residual': 8269.14, 'iterations': 607})
%% Cell type:code id:handed-tribute tags:
``` python
```
......
......@@ -18,23 +18,13 @@
"pr = Project(\"runner_fit\")"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [],
"source": [
"!cp ../../datasets/Cu_training_archive.tar.gz .\n",
"!cp ../../datasets/export.csv ."
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [],
"source": [
"pr_fit = Project(\"import_database\")\n",
"pr_fit = Project(\"../../datasets\")\n",
"if len(pr_fit.job_table()) == 0:\n",
" pr_fit.unpack(\"Cu_training_archive\")"
]
......
%% Cell type:code id: tags:
``` python
from pyiron import Project
```
%% Cell type:code id: tags:
``` python
pr = Project("runner_fit")
```
%% Cell type:code id: tags:
``` python
!cp ../../datasets/Cu_training_archive.tar.gz .
!cp ../../datasets/export.csv .
```
%% Cell type:code id: tags:
``` python
pr_fit = Project("import_database")
pr_fit = Project("../../datasets")
if len(pr_fit.job_table()) == 0:
pr_fit.unpack("Cu_training_archive")
```
%% Cell type:code id: tags:
``` python
pr_fit.job_table()
```
%%%% Output: execute_result
id status chemicalformula job \
1 13791972 finished None df1_A1_A2_A3_EV_elast_phon
0 13791973 finished None df3_10k
2 13791974 finished None df2_1k
subjob projectpath \
1 /df1_A1_A2_A3_EV_elast_phon /cmmc/u/
0 /df3_10k /cmmc/u/
2 /df2_1k /cmmc/u/
project \
1 zora/pyiron/projects/PotentialWorkshop/pyiron_potentialfit/day_2/runner/import_database/Cu_database/
0 zora/pyiron/projects/PotentialWorkshop/pyiron_potentialfit/day_2/runner/import_database/Cu_database/
2 zora/pyiron/projects/PotentialWorkshop/pyiron_potentialfit/day_2/runner/import_database/Cu_database/
timestart timestop totalcputime computer \
1 2021-02-08 10:33:52.341472 None None zora@cmti001#1
0 2021-02-08 10:33:53.993230 None None zora@cmti001#1
2 2021-02-08 10:33:54.435308 None None zora@cmti001#1
hamilton hamversion parentid masterid
1 TrainingContainer 0.4 None None
0 TrainingContainer 0.4 None None
2 TrainingContainer 0.4 None None
%% Cell type:code id: tags:
``` python
import pyiron_contrib
```
%% Cell type:code id: tags:
``` python
j = pr.create.job.RunnerFit('fit', delete_existing_job=True)
```
%% Cell type:code id: tags:
``` python
j.add_job_to_fitting(pr_fit.load('df1_A1_A2_A3_EV_elast_phon'))
```
%% Cell type:code id: tags:
``` python
j.run()
```
%%%% Output: stream
The job fit was saved and received the ID: 13791989
%% Cell type:code id: tags:
``` python
j.lammps_potential
```
%%%% Output: execute_result
Name \
0 RuNNer-Cu
Filename \
0 [/cmmc/u/zora/pyiron/projects/PotentialWorkshop/pyiron_potentialfit/day_2/runner/runner_fit/fit_hdf5/fit/input.nn, /cmmc/u/zora/pyiron/projects/PotentialWorkshop/pyiron_potentialfit/day_2/runner/r...
Model Species \
0 RuNNer [Cu]
Config
0 [pair_style nnp dir "./" showew no showewsum 0 resetew no maxew 100 cflength 1.8897261328 cfenergy 0.0367493254 emap "1:Cu"\n, pair_coeff * * 12\n]
%% Cell type:code id: tags:
``` python
pr.job_table()
```
%%%% Output: execute_result
id status chemicalformula job subjob projectpath \
0 13791989 finished None fit /fit /cmmc/u/
project \
0 zora/pyiron/projects/PotentialWorkshop/pyiron_potentialfit/day_2/runner/runner_fit/
timestart timestop totalcputime \
0 2021-03-04 19:20:48.233697 2021-03-04 19:21:07.665989 19.0
computer hamilton hamversion parentid masterid
0 zora@cmti001#1 RunnerFit 0.4 None None
......
......@@ -20,8 +20,7 @@
"metadata": {},
"outputs": [],
"source": [
"from pyiron import Project\n",
"import pyiron_gpl"
"from pyiron import Project"
]
},
{
......@@ -1968,7 +1967,7 @@
"metadata": {},
"outputs": [],
"source": [
"pr_import = Project(\"import_database\")\n",
"pr_import = Project(\"../datasets\")\n",
"if len(pr_import.job_table()) == 0:\n",
" pr_import.unpack(\"Cu_training_archive\")"
]
......
%% Cell type:code id:decreased-jumping tags:
``` python
import numpy as np
%matplotlib inline
import matplotlib.pylab as plt
import pandas as pd
```
%% Cell type:code id:monetary-county tags:
``` python
from pyiron import Project
import pyiron_gpl
```
%% Cell type:code id:based-croatia tags:
``` python
pr = Project("validation")
#pr.remove_jobs(recursive=True)
```
%% Cell type:code id:deluxe-fight tags:
``` python
# The list of potentials to iterate over
potential_list = ['2012--Mendelev-M-I--Cu--LAMMPS--ipr1', '2004--Zhou-X-W--Cu-Ag-Au--LAMMPS--ipr2', '1985--Foiles-S-M--Ni-Cu--LAMMPS--ipr1']
```
%% Cell type:code id:atmospheric-harbor tags:
``` python
# Do Murnaghan, ElasticMatrix job, vac formation energy, binding energy, surface energies, comparison with dataset forces, energies
```
%% Cell type:code id:seasonal-truck tags:
``` python
def clean_project_name(name):
return name.replace("-", "_")
```
%% Cell type:code id:elect-optics tags:
``` python
%%time
for pot in potential_list:
group_name = clean_project_name(pot)
pr_pot = pr.create_group(pot)
job_ref = pr_pot.create_job(pr_pot.job_type.Lammps, "ref_job")
job_ref.structure = pr_pot.create_ase_bulk("Cu")
job_ref.potential = pot
job_ref.calc_minimize()
murn_job = job_ref.create_job(pr_pot.job_type.Murnaghan, "murn_job")
murn_job.run()
murn_job.plot()
```
%%%% Output: stream
The job murn_job was saved and received the ID: 3880
The job strain_0_9 was saved and received the ID: 3881
The job strain_0_92 was saved and received the ID: 3882
The job strain_0_94 was saved and received the ID: 3883
The job strain_0_96 was saved and received the ID: 3884
The job strain_0_98 was saved and received the ID: 3885
The job strain_1_0 was saved and received the ID: 3886
The job strain_1_02 was saved and received the ID: 3887
The job strain_1_04 was saved and received the ID: 3888
The job strain_1_06 was saved and received the ID: 3889
The job strain_1_08 was saved and received the ID: 3890
The job strain_1_1 was saved and received the ID: 3891
job_id: 3881 finished
job_id: 3882 finished
job_id: 3883 finished
job_id: 3884 finished
job_id: 3885 finished
job_id: 3886 finished
job_id: 3887 finished
job_id: 3888 finished
job_id: 3889 finished
job_id: 3890 finished
job_id: 3891 finished
%%%% Output: display_data