From a258536610caa5426aee3d237f198e8a0e3440fe Mon Sep 17 00:00:00 2001 From: Pilar Cossio <pilar.cossio@biophys.mpg.de> Date: Wed, 20 Dec 2017 21:01:54 +0100 Subject: [PATCH] Corrections Manual; Prior Norm and bug in projection --- bioem.cpp | 8 ++-- doc/index.rst | 104 ++++++++++++++++++++++++++------------------------ param.cpp | 4 +- 3 files changed, 61 insertions(+), 55 deletions(-) diff --git a/bioem.cpp b/bioem.cpp index 1cf3cd0..b3f0cdc 100644 --- a/bioem.cpp +++ b/bioem.cpp @@ -1696,9 +1696,10 @@ int bioem::createProjection(int iMap, mycomplex_t *mapFFT) irad = int(Model.points[n].radius / param.pixelSize) + 1; rad2 = Model.points[n].radius * Model.points[n].radius; - if (i < 0 || j < 0 || i >= param.param_device.NumberPixels || - j >= param.param_device.NumberPixels) + if (i < irad || j < irad || i >= param.param_device.NumberPixels-irad || + j >= param.param_device.NumberPixels-irad ) { + if (DebugOutput >= 0) cout << "WARNING::: Model Point out of Projection map: " << i << ", " << j << "\n"; @@ -1717,7 +1718,7 @@ int bioem::createProjection(int iMap, mycomplex_t *mapFFT) // continue; if (not param.ignorepointsout) exit(1); - } + }else{ // Projecting over the radius for (int ii = i - irad; ii < i + irad + 1; ii++) @@ -1736,6 +1737,7 @@ int bioem::createProjection(int iMap, mycomplex_t *mapFFT) sqrt(rad2 - dist) * Model.points[n].density * 3 / (4 * M_PI * Model.points[n].radius * rad2); } + } } } } diff --git a/doc/index.rst b/doc/index.rst index de745d1..3ac8ede 100644 --- a/doc/index.rst +++ b/doc/index.rst @@ -26,15 +26,15 @@ Welcome to BioEM's documentation! :Authors: - P.Cossio, D. Rohr, V. Lindestruth, G. Hummer + P.Cossio, D. Rohr, Fabio Baruffa, Luka Stanisic, Markus Rampp, V. Lindestruth, G. Hummer :Organization: - **Max Planck Institute of Biophysics** + *Max Planck Institute of Biophysics* - **Frankfurt Institute for Advanced Studies** + *Frankfurt Institute for Advanced Studies* - **Max Planck Computing and Data Facility** + *Max Planck Computing and Data Facility* :Date: @@ -83,8 +83,8 @@ the instantaneous configuration of the biomolecule, and, in principle, each particle can be in a different conformational state. However, analyzing the images individually is challenging because the signal-to-noise level is very low. This has so far limited EM to study a -small subset of static biomolecules, because, for it to be effective, it -requires most particles to be in the same conformation. +subset of static biomolecules because the reconstruction of high-resolution +density maps requires most particles to be in the same conformation. Here, we present a computing tool to harness the single-molecule character of EM for studying dynamic biomolecules. With our method, we @@ -98,7 +98,7 @@ in the image formation, such as molecular orientation and interference effects. The BioEM software computes this integral via numerical grid sampling over a portable CPU/GPU computing platform. By comparing the BioEM posterior probabilities it is possible to discriminate and rank -structural models, allowing to characterize the variability and dynamics +structural models, allowing to characterize the dynamics of the biological system. In this chapter, we briefly describe the mathematical background of the @@ -201,7 +201,7 @@ Mandatory preinstalled libraries in BioEM, to calculate the convolution of the ideal image with the PSF, and the cross-correlation of the calculated image to the experimental image. FFTW can be downloaded from the webpage - www.fftw.org. + https://fftw.org/. Optional preinstalled programs ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -212,13 +212,13 @@ and optimal performance, are described below: - *CMake (minimal version 2.6):* is a cross-platform software for managing the build process of software using a compiler-independent method (*i.e.*, creating a Makefile). CMake can be downloaded from - www.cmake.org. + https://cmake.org/. - *CUDA (minimal version 5.5):* is a parallel computing platform implemented by the graphics processing units (GPUs) that NVIDIA produce. Thus, NVIDIA graphics cards are necessary for running BioEM with the CUDA implementation. For more information see - www.nvidia.com. + https://nvidia.com/. - *MPI:* Message Passing Interface is a standardized and portable message-passing system designed to function on a wide variety of @@ -310,7 +310,7 @@ ON/OFF options are presented. .. table:: CMake keyword options. +-----------------------------+---------------------------------------------------------+ - | **Keyword** | **Option** | + | **<optionname>** | **Option** | +=============================+=========================================================+ | ``USE_OPENMP`` | Enable/Disable OpenMP | +-----------------------------+---------------------------------------------------------+ @@ -368,7 +368,7 @@ For a simple test, run the BioEM executable ./bioEM If the code runs successfully, the output on the terminal screen -should be as that shown in :numref:`Listing %s<cmdline>`. +should be as shown in :numref:`Listing %s<cmdline>`. .. .. _tabletest: .. code-block:: none @@ -535,6 +535,8 @@ the logarithm of the posterior probability of the model to each individual experimental image and the parameter set that gives a maximum of the posterior (see section :ref:`anaout` for its format). +.. _inparam: + Input of Physical Parameters ---------------------------- @@ -546,8 +548,6 @@ of the algorithm, such as the integration ranges and grid points, and are passed using specific keywords in the this file (see also section :ref:`infile`). -.. _inparam: - Micrograph parameters ~~~~~~~~~~~~~~~~~~~~~ @@ -654,8 +654,8 @@ quaternions: - *Non-uniform sampling of orientations from a file:* We note that with the option of reading the orientations from a file (section - :ref:`ortfile`) the user has great freedom to sample, also non-uniformly, - the orientation space. + :ref:`ortfile`) the user has great freedom to sample also non-uniformly + the orientation space (for example around a given orientation, see :ref:`modcom`). Integration of the PSF parameters ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -679,7 +679,7 @@ The envelope function is .. math:: \mathrm{Env}(s)=e^{-bs^2/2}, where parameter :math:`b` controls the Gaussian width and modulates the -CTF. +. To calculate the BioEM posterior probability, we integrate numerically the three parameters :math:`\Delta f`, :math:`b` and :math:`A`. To do @@ -760,7 +760,7 @@ Priors the high-frequency components in Fourier space, the code utilizes a Gaussian prior on the :math:`b` envelope parameter - .. math:: p(b)=\frac{1}{\sqrt{2\pi}\sigma_b}e^{-b^2/2\sigma_b^2}, + .. math:: p(b)=\frac{1}{2\sqrt{2\pi}\sigma_b}e^{-b^2/2\sigma_b^2}, where :math:`\sigma_b` is the Gaussian width. By default the Gaussian prior is centered at zero, and :math:`\sigma_b=100\AA`, to @@ -829,7 +829,7 @@ orientations. The keyword in parameter file is With this feature there is an additional output file :outpar:`ANG_PROB` where ``(int)`` orientations with highest posterior -are written. The orientations in this file are sorted in decreasing +are written. The orientations in this file are sorted in decreasing log-posterior order. File Formats @@ -845,7 +845,7 @@ There are two types of model file formats that are read by BioEM: - *Text file:* A simple text file with format “%f %f %f %f %f”. The first three columns are the coordinates of the sphere centers in :math:`\AA`, the fourth column is the radius in :math:`\AA`, and the - last column is the corresponding number of electrons. + last column is the corresponding number of electrons (which can be non-integer). (Format: ``x — y — z — radius — number electrons``). @@ -954,37 +954,17 @@ BioEM posterior probability computation: it is the number of pixels along an axis. This should coincide with the number of pixels read from the micrograph. -- :inpar:`GRIDPOINTS_ALPHA` ``(int)``: (Integration of Orientations) - Number of grid points used in the integration over Euler angle - :math:`\alpha \in [-\pi,\pi]`. Here a cubic grid in Euler angle - space is performed. The integral over Euler angle :math:`\gamma` is - identical to that of :math:`\alpha`. - -- :inpar:`GRIDPOINTS_BETA` ``(int)``: (Integration of Orientations) - Number of grid points used in the integration over - :math:`\cos(\beta) \in [-1,1]`. - -- :inpar:`USE_QUATERNIONS`: (Integration of Orientations) If using - quaternions to the describe the orientations. *Recommended* for - uniformly sampling of :math:`SO3` with the quaternions lists - available in the **Quaternions** directory. - -- :inpar:`GRIDPOINTS_QUATERNION` ``(int)``: (Integration of - Orientations) For a hypercubic grid quaternion sampling. Each - quaternion is within :math:`[-1,1]`. ``(int)`` is the number of - grid points per dimension. - -- :inpar:`CTF_DEFOCUS` ``(float) (float) (int)``: (CTF Integration) +- :inpar:`CTF_DEFOCUS` ``(float) (float) (int)``: (CTF integration) Grid sampling of CTF defocus, :math:`\Delta f`. Units of micro-meters. ``(float) (float)`` are the starting and ending limits, respectively, and ``(int)`` is the number of grid points. -- :inpar:`CTF_B_ENV` ``(float) (float) (int)``: (CTF Integration) +- :inpar:`CTF_B_ENV` ``(float) (float) (int)``: (CTF integration) Grid sampling of envelope parameter :math:`b`. Units of Å\ :math:`^2`. ``(float) (float)`` are the starting and ending limits, respectively, and ``(int)`` is the number of grid points. -- :inpar:`CTF_AMPLITUDE` ``(float) (float) (int)``: (CTF Integration) +- :inpar:`CTF_AMPLITUDE` ``(float) (float) (int)``: (CTF integration) Grid sampling of the CTF amplitude, :math:`A` (adimensional :math:`\in [0,1]`). ``(float) (float)`` are the starting and ending limits, respectively, and ``(int)`` is the number of grid points. @@ -997,6 +977,29 @@ BioEM posterior probability computation: Optional keywords: ^^^^^^^^^^^^^^^^^^ +- :inpar:`GRIDPOINTS_ALPHA` ``(int)``: (Integration of orientations, + mandatory if quaterionions or `--ReadOrientation` are not used) + Number of grid points used in the integration over Euler angle + :math:`\alpha \in [-\pi,\pi]`. Here a cubic grid in Euler angle + space is performed. The integral over Euler angle :math:`\gamma` is + identical to that of :math:`\alpha`. + +- :inpar:`GRIDPOINTS_BETA` ``(int)``: (Integration of orientations, + mandatory if quaterionions or `--ReadOrientation` are not used) + Number of grid points used in the integration over + :math:`\cos(\beta) \in [-1,1]`. + +- :inpar:`USE_QUATERNIONS`: (Integration of rientations) If using + quaternions to the describe the orientations. *Recommended* for + uniformly sampling of :math:`SO3` with the quaternions lists + available in the **Quaternions** directory. + +- :inpar:`GRIDPOINTS_QUATERNION` ``(int)``: (Integration of + Orientations) For a hypercubic grid quaternion sampling. Each + quaternion is within :math:`[-1,1]`. ``(int)`` is the number of + grid points per dimension. + + - :inpar:`ELECTRON_WAVELENGTH` ``(float)``: To change the default value of the electron wavelength ``(float)`` used to calculate the CTF phase with the defocus. Default 0.019688 :math:`\AA`. @@ -1135,9 +1138,9 @@ part is not the time-limiting step. **BioEM 2.0** has been optimized for both CPU and GPU performance according to two different scenarios: -- **Many orientations versus *many* experimental images** +- **Many orientations versus** *many* **experimental images** -- **Many orientations versus *few* experimental images** +- **Many orientations versus** *few* **experimental images** Because the optimal parallelization scheme changes depending on the previous conditions, we address each item separately. @@ -1359,7 +1362,7 @@ In this algorithm, the parallelization for GPU is now done on a lower level: the GPU (or OpenMP for the only CPU case) processes the center displacements, whilst the CPU with MPI processes the orientations and with OpenMP the projections and convolutions. Hence, there is more -parallelism and better performance for the GPU. +parallelism and better performance for the GPU for this case. Parallelization ~~~~~~~~~~~~~~~ @@ -1375,7 +1378,8 @@ We present the different parallelization options when using the :envvar:`BIOEM_PROJ_CONV_AT_ONCE` is by default equal to :envvar:`OMP_NUM_THREADS`. However, :envvar:`BIOEM_PROJ_CONV_AT_ONCE` can also be modified as described - above, and for this algorithm its contribution is important. These + above. Importantly, for :envvar:`BIOEM_ALGO`\ ``=2`` the contribution of + :envvar:`BIOEM_PROJ_CONV_AT_ONCE` is signifcant. These OMP threads are used to work in parallel on the projections, the convolutions, and if GPU is disabled on the center displacements and comparisons. @@ -1541,15 +1545,15 @@ List of environment variables .. envvar:: BIOEM_PROJ_CONV_AT_ONCE - (Default: 1) This defines the number of projections and + (Default: 1 for :envvar:`BIOEM_ALGO`\ ``=1`` and :envvar:`=OMP_NUM_THREADS` for + :envvar:`BIOEM_ALGO`\ ``=2``) This defines the number of projections and convolutions prepared at once. OpenMP threads (whose number is defined by :envvar:`OMP_NUM_THREADS` environment variable) are used to prepare these projections and convolutions in parallel. For :envvar:`BIOEM_ALGO`\ ``=1`` :envvar:`BIOEM_PROJ_CONV_AT_ONCE`\ ``=[x]`` is mostly relevant, if OpenMP is used, no GPU is used, and/or the number of reference particle-image is very small. For - :envvar:`BIOEM_ALGO`\ ``=2`` by default it is always selected and - equal to the number of OMP threads. + :envvar:`BIOEM_ALGO`\ ``=2`` its contribution is important. .. envvar:: BIOEM_DEBUG_BREAK diff --git a/param.cpp b/param.cpp index ec5d151..f1ba380 100644 --- a/param.cpp +++ b/param.cpp @@ -1695,7 +1695,7 @@ int bioem_param::CalculateRefCTF() exit(1); } - // ********** Calculating normalized volumen element ********* + // ********** Calculating normalized volume element ********* if (!printModel) { @@ -1709,7 +1709,7 @@ int bioem_param::CalculateRefCTF() ((2.f * (myfloat_t) param_device.maxDisplaceCenter + 1.)) / (2.f * (myfloat_t)(param_device.maxDisplaceCenter + 1.)) / (myfloat_t) numberGridPointsCTF_amp * gridEnvelop * gridCTF_phase / - M_PI / param_device.sigmaPriorbctf / param_device.sigmaPriordefo; + 4.f / M_PI / sqrt(2.f * M_PI) / param_device.sigmaPriorbctf / param_device.sigmaPriordefo / param_device.sigmaPrioramp; // cout << "VOLU " << param_device.volu << " " << gridCTF_amp << "\n"; // *** Number of total pixels*** -- GitLab