From cc655bd7d5ba1e36a4ba5db8b5992869c2923f8b Mon Sep 17 00:00:00 2001
From: Markus Scheidgen <markus.scheidgen@gmail.com>
Date: Wed, 8 Jan 2020 12:52:37 +0100
Subject: [PATCH] CLI command for generating statistics tables.

---
 nomad/cli/client/statistics.py | 192 ++++++++++++++++++++++++++++++++-
 tests/bravado_flask.py         |   2 +-
 tests/test_cli.py              |  14 +++
 3 files changed, 206 insertions(+), 2 deletions(-)

diff --git a/nomad/cli/client/statistics.py b/nomad/cli/client/statistics.py
index 07615991cf..fdce1a1b85 100644
--- a/nomad/cli/client/statistics.py
+++ b/nomad/cli/client/statistics.py
@@ -243,7 +243,7 @@ def bar_plot(
 @click.option('--power', type=float, help='User power scale instead of log with the given inverse power.')
 @click.option('--open-access', is_flag=True, help='Only consider Open-Access data.')
 @click.option('--minimum', type=int, default=1, help='Only consider codes with at least the given ammount of entries.')
-def statistics(errors, title, x_axis, y_axis, cumulate, total, save, power, open_access, minimum):
+def statistics_plot(errors, title, x_axis, y_axis, cumulate, total, save, power, open_access, minimum):
     from .client import create_client
     client = create_client()
 
@@ -353,3 +353,218 @@ def statistics(errors, title, x_axis, y_axis, cumulate, total, save, power, open
             per_page=1, owner=owner, statistics=True,
             metrics=['total_energies', 'calculations', 'uploaders', 'authors', 'datasets']).response().result
         print(json.dumps(data.statistics['total'], indent=4))
+
+
+@client.command(help='Generate table with basic statistics summary.')
+@click.option('--html', is_flag=True, help='Output HTML instead of plain text table.')
+@click.option('--geometries', is_flag=True, help='Use geometries not unique geometries.')
+def statistics_table(html, geometries):
+    """
+    Print a summary of the archive statistics as plain text or as HTML.
+
+    The numbers are read from the ``statistics`` of several
+    ``client.repo.search`` responses; a statistic that is missing from a
+    response is reported as 0.
+
+    Arguments:
+        html: Print the HTML page instead of the plain text table.
+        geometries: Read the ``geometries`` metric instead of
+            ``unique_geometries``.
+    """
+    # NOTE(review): no ``datetime`` import is visible in this patch; it is imported
+    # locally, so the HTML branch does not depend on a module-level name.
+    from datetime import datetime
+
+    from nomad.cli.client import create_client
+
+    def get_statistic(response, quantity, value, metric):
+        """Return ``statistics[quantity][value][metric]``, or 0 if any level is missing."""
+        quantity_data = response.statistics.get(quantity)
+        if quantity_data is None:
+            return 0
+        value_data = quantity_data.get(value)
+        if value_data is None:
+            return 0
+
+        # Not assigned to ``value``, which would shadow the parameter of the same name.
+        result = value_data.get(metric)
+        return result if result is not None else 0
+
+    client = create_client()
+
+    geometry_metric = 'unique_geometries' if not geometries else 'geometries'
+
+    # search scc with system type
+    data_all = client.repo.search(
+        per_page=1, metrics=['calculations'], statistics=True).response().result
+
+    entries = get_statistic(data_all, 'total', 'all', 'code_runs')
+    calculations = get_statistic(data_all, 'total', 'all', 'calculations')
+    calculations_1d = get_statistic(data_all, 'system', '1D', 'calculations') \
+        + get_statistic(data_all, 'system', 'atom', 'calculations') \
+        + get_statistic(data_all, 'system', 'molecule / cluster', 'calculations')
+
+    calculations_2d = get_statistic(data_all, 'system', '2D / surface', 'calculations')
+    calculations_3d = get_statistic(data_all, 'system', 'bulk', 'calculations')
+
+    # 'quantities' is read from this response below, so it is requested here as well.
+    # NOTE(review): presumably only requested metrics are returned (the first request
+    # asks for 'calculations' before reading it) -- confirm against the API.
+    metrics_all = client.repo.search(
+        per_page=1, metrics=[geometry_metric, 'quantities']).response().result
+    geometry_count = get_statistic(metrics_all, 'total', 'all', geometry_metric)
+    quantities = get_statistic(metrics_all, 'total', 'all', 'quantities')
+
+    # search calcs quantities=section_k_band
+    band_structures = get_statistic(
+        client.repo.search(per_page=1, quantities=['section_k_band']).response().result,
+        'total', 'all', 'code_runs')
+
+    # search calcs quantities=section_dos
+    dos = get_statistic(
+        client.repo.search(per_page=1, quantities=['section_dos']).response().result,
+        'total', 'all', 'code_runs')
+
+    # Entries with code_name 'Phonopy'; this count is only used by the HTML output.
+    phonons = get_statistic(
+        client.repo.search(per_page=1, code_name='Phonopy').response().result,
+        'total', 'all', 'code_runs')
+
+    # NOTE(review): both outputs say "Unique geometries", also when --geometries is set.
+    if not html:
+        print('''
+            Entries: {:,},
+            Calculations, e.g. total energies: {:,},
+            Unique geometries: {:,},
+            Bulk crystals: {:,},
+            2D / Surfaces: {:,},
+            Atoms / Molecules: {:,},
+            DOS: {:,},
+            Band structures: {:,},
+            Total parsed quantities: {:,}
+        '''.format(
+            entries,
+            calculations,
+            geometry_count,
+            calculations_3d,
+            calculations_2d,
+            calculations_1d,
+            dos,
+            band_structures,
+            quantities
+        ))
+
+    else:
+        print('''
+            <div class="container">
+                <p>The <i>NOMAD Archive</i> stores in a code-independent format calculations performed
+                with all the most important and widely used electronic-structure and force-field codes.
+                </p>
+                <p>Summary statistics of the Archive content (last update in {}):</p>
+                <table class="table">
+                    <thead>
+                        <tr>
+                        <th scope="col">Metric</th>
+                        <th scope="col">Value</th>
+                        </tr>
+                    </thead>
+                    <tbody>
+                        <tr>
+                        <th scope="row">Entries, i.e. code runs</th>
+                        <td>{:,}</td>
+                        </tr>
+                        <tr>
+                        <th scope="row">Calculations, e.g. total energies</th>
+                        <td>{:,}</td>
+                        </tr>
+                        <tr>
+                        <th scope="row">Unique geometries</th>
+                        <td>{:,}</td>
+                        </tr>
+                        <tr>
+                        <th scope="row">Bulk Crystals</th>
+                        <td>{:,}</td>
+                        </tr>
+                        <tr>
+                        <th scope="row">Surfaces</th>
+                        <td>{:,}</td>
+                        </tr>
+                        <tr>
+                        <th scope="row">Molecules/Clusters</th>
+                        <td>{:,}</td>
+                        </tr>
+                        <tr>
+                        <th scope="row">DOS</th>
+                        <td>{:,}</td>
+                        </tr>
+                        <tr>
+                        <th scope="row">Band Structures</th>
+                        <td>{:,}</td>
+                        </tr>
+                        <tr>
+                        <th scope="row">Phonon Calculations</th>
+                        <td>{:,}</td>
+                        </tr>
+                        <tr>
+                        <th scope="row">Overall parsed quantities</th>
+                        <td>{:,}</td>
+                        </tr>
+                    </tbody>
+                </table>
+                <p>
+                    Furthermore:
+                </p>
+                <ul>
+                    <li><b>9,274</b> Zip Archives for parsing: <b>16.5 TB</b> of data (compressed)</li>
+                    <li>Data extracted with parsing: <b>5.6 TB</b> of HDF5 files (compressed)</li>
+                    <li>Data classified using <b>168</b> public metadata of the NOMAD Meta Info and <b>2,360</b> code-specific metadata</li>
+                    <li>Number of parsed quantities <b>871,497,996</b></li>
+                </ul>
+                <p>
+                    90% of VASP calculations are provided by
+                        <a href="http://aflowlib.org">AFLOWlib</a> (S. Curtarolo),
+                        <a href="http://oqmd.org"> OQMD</a> (C. Wolverton) and
+                        <a href="https://materialsproject.org">Materials Project</a> (K. Persson).
+                </p>
+                <p>
+                    You can further explore the statistics in the below dynamic histograms. To
+                    change the displayed quantity, select from the "Quantities" drop-down. To
+                    filter the data, click histogram bars for different filter combinations.
+                    To reset filters, click "Reset Filters".
+                </p>
+                <p>
+                    The archive data is represented in a code-independent, structured
+                    form. The archive structure and all quantities are described via the
+                    <a href="https://www.nomad-coe.eu/the-project/nomad-archive/archive-meta-info">NOMAD Metainfo</a>.
+                    The NOMAD Metainfo defines a conceptual model to store the values connected
+                    to atomistic or <i>ab initio</i> calculations. A clear and usable metadata definition
+                    is a prerequisites to preparing the data for analysis that everybody
+                    can contribute to.
+                </p>
+                <p>
+                    In collaboration with the <a href="http://www.bbdc.berlin/">Berlin Big Data Center (BBDC)</a>,
+                    we use the Apache Flink infrastructure to support and go beyond the standard MapReduce model to enable
+                    rapid and complex queries.
+                </p>
+                <p>
+                    Contact concerning general aspects of the CoE: <a href="mailto:pietsch@fhi-berlin.mpg.de">Jessica Pietsch</a>
+                </p>
+                <p>
+                    Contact concerning the NOMAD Archive:
+                        <a href="mailto:markus.scheidgen@physik.hu-berlin.de">Markus Scheidgen</a>,
+                        <a href="mailto:ghiringhelli@fhi-berlin.mpg.de">Luca Ghiringhelli</a>
+                </p>
+            </div>
+        '''.format(
+            datetime.now().strftime('%b %y'),
+            entries,
+            calculations,
+            geometry_count,
+            calculations_3d,
+            calculations_2d,
+            calculations_1d,
+            dos,
+            band_structures,
+            phonons,
+            quantities
+        ))
diff --git a/tests/bravado_flask.py b/tests/bravado_flask.py
index e5d664e4ad..2616a1bfe7 100644
--- a/tests/bravado_flask.py
+++ b/tests/bravado_flask.py
@@ -78,7 +78,7 @@ class FlaskTestFutureAdapter:
         path = self._request_params['url'].replace('http://localhost', '')
         method = self._request_params.get('method')
 
-        query = urlencode(self._request_params.get('params', {}))
+        query = urlencode(self._request_params.get('params', {}), doseq=True)
         if query is not None and query != '':
             url = '%s?%s' % (path, query)
         else:
diff --git a/tests/test_cli.py b/tests/test_cli.py
index e3c69a6211..18dd6f1b06 100644
--- a/tests/test_cli.py
+++ b/tests/test_cli.py
@@ -250,3 +250,17 @@ class TestClient:
         assert published.upload_files.os_path in result.output
 
         published.upload_files.exists
+
+    def test_statistics(self, client, proc_infra, admin_user_bravado_client):
+        """Run ``client statistics-table`` and check the labels of the plain text output."""
+        result = click.testing.CliRunner().invoke(
+            cli, ['client', 'statistics-table'], catch_exceptions=True, obj=utils.POPO())
+
+        assert result.exit_code == 0, result.output
+        assert 'Calculations, e.g. total energies' in result.output
+        assert 'Unique geometries' in result.output
+        assert 'Bulk crystals' in result.output
+        assert '2D / Surfaces' in result.output
+        assert 'Atoms / Molecules' in result.output
+        assert 'DOS' in result.output
+        assert 'Band structures' in result.output
-- 
GitLab