From ec1280980f844eb6a6dbab65e78afde324abce64 Mon Sep 17 00:00:00 2001
From: Markus Scheidgen <markus.scheidgen@gmail.com>
Date: Mon, 10 Feb 2020 14:32:38 +0100
Subject: [PATCH] Search aggregation without min_doc_count in API.

---
 gui/src/components/api.js | 25 +++++++++++++++++++++++++
 nomad/datamodel/base.py   |  6 +-----
 nomad/datamodel/dft.py    |  2 +-
 nomad/search.py           |  5 +----
 4 files changed, 28 insertions(+), 10 deletions(-)

diff --git a/gui/src/components/api.js b/gui/src/components/api.js
index 3d4019b54a..9305c2f432 100644
--- a/gui/src/components/api.js
+++ b/gui/src/components/api.js
@@ -207,6 +207,8 @@ class Api {
     this.onStartLoading = () => null
     this.onFinishLoading = () => null
 
+    this.statistics = {}
+
     this._swaggerClient = Swagger(`${apiBase}/swagger.json`)
     this.keycloak = keycloak
 
@@ -389,6 +391,29 @@ class Api {
         ...search}))
       .catch(handleApiError)
       .then(response => response.body)
+      .then(response => {
+        // zero-fill statistics values that were seen in prior searches but are absent here;
+        // this helps to keep consistent values, e.g. in the metadata search view
+        if (response.statistics) {
+          const empty = {}
+          Object.keys(response.statistics.total.all).forEach(metric => empty[metric] = 0)
+          Object.keys(response.statistics)
+            .filter(key => !['total', 'authors', 'atoms'].includes(key))
+            .forEach(key => {
+              if (!this.statistics[key]) {
+                this.statistics[key] = new Set()
+              }
+              const values = this.statistics[key]
+              Object.keys(response.statistics[key]).forEach(value => values.add(value))
+              values.forEach(value => {
+                if (!response.statistics[key][value]) {
+                  response.statistics[key][value] = empty
+                }
+              })
+            })
+        }
+        return response
+      })
       .finally(this.onFinishLoading)
   }
 
diff --git a/nomad/datamodel/base.py b/nomad/datamodel/base.py
index c49716feb7..88e76268ad 100644
--- a/nomad/datamodel/base.py
+++ b/nomad/datamodel/base.py
@@ -193,8 +193,6 @@ class DomainQuantity:
             0 (the default) means no aggregations.
         metric: Indicates that this quantity should be used as search metric. Values need
             to be tuples with metric name and elastic aggregation (e.g. sum, cardinality)
-        zero_aggs: Return aggregation values for values with zero hits in the search. Default
-            is with zero aggregations.
         elastic_mapping: An optional elasticsearch_dsl mapping. Default is ``Keyword``.
         elastic_search_type: An optional elasticsearch search type. Default is ``term``.
         elastic_field: An optional elasticsearch key. Default is the name of the quantity.
@@ -206,8 +204,7 @@ class DomainQuantity:
     def __init__(
             self, description: str = None, multi: bool = False, aggregations: int = 0,
             order_default: bool = False, metric: Tuple[str, str] = None,
-            zero_aggs: bool = True, metadata_field: str = None,
-            elastic_mapping: type = None,
+            metadata_field: str = None, elastic_mapping: type = None,
             elastic_search_type: str = 'term', elastic_field: str = None,
             elastic_value: Callable[[Any], Any] = None,
             argparse_action: str = 'append'):
@@ -218,7 +215,6 @@ class DomainQuantity:
         self.order_default = order_default
         self.aggregations = aggregations
         self.metric = metric
-        self.zero_aggs = zero_aggs
         self.elastic_mapping = elastic_mapping
         self.elastic_search_type = elastic_search_type
         self.metadata_field = metadata_field
diff --git a/nomad/datamodel/dft.py b/nomad/datamodel/dft.py
index d2ca7cb930..c1d9f85d34 100644
--- a/nomad/datamodel/dft.py
+++ b/nomad/datamodel/dft.py
@@ -274,7 +274,7 @@ Domain(
             order_default=True),
         atoms=DomainQuantity(
             'The atom labels of all atoms in the simulated system.',
-            aggregations=len(ase.data.chemical_symbols), multi=True, zero_aggs=False),
+            aggregations=len(ase.data.chemical_symbols), multi=True),
         only_atoms=DomainQuantity(
             'The atom labels concatenated in species-number order. Used with keyword search '
             'to facilitate exclusive searches.',
diff --git a/nomad/search.py b/nomad/search.py
index 39cdbe4527..a913775b46 100644
--- a/nomad/search.py
+++ b/nomad/search.py
@@ -440,10 +440,7 @@ class SearchRequest:
                 The basic doc_count metric ``code_runs`` is always given.
         """
         quantity = quantities[quantity_name]
-        min_doc_count = 0 if quantity.zero_aggs else 1
-        terms = A(
-            'terms', field=quantity.elastic_field, size=size, min_doc_count=min_doc_count,
-            order=dict(_key='asc'))
+        terms = A('terms', field=quantity.elastic_field, size=size, order=dict(_key='asc'))
 
         buckets = self._search.aggs.bucket('statistics:%s' % quantity_name, terms)
         self._add_metrics(buckets, metrics_to_use)
-- 
GitLab