From b5c109dd1463a305dc866d80624600640fee4306 Mon Sep 17 00:00:00 2001
From: Benjamin Regler <regler@fhi-berlin.mpg.de>
Date: Mon, 8 Jan 2018 15:38:56 +0100
Subject: [PATCH] :bookmark: Correctly resolve Nomad URIs and add new resolve
 parameters in `nomad_query` module

---
 common/python/nomadcore/nomad_query.py | 60 ++++++++++++++++++--------
 1 file changed, 41 insertions(+), 19 deletions(-)

diff --git a/common/python/nomadcore/nomad_query.py b/common/python/nomadcore/nomad_query.py
index 005237a..b6d160f 100644
--- a/common/python/nomadcore/nomad_query.py
+++ b/common/python/nomadcore/nomad_query.py
@@ -27,6 +27,7 @@ import sys
 import json
 import time
 import errno
+import random
 
 if sys.version_info.major > 2:
     # For Python 3.0 and later
@@ -63,7 +64,21 @@ class NomadQueryResult(object):
         # Get Nomad URIs
         response = NomadQuery().request(self._download_url)
         if response['status'] == 'success':
-            self._uri = response['data'].get('result', [])
+            regex = re.compile(r'(?<=/[a-zA-Z0-9\-_]{3}/)[^\.]+')
+            paths = response['data'].get('result', [])
+
+            self._uri = []      # Start from an empty list of Nomad URIs
+            for path in paths:
+                match = regex.search(path)
+                if match:
+                    # Substitute prefixes (paths without a match are skipped)
+                    groups = match.group(0).split('/')
+                    groups[0] = 'N' + groups[0][1:]         # Normalized
+
+                    if len(groups) == 2:
+                        groups[1] = 'C' + groups[1][1:]     # Computed
+
+                    self._uri.append('nmd://' + '/'.join(groups))
 
     def timestamp(self):
         """Get the timestamp of the query.
@@ -355,8 +370,10 @@ class NomadQuery(object):
                                        (default: {''})
             resolve {bool}          -- Automatically resolve Nomad URIs
                                        (default: {False})
-            params {dict}           -- Parameter passed to resolve method
-                                       (default: {{}})
+            params {dict}           -- Parameter passed to resolve method. May
+                                       include `size` and `seed` to resolve
+                                       only a random subset of `size` URIs,
+                                       seeded by `seed` (default: {{}})
 
         Returns:
             dict|bool -- A dictionary with query information, Nomad URIs, and
@@ -418,8 +435,9 @@ class NomadQuery(object):
         Keyword Arguments:
             resolve {bool}  -- Automatically resolve Nomad URIs
                                (default: {False})
-            params {dict}   -- Parameter passed to resolve method
-                               (default: {{}})
+            params {dict}   -- Parameter passed to resolve method. May include
+                               `size` and `seed` to resolve only a random
+                               subset of `size` URIs (default: {{}})
 
         Returns:
             dict -- A dictionary with query information, Nomad URIs, and
@@ -494,34 +512,38 @@ class NomadQuery(object):
             data['data'] = self._resolve(data['uri'], **params)
         return data
 
-    def _resolve(self, paths, **params):
+    def _resolve(self, paths, size=None, seed=None, **params):
         """[Protected] Resolve Nomad URIs.
 
         Arguments:
             paths {list}  -- List of Nomad URIs
 
         Keyword Arguments:
+            size {number} -- Maximum number of URIs to resolve, drawn at
+                             random; all URIs if unset (default: {None})
+            seed {number} -- Seed of the random number generator, making the
+                             drawn subset reproducible (default: {None})
             params {dict} -- Parameter passed to resolve method (default: {{}})
 
         Returns:
             dict -- A dictionary with resolved Nomad URIs
         """
-        data = {}
-        regex = re.compile(r'(?<=/[a-zA-Z0-9\-_]{3}/)[^\.]+')
+        # `size` and `seed` are bound by the signature, so `params` holds only
+        # the arguments that are forwarded to `resolve`.
 
-        for path in paths:
-            match = regex.search(path)
-            if match:
-                # Substitute prefixes
-                groups = match.group(0).split('/')
-                groups[0] = 'N' + groups[0][1:]         # Normalized
+        # Draw a subset only if `size` is positive and below the URI count
+        if size and 0 < size < len(paths):
+            state = random.getstate()
 
-                if len(groups) == 2:
-                    groups[1] = 'C' + groups[1][1:]     # Computed
+            try:
+                if seed is not None:
+                    random.seed(seed)
 
-                # Resolve with correct Nomad URI
-                data[path] = self.resolve('nmd://' + '/'.join(groups), **params)
-        return data
+                paths = random.sample(paths, size)
+            finally:
+                random.setstate(state)     # Leave the global RNG untouched
+
+        return {path: self.resolve(path, **params) for path in paths}
 
     def _makedir(self, path):
         """[Protected] Recursively create directory.
-- 
GitLab