From b5c109dd1463a305dc866d80624600640fee4306 Mon Sep 17 00:00:00 2001 From: Benjamin Regler <regler@fhi-berlin.mpg.de> Date: Mon, 8 Jan 2018 15:38:56 +0100 Subject: [PATCH] :bookmark: Correctly resolve Nomad URIs and added new resolve parameters in `nomad_query` module --- common/python/nomadcore/nomad_query.py | 60 ++++++++++++++++++-------- 1 file changed, 41 insertions(+), 19 deletions(-) diff --git a/common/python/nomadcore/nomad_query.py b/common/python/nomadcore/nomad_query.py index 005237a..b6d160f 100644 --- a/common/python/nomadcore/nomad_query.py +++ b/common/python/nomadcore/nomad_query.py @@ -27,6 +27,7 @@ import sys import json import time import errno +import random if sys.version_info.major > 2: # For Python 3.0 and later @@ -63,7 +64,21 @@ class NomadQueryResult(object): # Get Nomad URIs response = NomadQuery().request(self._download_url) if response['status'] == 'success': - self._uri = response['data'].get('result', []) + regex = re.compile(r'(?<=/[a-zA-Z0-9\-_]{3}/)[^\.]+') + paths = response['data'].get('result', []) + + uri = [] + for path in paths: + match = regex.search(path) + if match: + # Substitute prefixes + groups = match.group(0).split('/') + groups[0] = 'N' + groups[0][1:] # Normalized + + if len(groups) == 2: + groups[1] = 'C' + groups[1][1:] # Computed + + self._uri.append('nmd://' + '/'.join(groups)) def timestamp(self): """Get the timestamp of the query. @@ -355,8 +370,10 @@ class NomadQuery(object): (default: {''}) resolve {bool} -- Automatically resolve Nomad URIs (default: {False}) - params {dict} -- Parameter passed to resolve method - (default: {{}}) + params {dict} -- Parameter passed to resolve method. May + include `size` and `seed` for fetching + only a limited number of URIs chosen by + a given seed (default: {{}}) Returns: dict|bool -- A dictionary with query information, Nomad URIs, and @@ -418,8 +435,9 @@ class NomadQuery(object): Keyword Arguments: resolve {bool} -- Automatically resolve Nomad URIs (default: {False}) - params {dict} -- Parameter passed to resolve method - (default: {{}}) + params {dict} -- Parameter passed to resolve method. May include + `size` and `seed` for fetching only a limited + number of URIs chosen by a given seed (default: {{}}) Returns: dict -- A dictionary with query information, Nomad URIs, and @@ -494,34 +512,38 @@ class NomadQuery(object): data['data'] = self._resolve(data['uri'], **params) return data - def _resolve(self, paths, **params): + def _resolve(self, paths, size=None, seed=None, **params): """[Protected] Resolve Nomad URIs. Arguments: paths {list} -- List of Nomad URIs Keyword Arguments: + size {number} -- Total number of URIs to resolve (default: {None}) + seed {number} -- Seed to initialize the internal state of the + random number generator for selecting n-th Nomad + URIs (default: {None}) params {dict} -- Parameter passed to resolve method (default: {{}}) Returns: dict -- A dictionary with resolved Nomad URIs """ - data = {} - regex = re.compile(r'(?<=/[a-zA-Z0-9\-_]{3}/)[^\.]+') + size = params.pop('size', size) + seed = params.pop('seed', seed) - for path in paths: - match = regex.search(path) - if match: - # Substitute prefixes - groups = match.group(0).split('/') - groups[0] = 'N' + groups[0][1:] # Normalized + # Truncate number of URIs to specified size + if size and size > 1: + state = random.getstate() - if len(groups) == 2: - groups[1] = 'C' + groups[1][1:] # Computed + # Set seed of random number generator + if seed is not None: + random.seed(seed) - # Resolve with correct Nomad URI - data[path] = self.resolve('nmd://' + '/'.join(groups), **params) - return data + # Get limited set of Nomad URIs + paths = random.sample(paths, size) + random.setstate(state) + + return {path: self.resolve(path, **params) for path in paths} def _makedir(self, path): """[Protected] Recursively create directory. -- GitLab