From 799bc785ba9537e93d15913ca2c68c0d21354a2a Mon Sep 17 00:00:00 2001
From: Alessio Berti <alessioberti90@gmail.com>
Date: Tue, 21 Apr 2020 18:26:42 +0200
Subject: [PATCH] [__init__.py]: added masks to recognize superstar files (real
 and MC). Added a class member containing a string with the data level (for
 the moment calibrated or superstar).

---
 ctapipe_io_magic/__init__.py | 45 ++++++++++++++++++++++++++++--------
 1 file changed, 36 insertions(+), 9 deletions(-)

diff --git a/ctapipe_io_magic/__init__.py b/ctapipe_io_magic/__init__.py
index 0344d12..67294a8 100644
--- a/ctapipe_io_magic/__init__.py
+++ b/ctapipe_io_magic/__init__.py
@@ -71,14 +71,19 @@ class MAGICEventSource(EventSource):
         run_info = list(map(self._get_run_info_from_name, self.file_list))
         run_numbers = [i[0] for i in run_info]
         is_mc_runs = [i[1] for i in run_info]
+        data_levels = [i[2] for i in run_info]
 
         self.run_numbers, indices = np.unique(run_numbers, return_index=True)
         is_mc_runs = [is_mc_runs[i] for i in indices]
         is_mc_runs = np.unique(is_mc_runs)
+        data_levels = np.unique(data_levels)
         # Checking if runt type (data/MC) is consistent:
         if len(is_mc_runs) > 1:
             raise ValueError("Loaded files contain data and MC runs. Please load only data OR Monte Carlos.")
+        if len(data_levels) > 1:
+            raise ValueError("Loaded files of different data level. Please load only data at the same level.")
         self.is_mc = is_mc_runs[0]
+        self.data_level = data_levels[0]
 
         # # Setting up the current run with the first run present in the data
         # self.current_run = self._set_active_run(run_number=0)
@@ -155,25 +160,47 @@ class MAGICEventSource(EventSource):
             A run number of the file.
         """
 
-        mask_data = r".*\d+_M\d+_(\d+)\.\d+_Y_.*"
-        mask_mc = r".*_M\d_za\d+to\d+_\d_(\d+)_Y_.*"
-        mask_mc_alt = r".*_M\d_\d_(\d+)_.*"
-        if len(re.findall(mask_data, file_name)) > 0:
-            parsed_info = re.findall(mask_data, file_name)
+        mask_data_calibrated   = r".*\d{6}_M\d+_(\d+)\.\d+_Y_.*"
+        mask_data_superstar    = r".*\d{6}_(\d+)_S_.*"
+        mask_mc_calibrated     = r".*_M\d_za\d+to\d+_\d_(\d+)_Y_.*"
+        mask_mc_calibrated_alt = r".*_M\d_\d_(\d+)_.*"
+        mask_mc_superstar      = r".*_za\d+to\d+_\d_S_w0_(\d).*"
+        mask_mc_superstar_alt  = r".*_za\d+to\d+_\d_S_w0__(\d).*"
+        if len(re.findall(mask_data_calibrated, file_name)) > 0:
+            parsed_info = re.findall(mask_data_calibrated, file_name)
             is_mc = False
-        elif len(re.findall(mask_mc, file_name)) > 0:
-            parsed_info = re.findall(mask_mc, file_name)
+            data_level = 'calibrated'
+        elif len(re.findall(mask_data_superstar, file_name)) > 0:
+            parsed_info = re.findall(mask_data_superstar, file_name)
+            is_mc = False
+            data_level = 'superstar'
+        elif len(re.findall(mask_mc_calibrated, file_name)) > 0:
+            parsed_info = re.findall(mask_mc_calibrated, file_name)
+            is_mc = True
+            data_level = 'calibrated'
+        elif len(re.findall(mask_mc_calibrated_alt, file_name)) > 0:
+            parsed_info = re.findall(mask_mc_calibrated_alt, file_name)
+            is_mc = True
+            data_level = 'calibrated'
+        elif len(re.findall(mask_mc_superstar, file_name)) > 0:
+            parsed_info = re.findall(mask_mc_superstar, file_name)
+            is_mc = True
+            data_level = 'superstar'
+        elif len(re.findall(mask_mc_superstar_alt, file_name)) > 0:
+            parsed_info = re.findall(mask_mc_superstar_alt, file_name)
             is_mc = True
+            data_level = 'superstar'
         else:
-            parsed_info = re.findall(mask_mc_alt, file_name)
+            parsed_info = re.findall(mask_mc_calibrated_alt, file_name)
             is_mc = True
+            data_level = 'unknown'
 
         try:
             run_number = int(parsed_info[0])
         except IndexError:
             raise IndexError('Can not identify the run number and type (data/MC) of the file {:s}'.format(file_name))
 
-        return run_number, is_mc
+        return run_number, is_mc, data_level
 
     def _set_active_run(self, run_number):
         """
-- 
GitLab