Commit 45d2e492 authored by Ievgen Vovk's avatar Ievgen Vovk
Browse files

Initial commit

parents
data_files:
mc:
train_sample:
magic1:
input_mask: "../../../MCs/MAGIC/ST.03.07/za05to35/Train_sample/1.Calibrated/GA_M1*root"
hillas_output: "../../../MCs/MAGIC/ST.03.07/za05to35/Train_sample/2.Hillas/iv_hillas_m1.h5"
magic2:
input_mask: "../../../MCs/MAGIC/ST.03.07/za05to35/Train_sample/1.Calibrated/GA_M2*root"
hillas_output: "../../../MCs/MAGIC/ST.03.07/za05to35/Train_sample/2.Hillas/iv_hillas_m2.h5"
test_sample:
magic1:
input_mask: "../../../MCs/MAGIC/ST.03.07/za05to35/Test_sample/1.Calibrated/GA_M1*root"
hillas_output: "../../../MCs/MAGIC/ST.03.07/za05to35/Test_sample/2.Hillas/iv_hillas_m1.h5"
reco_output: "../../../MCs/MAGIC/ST.03.07/za05to35/Test_sample/3.Reco/iv_reco_m1.h5"
magic2:
input_mask: "../../../MCs/MAGIC/ST.03.07/za05to35/Test_sample/1.Calibrated/GA_M2*root"
hillas_output: "../../../MCs/MAGIC/ST.03.07/za05to35/Test_sample/2.Hillas/iv_hillas_m2.h5"
reco_output: "../../../MCs/MAGIC/ST.03.07/za05to35/Test_sample/3.Reco/iv_reco_m2.h5"
data:
train_sample:
magic1:
input_mask: "../../../Data/MAGIC/Off/Train_sample/1.Calibrated/20*_M1_*root"
hillas_output: "../../../Data/MAGIC/Off/Train_sample/2.Hillas/iv_hillas_m1.h5"
magic2:
input_mask: "../../../Data/MAGIC/Off/Train_sample/1.Calibrated/20*_M2_*root"
hillas_output: "../../../Data/MAGIC/Off/Train_sample/2.Hillas/iv_hillas_m2.h5"
test_sample:
magic1:
input_mask: "../../../Data/MAGIC/CrabNebula/1.Calibrated/20*_M1_*root"
hillas_output: "../../../Data/MAGIC/CrabNebula/ctapipe/2.Hillas/iv_hillas_m1.h5"
reco_output: "../../../Data/MAGIC/CrabNebula/ctapipe/3.Reco/reco_m1.h5"
magic2:
input_mask: "../../../Data/MAGIC/CrabNebula/1.Calibrated/20*_M2_*root"
hillas_output: "../../../Data/MAGIC/CrabNebula/ctapipe/2.Hillas/iv_hillas_m2.h5"
reco_output: "../../../Data/MAGIC/CrabNebula/ctapipe/3.Reco/reco_m2.h5"
image_cleanining:  # NOTE(review): likely a typo of "image_cleaning" — keep the key until the consuming code is checked
magic:
charge_thresholds:
picture_thresh: 6
boundary_thresh: 3.5
min_number_picture_neighbors: 1
time_thresholds:
# 1.5 slices x 1.64 GHz
time_limit: 2.46
# 4.5 slices x 1.64 GHz
max_time_off: 7.38
min_number_neighbors: 1
energy_rf:
save_name: "RFs/energy_rf.joblib"
cuts: "(multiplicity > 1) & (intensity > 30) & (length > 0.0) & (leakage1_intensity < 0.15)"
settings:
n_estimators: 100
min_samples_leaf: 10
n_jobs: 3
features:
['slope', 'length', 'width', 'intensity', 'skewness', 'kurtosis', 'x', 'y', 'psi', 'leakage1_intensity', 'leakage2_intensity']
direction_rf:
save_name: "RFs/direction_rf.joblib"
cuts: "(multiplicity > 1) & (intensity > 30) & (length > 0.0) & (leakage1_intensity < 0.15)"
settings:
n_estimators: 100
min_samples_leaf: 10
n_jobs: 3
features:
'disp': ['slope', 'length', 'width', 'intensity', 'skewness', 'kurtosis', 'x', 'y', 'psi']
'pos_angle_shift': ['slope', 'length', 'width', 'intensity', 'skewness', 'kurtosis', 'x', 'y', 'psi']
classifier_rf:
save_name: "RFs/classifier_rf.joblib"
cuts: "(multiplicity > 1) & (intensity > 30) & (length > 0.0) & (leakage1_intensity < 0.15)"
settings:
n_estimators: 100
min_samples_leaf: 10
n_jobs: 3
features:
['slope', 'length', 'width', 'intensity', 'skewness', 'kurtosis', 'x', 'y', 'psi', 'n_islands']
event_list:
max_time_diff: 6.9e-4
cuts:
l3rate: "80 < value < 1000"
dc: "2.5e3 < value < 3.5e3"
# coding: utf-8
import argparse
import datetime
import glob

import numpy as np
import pandas
import scipy
import uproot
import yaml
def info_message(text, prefix='info'):
    """
    Print the given text preceded by a tag and the current timestamp.

    Parameters
    ----------
    text: str
        Message to print.
    prefix: str, optional
        Tag shown in front of the timestamp.

    Returns
    -------
    None
    """
    timestamp = datetime.datetime.now().strftime("%Y-%m-%dT%H:%M:%S")
    print(f"({prefix:s}) {timestamp:s}: {text:s}")
def identify_time_edges(times, criterion, max_time_diff=6.9e-4):
    """
    Identifies the time interval edges, corresponding to the True
    state of the specified condition. Neighbouring time intervals,
    separated by not more than max_time_diff are joined together.

    Parameters
    ----------
    times: array_like
        Array of the time data points (assumed sorted in ascending order).
    criterion: array_like
        Array of True/False values, indicating the goodness of the
        corresponding data points.
    max_time_diff: float, optional
        Maximal time difference between the time intervals, below which
        they are joined into one.

    Returns
    -------
    parts_edges: numpy.ndarray or list
        (N, 2) array of [start, stop] pairs describing the identified
        time intervals; the list [[0, 0]] when no data point satisfies
        the criterion.
    """
    times = np.asarray(times)
    # Coerce to a boolean array so that plain Python lists also work
    # (a list compared to True with == would not broadcast element-wise).
    criterion = np.asarray(criterion, dtype=bool)

    good_idx = np.where(criterion)[0]
    if len(good_idx) == 0:
        print("No time intervals excluded!")
        return [[0, 0]]

    # np.where gives indices of the good bins, but we need the indices
    # of their _edges_: neighbours closer than max_time_diff belong to
    # the same interval and contribute their own index as an edge.
    edge_idx = []
    for i in good_idx:
        if i - 1 >= 0 and abs(times[i] - times[i - 1]) < max_time_diff:
            edge_idx.append(i - 1)

        edge_idx.append(i)

        if i + 1 < len(times) and abs(times[i] - times[i + 1]) < max_time_diff:
            edge_idx.append(i + 1)
    edge_idx = np.unique(edge_idx)

    if len(edge_idx) > 2:
        # Finding the elements that separate the observational time intervals.
        # Within one interval np.diff of the edge indices is exactly 1;
        # a larger index step or a time gap above max_time_diff marks a
        # boundary between two intervals.
        inner = edge_idx[1:-1]
        gap_in_index = np.diff(inner) > 1
        gap_in_time = np.diff(times[inner]) > max_time_diff
        split_idx = np.where(gap_in_index | gap_in_time)[0]

        # Concatenating the global start/stop edges with the stop/start
        # pairs found at each internal gap (split_idx + 1 opens the next
        # interval).
        parts_edges_idx = np.concatenate(
            (
                [edge_idx[0]],
                inner[split_idx],
                inner[split_idx + 1],
                [edge_idx[-1]]
            )
        )
    else:
        parts_edges_idx = np.array(edge_idx)

    parts_edges_idx.sort()

    # Transforming edge indices to the real time values and reshaping
    # them into the [start, stop] pair list.
    parts_edges = times[parts_edges_idx]
    parts_edges = parts_edges.reshape(-1, 2)

    return parts_edges
def intersect_time_intervals(intervals1, intervals2):
    """
    Intersects two lists of (TStart, TStop) pairs. Returned list
    contains the start/stop intervals, common in both input lists.

    Parameters
    ----------
    intervals1: list
        First list of (TStart, TStop) lists (or tuples).
    intervals2: list
        Second list of (TStart, TStop) lists (or tuples).

    Returns
    -------
    joined_intervals: list
        A list of (TStart, TStop) lists, representing the start/stop
        intervals common in both input lists.
    """
    joined_intervals = []

    # Comparing 1st to 2nd
    for interv1 in intervals1:
        for interv2 in intervals2:
            tstart = None
            tstop = None

            if (interv2[0] >= interv1[0]) and (interv2[0] <= interv1[1]):
                # interv2 starts inside interv1
                tstart = interv2[0]
                tstop = min(interv2[1], interv1[1])
            elif (interv2[1] >= interv1[0]) and (interv2[1] <= interv1[1]):
                # interv2 ends inside interv1
                tstart = max(interv2[0], interv1[0])
                tstop = interv2[1]
            elif (interv2[0] < interv1[0]) and (interv2[1] > interv1[1]):
                # interv2 fully covers interv1
                tstart = interv1[0]
                tstop = interv1[1]

            # Append per overlap (the original recorded only the last
            # overlapping interv2 per interv1) and compare against None
            # explicitly so that intervals starting/ending at time 0 are
            # not discarded by truthiness.
            if tstart is not None and tstop is not None:
                joined_intervals.append([tstart, tstop])

    return joined_intervals
def gti_generator(config):
    """
    GTI (good time interval) list generator.

    Parameters
    ----------
    config: dict
        Configuration dictionary (e.g. read from a YAML file). Must
        contain an "event_list" section with the "cuts" and
        "max_time_diff" entries.

    Returns
    -------
    joint_intervals: list
        A list of (TStart, TStop) lists, representing the identified GTIs.

    Raises
    ------
    ValueError
        If the "event_list" section is missing, no input files match the
        configured mask, or no valid selection cuts are provided.
    """
    if 'event_list' not in config:
        raise ValueError('GTI generator error: the configuration file is missing the "event_list" section. Exiting.')

    # Identifying the files to read
    info_message("looking for the files", "GTI generator")

    file_mask = config["data_files"]["data"]["test_sample"]["magic2"]["input_mask"]
    file_list = glob.glob(file_mask)
    if not file_list:
        raise ValueError("No files to process")

    # Per-quantity containers for the data points; only the quantities
    # requested in the configuration card are collected.
    frames = dict()
    if "dc" in config['event_list']['cuts']:
        frames['dc'] = []
    if "l3rate" in config['event_list']['cuts']:
        frames['l3rate'] = []

    # Looping over the data files
    for fnum, file_name in enumerate(file_list):
        info_message(f"processing file {fnum+1} / {len(file_list)}", "GTI generator")

        with uproot.open(file_name) as input_stream:
            # --- DC ---
            if "dc" in frames:
                mjd = input_stream["Camera"]["MTimeCamera.fMjd"].array()
                millisec = input_stream["Camera"]["MTimeCamera.fTime.fMilliSec"].array()
                nanosec = input_stream["Camera"]["MTimeCamera.fNanoSec"].array()

                df_ = pandas.DataFrame()
                # MJD with sub-second precision (86400 s per day)
                df_['mjd'] = mjd + (millisec / 1e3 + nanosec / 1e9) / 86400
                df_['value'] = input_stream["Camera"]["MReportCamera.fMedianDC"].array()
                frames['dc'].append(df_)

            # --- L3 rate ---
            if "l3rate" in frames:
                mjd = input_stream["Trigger"]["MTimeTrigger.fMjd"].array()
                millisec = input_stream["Trigger"]["MTimeTrigger.fTime.fMilliSec"].array()
                nanosec = input_stream["Trigger"]["MTimeTrigger.fNanoSec"].array()

                df_ = pandas.DataFrame()
                df_['mjd'] = mjd + (millisec / 1e3 + nanosec / 1e9) / 86400
                df_['value'] = input_stream["Trigger"]["MReportTrigger.fL3Rate"].array()
                frames['l3rate'].append(df_)

    # Concatenating the per-file frames (DataFrame.append was removed in
    # pandas 2.0). Sorting data points by date is needed for the time
    # intervals identification.
    dfs = dict()
    for key in frames:
        dfs[key] = pandas.concat(frames[key], ignore_index=True).sort_values(by=['mjd'])

    info_message("identifying GTIs", "GTI generator")

    time_intervals_list = []

    # Identifying the DC- and L3-related GTIs (same procedure for both)
    for key in ('dc', 'l3rate'):
        if key not in dfs:
            continue

        cut = config['event_list']['cuts'][key]
        selection = dfs[key].query(cut)

        # Mark the data points that survive the cut
        _, idx, _ = np.intersect1d(dfs[key]["mjd"], selection["mjd"], return_indices=True)
        criterion = np.full(len(dfs[key]["mjd"]), False)
        criterion[idx] = True

        time_intervals = identify_time_edges(dfs[key]["mjd"], criterion,
                                             max_time_diff=config['event_list']['max_time_diff'])
        time_intervals_list.append(time_intervals)

    if not len(time_intervals_list):
        raise ValueError("No valid selection cuts provided in the configuration file.")

    # Joining all found GTIs
    joint_intervals = time_intervals_list[0]
    for i in range(1, len(time_intervals_list)):
        joint_intervals = intersect_time_intervals(joint_intervals, time_intervals_list[i])

    return joint_intervals
# =================
# === Main code ===
# =================

if __name__ == "__main__":
    # --------------------------
    # Adding the argument parser
    arg_parser = argparse.ArgumentParser(description="""
    This tool generates a GTI (good time interval) list from the data files
    specified in the configuration card.
    """)

    arg_parser.add_argument("--config", default="config.yaml",
                            help='Configuration file to steer the code execution.')
    parsed_args = arg_parser.parse_args()
    # --------------------------

    # ------------------------------
    # Reading the configuration file
    file_not_found_message = """
    Error: can not load the configuration file {:s}.
    Please check that the file exists and is of YAML or JSON format.
    Exiting.
    """

    try:
        # safe_load avoids arbitrary object construction from the YAML
        # file (yaml.load without an explicit Loader is unsafe and an
        # error in PyYAML >= 6); "with" guarantees the file is closed.
        with open(parsed_args.config, "r") as config_file:
            config = yaml.safe_load(config_file)
    except IOError:
        print(file_not_found_message.format(parsed_args.config))
        exit()

    if 'event_list' not in config:
        print('Error: the configuration file is missing the "event_list" section. Exiting.')
        exit()
    # ------------------------------

    joint_intervals = gti_generator(config)

    print("--- Identified GTI intervals [MJD] ---")
    for interval in joint_intervals:
        print(interval)
\ No newline at end of file
#!/usr/bin/env python
# coding: utf-8
import datetime
import argparse
import glob
import re
import yaml
import pandas as pd
import numpy as np
import scipy
from scipy.sparse.csgraph import connected_components
import traitlets
import ctapipe
from ctapipe_io_magic import MAGICEventSource, MAGICEventSourceMC
from ctapipe.io import HDF5TableWriter
from ctapipe.core.container import Container, Field
from ctapipe.calib import CameraCalibrator
from ctapipe.reco import HillasReconstructor
from ctapipe.image import hillas_parameters, leakage
from ctapipe.image.timing_parameters import timing_parameters
from ctapipe.image.cleaning import tailcuts_clean
from astropy import units as u
def info_message(text, prefix='info'):
    """
    Print *text* preceded by a tag and the current date/time.

    Parameters
    ----------
    text: str
        Message to be printed.
    prefix: str, optional
        Tag placed in front of the timestamp.

    Returns
    -------
    None
    """
    now = datetime.datetime.now().strftime("%Y-%m-%dT%H:%M:%S")
    print(f"({prefix:s}) {now:s}: {text:s}")
def apply_time_delta_cleaning(geom, mask, core_mask, arrival_times,
                              min_number_neighbors, time_limit):
    """ Remove all pixels from selection that have less than N
    neighbors that arrived within a given timeframe. Leave the *core*
    pixels intact.

    Parameters
    ----------
    geom: `ctapipe.instrument.CameraGeometry`
        Camera geometry information
    mask: array, boolean
        boolean mask of *clean* pixels before time_delta_cleaning.
        Modified in place.
    core_mask: array, boolean
        boolean mask of *clean* *core* pixels before time_delta_cleaning.
        These will not be applied the time_delta constraint to.
    arrival_times: array
        pixel timing information
    min_number_neighbors: int
        Threshold to determine if a pixel survives cleaning steps.
        These steps include checks of neighbor arrival time and value
    time_limit: int or float
        arrival time limit for neighboring pixels

    Returns
    -------
    A boolean mask of *clean* pixels. To get a zero-suppressed image and pixel
    list, use `image[mask], geom.pix_id[mask]`, or to keep the same
    image size and just set unclean pixels to 0 or similar, use
    `image[~mask] = 0`
    """
    pixels_to_remove = []
    for pixel in np.where(mask)[0]:
        # Core pixels are exempt from the time-delta constraint.
        # Direct boolean lookup replaces the original per-pixel O(n)
        # membership test against np.where(core_mask)[0].
        if core_mask[pixel]:
            continue
        neighbors = geom.neighbor_matrix_sparse[pixel].indices
        time_diff = np.abs(arrival_times[neighbors] - arrival_times[pixel])
        # Drop the pixel if too few neighbours arrived within time_limit
        if np.count_nonzero(time_diff < time_limit) < min_number_neighbors:
            pixels_to_remove.append(pixel)
    mask[pixels_to_remove] = False
    return mask
def apply_magic_time_off_cleaning(camera, clean_mask, charge_map, arrival_times, max_time_off, picture_thresh):
    """
    MAGIC-like time cleaning: remove selected pixels whose arrival time
    deviates too much from the reference time of the brightest island.

    Parameters
    ----------
    camera: `ctapipe.instrument.CameraGeometry`
        Camera geometry (needs n_pixels and neighbor_matrix_sparse).
    clean_mask: array, boolean
        Boolean mask of selected pixels. Modified in place.
    charge_map: array
        Per-pixel charges.
    arrival_times: array
        Per-pixel arrival times.
    max_time_off: float
        Allowed deviation from the core time for faint pixels; bright
        pixels (charge >= 2 * picture_thresh) are allowed twice that.
    picture_thresh: float
        Picture charge threshold of the cleaning.

    Returns
    -------
    clean_mask: array, boolean
        The updated pixel selection mask.
    """
    # Identifying connected islands among the selected pixels
    neighbors = camera.neighbor_matrix_sparse
    clean_neighbors = neighbors[clean_mask][:, clean_mask]
    num_islands, labels = connected_components(clean_neighbors, directed=False)

    # Marking pixels according to their islands (0 = not selected).
    # np.zeros replaces the scipy.zeros alias removed from modern SciPy.
    island_ids = np.zeros(camera.n_pixels)
    island_ids[clean_mask] = labels + 1

    # Finding the islands "sizes" (total charges)
    island_sizes = np.zeros(num_islands)
    for i in range(num_islands):
        island_sizes[i] = charge_map[clean_mask][labels == i].sum()

    # The brightest island defines the reference "core" arrival time
    brightest_id = island_sizes.argmax() + 1

    core_pixel_times = arrival_times[clean_mask & (island_ids == brightest_id)]
    core_pixel_charges = charge_map[clean_mask & (island_ids == brightest_id)]
    # NOTE(review): the charge threshold 6 is hard-coded; it matches the
    # configured picture_thresh but is not taken from it — confirm intent.
    core_pixel_times = core_pixel_times[core_pixel_charges > 6]
    core_pixel_charges = core_pixel_charges[core_pixel_charges > 6]
    # Charge^2-weighted mean arrival time of the bright core pixels
    core_time = np.sum(core_pixel_times * core_pixel_charges**2) / np.sum(core_pixel_charges**2)

    time_diff = np.abs(arrival_times - core_time)

    # Bright pixels get a looser time cut than faint ones
    clean_mask[(charge_map >= 2*picture_thresh) & (time_diff > 2*max_time_off)] = False
    clean_mask[(charge_map < 2*picture_thresh) & (time_diff > max_time_off)] = False

    return clean_mask
def filter_brightest_island(camera, clean_mask, event_image):
    """
    Keep only the brightest connected island of the selected pixels.

    Parameters
    ----------
    camera: `ctapipe.instrument.CameraGeometry`
        Camera geometry (needs n_pixels and neighbor_matrix_sparse).
    clean_mask: array, boolean
        Boolean mask of selected pixels. Modified in place.
    event_image: array
        Per-pixel charges used to rank the islands.

    Returns
    -------
    clean_mask: array, boolean
        The updated mask with only the brightest island selected.
    """
    # Identifying connected islands among the selected pixels
    neighbors = camera.neighbor_matrix_sparse
    clean_neighbors = neighbors[clean_mask][:, clean_mask]
    num_islands, labels = connected_components(clean_neighbors, directed=False)

    # Marking pixels according to their islands (0 = not selected).
    # np.zeros replaces the scipy.zeros alias removed from modern SciPy.
    island_ids = np.zeros(camera.n_pixels)
    island_ids[clean_mask] = labels + 1

    # Finding the islands "sizes" (total charges)
    island_sizes = np.zeros(num_islands)
    for i in range(num_islands):
        island_sizes[i] = event_image[clean_mask][labels == i].sum()

    # Disabling pixels for all islands save the brightest one
    brightest_id = island_sizes.argmax() + 1
    clean_mask[island_ids != brightest_id] = False

    return clean_mask
def get_num_islands(camera, clean_mask, event_image):
    """
    Count the connected pixel islands among the selected pixels.

    Parameters
    ----------
    camera: `ctapipe.instrument.CameraGeometry`
        Camera geometry (needs neighbor_matrix_sparse).
    clean_mask: array, boolean
        Boolean mask of selected pixels.
    event_image: array
        Per-pixel charges (unused; kept for signature compatibility).

    Returns
    -------
    int
        Number of connected islands in the selection.
    """
    adjacency = camera.neighbor_matrix_sparse
    selected_adjacency = adjacency[clean_mask][:, clean_mask]
    n_islands, _ = connected_components(selected_adjacency, directed=False)
    return n_islands
def process_dataset_mc(input_mask, output_name, image_cleaning_settings):
# Create event metadata container to hold event / observation / telescope IDs
# and MC true values for the event energy and direction. We will need it to add
# this information to the event Hillas parameters when dumping the results to disk.
class InfoContainer(Container):
obs_id = Field(-1, "Observation ID")
event_id = Field(-1, "Event ID")
tel_id = Field(-1, "Telescope ID")
true_energy = Field(-1, "MC event energy", unit=u.TeV)
true_alt = Field(-1, "MC event altitude", unit=u.rad)
true_az = Field(-1, "MC event azimuth", unit=u.rad)
tel_alt = Field(-1, "MC telescope altitude", unit=u.rad)
tel_az = Field(-1, "MC telescope azimuth", unit=u.rad)
n_islands = Field(-1, "Number of image islands")
# Setting up the calibrator class.
config = traitlets.config.Config()
integrator_name = 'LocalPeakWindowSum'
config[integrator_name]['window_width'] = 5
config[integrator_name]['window_shift'] = 1
calibrator = CameraCalibrator(r1_product="HESSIOR1Calibrator",
extractor_name=integrator_name, config=config)
# Finding available MC files
input_files = glob.glob(input_mask)
input_files.sort()
# Now let's loop over the events and perform:
# - image cleaning;
# - hillas parameter calculation;
# - time gradient calculation.
#
# We'll write the result to the HDF5 file that can be used for further processing.
hillas_reconstructor = HillasReconstructor()
charge_thresholds = image_cleaning_settings['charge_thresholds']
time_thresholds = image_cleaning_settings['time_thresholds']
core_charge_thresholds = charge_thresholds.copy()
core_charge_thresholds['boundary_thresh'] = core_charge_thresholds['picture_thresh']