REST-for-Physics  v2.3
Rare Event Searches ToolKit for Physics
Loading...
Searching...
No Matches
TRestDataSet.h
1/*************************************************************************
2 * This file is part of the REST software framework. *
3 * *
4 * Copyright (C) 2016 GIFNA/TREX (University of Zaragoza) *
5 * For more information see https://gifna.unizar.es/trex *
6 * *
7 * REST is free software: you can redistribute it and/or modify *
8 * it under the terms of the GNU General Public License as published by *
9 * the Free Software Foundation, either version 3 of the License, or *
10 * (at your option) any later version. *
11 * *
12 * REST is distributed in the hope that it will be useful, *
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of *
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
15 * GNU General Public License for more details. *
16 * *
17 * You should have a copy of the GNU General Public License along with *
18 * REST in $REST_PATH/LICENSE. *
19 * If not, see https://www.gnu.org/licenses/. *
20 * For the list of contributors see $REST_PATH/CREDITS. *
21 *************************************************************************/
22
23#ifndef REST_TRestDataSet
24#define REST_TRestDataSet
25
26#include <TTimeStamp.h>
27
28#include <ROOT/RDataFrame.hxx>
29
30#include "TRestCut.h"
31#include "TRestMetadata.h"
32
35 public:
38 std::string metadata;
39
41 std::string strategy;
42
44 std::string description;
45
47 std::string value;
48 };
49
50 private:
52 std::string fFilterStartTime = "2000/01/01"; //<
53
55 std::string fFilterEndTime = "3000/12/31"; //<
56
58 std::string fFilePattern = ""; //<
59
61 std::vector<std::string> fObservablesList; //<
62
64 std::vector<std::string> fProcessObservablesList; //<
65
67 std::vector<std::string> fFilterMetadata; //<
68
70 std::vector<std::string> fFilterContains; //<
71
73 std::vector<Double_t> fFilterGreaterThan; //<
74
76 std::vector<Double_t> fFilterLowerThan; //<
77
79 std::vector<Double_t> fFilterEqualsTo; //<
80
82 std::map<std::string, RelevantQuantity> fQuantity; //<
83
85 TRestCut* fCut = nullptr; //<
86
88 Double_t fTotalDuration = 0; //<
89
91 std::vector<std::string> fFileSelection; //<
92
95
98
100 Bool_t fMergedDataset = false; //<
101
103 std::vector<std::string> fImportedFiles; //<
104
106 std::vector<std::pair<std::string, std::string>> fColumnNameExpressions; //<
107
109 Bool_t fMT = false; //<
110
111 // If the dataframe was defined externally it will be true
112 Bool_t fExternal = false; //<
113
115 ROOT::RDF::RNode fDataFrame = ROOT::RDataFrame(0);
116
118 TChain* fTree = nullptr;
119
120 void InitFromConfigFile() override;
121
122 protected:
123 virtual std::vector<std::string> FileSelection();
124
125 void RegenerateTree(std::vector<std::string> finalList = {});
126
127 public:
129 ROOT::RDF::RNode GetDataFrame() const {
130 if (!fExternal && fTree == nullptr)
131 RESTWarning << "DataFrame has not been yet initialized" << RESTendl;
132 return fDataFrame;
133 }
134
135 void EnableMultiThreading(Bool_t enable = true) { fMT = enable; }
136
138 TTree* GetTree() const {
139 if (fTree == nullptr && fExternal) {
140 RESTInfo << "The tree is not accessible. Only GetDataFrame can be used in an externally "
141 "generated dataset"
142 << RESTendl;
143 RESTInfo << "You may write a tree using GetDataFrame()->Snapshot(\"MyTree\", \"output.root\");"
144 << RESTendl;
145 return fTree;
146 }
147
148 if (fTree == nullptr) {
149 RESTError << "Tree has not been yet initialized" << RESTendl;
150 RESTError << "You should invoke TRestDataSet::GenerateDataSet() or " << RESTendl;
151 RESTError << "TRestDataSet::Import( fname ) before trying to access the tree" << RESTendl;
152 }
153 return fTree;
154 }
155
157 size_t GetNumberOfColumns() { return fDataFrame.GetColumnNames().size(); }
158
161
163 std::vector<std::string> GetFileSelection() { return fFileSelection; }
164
166 Double_t GetTotalTimeInSeconds() const { return fTotalDuration; }
167
168 inline auto GetFilterStartTime() const { return fFilterStartTime; }
169 inline auto GetFilterEndTime() const { return fFilterEndTime; }
170 inline auto GetStartTime() const { return fStartTime; }
171 inline auto GetEndTime() const { return fEndTime; }
172 inline auto GetFilePattern() const { return fFilePattern; }
173 inline auto GetObservablesList() const { return fObservablesList; }
174 inline auto GetFileSelection() const { return fFileSelection; }
175 inline auto GetProcessObservablesList() const { return fProcessObservablesList; }
176 inline auto GetFilterMetadata() const { return fFilterMetadata; }
177 inline auto GetFilterContains() const { return fFilterContains; }
178 inline auto GetFilterGreaterThan() const { return fFilterGreaterThan; }
179 inline auto GetFilterLowerThan() const { return fFilterLowerThan; }
180 inline auto GetFilterEqualsTo() const { return fFilterEqualsTo; }
181 inline auto GetQuantity() const { return fQuantity; }
182 inline auto GetAddedColumns() const { return fColumnNameExpressions; }
183 inline auto GetCut() const { return fCut; }
184 inline auto IsMergedDataSet() const { return fMergedDataset; }
185
186 inline void SetObservablesList(const std::vector<std::string>& obsList) { fObservablesList = obsList; }
187 inline void SetFilePattern(const std::string& pattern) { fFilePattern = pattern; }
188 inline void SetQuantity(const std::map<std::string, RelevantQuantity>& quantity) { fQuantity = quantity; }
189
190 void SetTotalTimeInSeconds(Double_t seconds) { fTotalDuration = seconds; }
191 void SetDataFrame(const ROOT::RDF::RNode& dS) {
192 fDataFrame = dS;
193 fExternal = true;
194 }
195
197 Bool_t Merge(const TRestDataSet& dS);
198 void Import(const std::string& fileName);
199 void Import(std::vector<std::string> fileNames);
200 void Export(const std::string& filename, std::vector<std::string> excludeColumns = {});
201
202 ROOT::RDF::RNode MakeCut(const TRestCut* cut);
203 ROOT::RDF::RNode ApplyRange(size_t from, size_t to);
204 ROOT::RDF::RNode Range(size_t from, size_t to);
205 ROOT::RDF::RNode DefineColumn(const std::string& columnName, const std::string& formula);
206
207 size_t GetEntries();
208
209 void PrintMetadata() override;
210 void Initialize() override;
211
212 void GenerateDataSet();
213
214 TRestDataSet();
215 TRestDataSet(const char* cfgFileName, const std::string& name = "");
217
218 ClassDefOverride(TRestDataSet, 8);
219};
220#endif
A class to help on cuts definitions. To be used with TRestAnalysisTree.
Definition TRestCut.h:31
It allows to group a number of runs that satisfy given metadata conditions.
std::vector< std::string > fFilterContains
If not empty it will check if the metadata member contains the string.
virtual std::vector< std::string > FileSelection()
Function to determine the filenames that satisfy the dataset conditions.
std::vector< Double_t > fFilterLowerThan
If the corresponding element is not empty it will check if the metadata member is lower.
void PrintMetadata() override
Prints on screen the information about the metadata members of TRestDataSet.
TChain * fTree
A pointer to the generated tree.
std::vector< std::string > fProcessObservablesList
It contains a list of the process where all observables should be added.
void Import(const std::string &fileName)
This function imports metadata from a root file it import metadata info from the previous dataSet whi...
std::map< std::string, RelevantQuantity > fQuantity
The properties of a relevant quantity that we want to store together with the dataset.
ROOT::RDF::RNode Range(size_t from, size_t to)
This method returns a RDataFrame node with the number of samples inside the dataset by selecting a ra...
std::vector< std::pair< std::string, std::string > > fColumnNameExpressions
A list of new columns together with its corresponding expressions added to the dataset.
ROOT::RDF::RNode DefineColumn(const std::string &columnName, const std::string &formula)
This function will add a new column to the RDataFrame using the same scheme as the usual RDF::Define ...
Double_t fEndTime
TimeStamp for the end time of the last file.
ROOT::RDF::RNode fDataFrame
The resulting RDF::RNode object after initialization.
size_t GetNumberOfBranches()
Number of variables (or observables)
size_t GetEntries()
It returns the number of entries found inside fDataFrame and prints out a warning if the number of en...
TRestDataSet()
Default constructor.
ROOT::RDF::RNode GetDataFrame() const
Gives access to the RDataFrame.
Double_t GetTotalTimeInSeconds() const
It returns the accumulated run time in seconds.
ROOT::RDF::RNode MakeCut(const TRestCut *cut)
This function applies a TRestCut to the dataframe and returns a dataframe with the applied cuts....
void GenerateDataSet()
This function generates the data frame with the filelist and column names (or observables) that have ...
Bool_t fMT
A flag to enable Multithreading during dataframe generation.
TRestCut * fCut
Parameter cuts over the selected dataset.
void Export(const std::string &filename, std::vector< std::string > excludeColumns={})
It will generate an output file with the dataset compilation. Only the selected branches and the file...
std::string fFilterStartTime
All the selected runs will have a starting date after fStartTime.
Bool_t Merge(const TRestDataSet &dS)
This function merge different TRestDataSet metadata in current dataSet.
std::vector< std::string > fFilterMetadata
A list of metadata members where filters will be applied.
std::vector< std::string > fFileSelection
A list populated by the FileSelection method using the conditions of the dataset.
std::vector< std::string > GetFileSelection()
It returns a list of the files that have been finally selected.
std::string fFilterEndTime
All the selected runs will have an ending date before fEndTime.
Double_t fStartTime
TimeStamp for the start time of the first file.
std::vector< std::string > fObservablesList
It contains a list of the observables that will be added to the final tree or exported file.
TTree * GetTree() const
Gives access to the tree.
Bool_t fMergedDataset
It keeps track if the generated dataset is a pure dataset or a merged one.
void Initialize() override
This function initialize different parameters from the TRestDataSet.
void RegenerateTree(std::vector< std::string > finalList={})
It regenerates the tree so that it is an exact copy of the present DataFrame.
std::vector< std::string > fImportedFiles
The list of dataset files imported.
Double_t fTotalDuration
The total integrated run time of selected files.
std::string fFilePattern
A glob file pattern that must be satisfied by all files.
size_t GetNumberOfColumns()
Number of variables (or observables)
std::vector< Double_t > fFilterGreaterThan
If the corresponding element is not empty it will check if the metadata member is greater.
ROOT::RDF::RNode ApplyRange(size_t from, size_t to)
This method reduces the number of samples inside the dataset by selecting a range.
std::vector< Double_t > fFilterEqualsTo
If the corresponding element is not empty it will check if the metadata member is equal.
void InitFromConfigFile() override
Initialization of specific TRestDataSet members through an RML file.
TRestDataSet & operator=(TRestDataSet &dS)
Operator to copy TRestDataSet metadata.
~TRestDataSet()
Default destructor.
A base class for any REST metadata class.
endl_t RESTendl
Termination flag object for TRestStringOutput.
time_t StringToTimeStamp(std::string time)
A method to convert a date/time formatted string to a timestamp.
std::string metadata
The associated metadata member used to register the relevant quantity.
std::string description
A user given description that can be used to define the relevant quantity.
std::string strategy
It determines how to produce the relevant quantity (accumulate/unique/last/max/min)
std::string value
The quantity value.