REST-for-Physics  v2.3
Rare Event Searches ToolKit for Physics
Loading...
Searching...
No Matches
TRestDataSet.h
1/*************************************************************************
2 * This file is part of the REST software framework. *
3 * *
4 * Copyright (C) 2016 GIFNA/TREX (University of Zaragoza) *
5 * For more information see https://gifna.unizar.es/trex *
6 * *
7 * REST is free software: you can redistribute it and/or modify *
8 * it under the terms of the GNU General Public License as published by *
9 * the Free Software Foundation, either version 3 of the License, or *
10 * (at your option) any later version. *
11 * *
12 * REST is distributed in the hope that it will be useful, *
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of *
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
15 * GNU General Public License for more details. *
16 * *
17 * You should have a copy of the GNU General Public License along with *
18 * REST in $REST_PATH/LICENSE. *
19 * If not, see https://www.gnu.org/licenses/. *
20 * For the list of contributors see $REST_PATH/CREDITS. *
21 *************************************************************************/
22
23#ifndef REST_TRestDataSet
24#define REST_TRestDataSet
25
26#include <TTimeStamp.h>
27
28#include <ROOT/RDataFrame.hxx>
29
30#include "TRestCut.h"
31#include "TRestMetadata.h"
32
35 public:
38 std::string metadata;
39
41 std::string strategy;
42
44 std::string description;
45
47 std::string value;
48 };
49
50 private:
52 std::string fFilterStartTime = "2000/01/01"; //<
53
55 std::string fFilterEndTime = "3000/12/31"; //<
56
58 std::string fFilePattern = ""; //<
59
61 std::vector<std::string> fObservablesList; //<
62
64 std::vector<std::string> fFilterMetadata; //<
65
67 std::vector<std::string> fFilterContains; //<
68
70 std::vector<Double_t> fFilterGreaterThan; //<
71
73 std::vector<Double_t> fFilterLowerThan; //<
74
76 std::vector<Double_t> fFilterEqualsTo; //<
77
79 std::map<std::string, RelevantQuantity> fQuantity; //<
80
82 TRestCut* fCut = nullptr; //<
83
85 Double_t fTotalDuration = 0; //<
86
88 std::vector<std::string> fFileSelection; //<
89
92
95
97 Bool_t fMergedDataset = false; //<
98
100 std::vector<std::string> fImportedFiles; //<
101
103 std::vector<std::pair<std::string, std::string>> fColumnNameExpressions; //<
104
106 Bool_t fMT = false; //<
107
108 // If the dataframe was defined externally it will be true
109 Bool_t fExternal = false; //<
110
112 ROOT::RDF::RNode fDataFrame = ROOT::RDataFrame(0);
113
115 TChain* fTree = nullptr;
116
117 void InitFromConfigFile() override;
118
119 protected:
120 virtual std::vector<std::string> FileSelection();
121
122 void RegenerateTree(std::vector<std::string> finalList = {});
123
124 public:
126 ROOT::RDF::RNode GetDataFrame() const {
127 if (!fExternal && fTree == nullptr)
128 RESTWarning << "DataFrame has not been yet initialized" << RESTendl;
129 return fDataFrame;
130 }
131
132 void EnableMultiThreading(Bool_t enable = true) { fMT = enable; }
133
135 TTree* GetTree() const {
136 if (fTree == nullptr && fExternal) {
137 RESTInfo << "The tree is not accessible. Only GetDataFrame can be used in an externally "
138 "generated dataset"
139 << RESTendl;
140 RESTInfo << "You may write a tree using GetDataFrame()->Snapshot(\"MyTree\", \"output.root\");"
141 << RESTendl;
142 return fTree;
143 }
144
145 if (fTree == nullptr) {
146 RESTError << "Tree has not been yet initialized" << RESTendl;
147 RESTError << "You should invoke TRestDataSet::GenerateDataSet() or " << RESTendl;
148 RESTError << "TRestDataSet::Import( fname ) before trying to access the tree" << RESTendl;
149 }
150 return fTree;
151 }
152
154 size_t GetNumberOfColumns() { return fDataFrame.GetColumnNames().size(); }
155
158
160 std::vector<std::string> GetFileSelection() { return fFileSelection; }
161
163 Double_t GetTotalTimeInSeconds() const { return fTotalDuration; }
164
165 inline auto GetFilterStartTime() const { return fFilterStartTime; }
166 inline auto GetFilterEndTime() const { return fFilterEndTime; }
167 inline auto GetStartTime() const { return fStartTime; }
168 inline auto GetEndTime() const { return fEndTime; }
169 inline auto GetFilePattern() const { return fFilePattern; }
170 inline auto GetObservablesList() const { return fObservablesList; }
171 inline auto GetFileSelection() const { return fFileSelection; }
172 inline auto GetFilterMetadata() const { return fFilterMetadata; }
173 inline auto GetFilterContains() const { return fFilterContains; }
174 inline auto GetFilterGreaterThan() const { return fFilterGreaterThan; }
175 inline auto GetFilterLowerThan() const { return fFilterLowerThan; }
176 inline auto GetFilterEqualsTo() const { return fFilterEqualsTo; }
177 inline auto GetQuantity() const { return fQuantity; }
178 inline auto GetAddedColumns() const { return fColumnNameExpressions; }
179 inline auto GetCut() const { return fCut; }
180 inline auto IsMergedDataSet() const { return fMergedDataset; }
181
182 inline void SetObservablesList(const std::vector<std::string>& obsList) { fObservablesList = obsList; }
183 inline void SetFilePattern(const std::string& pattern) { fFilePattern = pattern; }
184 inline void SetQuantity(const std::map<std::string, RelevantQuantity>& quantity) { fQuantity = quantity; }
185
186 void SetTotalTimeInSeconds(Double_t seconds) { fTotalDuration = seconds; }
187 void SetDataFrame(const ROOT::RDF::RNode& dS) {
188 fDataFrame = dS;
189 fExternal = true;
190 }
191
193 Bool_t Merge(const TRestDataSet& dS);
194 void Import(const std::string& fileName);
195 void Import(std::vector<std::string> fileNames);
196 void Export(const std::string& filename, std::vector<std::string> excludeColumns = {});
197
198 ROOT::RDF::RNode MakeCut(const TRestCut* cut);
199 ROOT::RDF::RNode ApplyRange(size_t from, size_t to);
200 ROOT::RDF::RNode Range(size_t from, size_t to);
201 ROOT::RDF::RNode DefineColumn(const std::string& columnName, const std::string& formula);
202
203 size_t GetEntries();
204
205 void PrintMetadata() override;
206 void Initialize() override;
207
208 void GenerateDataSet();
209
210 TRestDataSet();
211 TRestDataSet(const char* cfgFileName, const std::string& name = "");
213
214 ClassDefOverride(TRestDataSet, 9);
215};
216#endif
A class to help on cuts definitions. To be used with TRestAnalysisTree.
Definition TRestCut.h:31
It allows to group a number of runs that satisfy given metadata conditions.
std::vector< std::string > fFilterContains
If not empty it will check if the metadata member contains the string.
virtual std::vector< std::string > FileSelection()
Function to determine the filenames that satisfy the dataset conditions.
std::vector< Double_t > fFilterLowerThan
If the corresponding element is not empty it will check if the metadata member is lower.
void PrintMetadata() override
Prints on screen the information about the metadata members of TRestDataSet.
TChain * fTree
A pointer to the generated tree.
void Import(const std::string &fileName)
This function imports metadata from a root file it import metadata info from the previous dataSet whi...
std::map< std::string, RelevantQuantity > fQuantity
The properties of a relevant quantity that we want to store together with the dataset.
ROOT::RDF::RNode Range(size_t from, size_t to)
This method returns a RDataFrame node with the number of samples inside the dataset by selecting a ra...
std::vector< std::pair< std::string, std::string > > fColumnNameExpressions
A list of new columns together with its corresponding expressions added to the dataset.
ROOT::RDF::RNode DefineColumn(const std::string &columnName, const std::string &formula)
This function will add a new column to the RDataFrame using the same scheme as the usual RDF::Define ...
Double_t fEndTime
TimeStamp for the end time of the last file.
ROOT::RDF::RNode fDataFrame
The resulting RDF::RNode object after initialization.
size_t GetNumberOfBranches()
Number of variables (or observables)
size_t GetEntries()
It returns the number of entries found inside fDataFrame and prints out a warning if the number of en...
TRestDataSet()
Default constructor.
ROOT::RDF::RNode GetDataFrame() const
Gives access to the RDataFrame.
Double_t GetTotalTimeInSeconds() const
It returns the accumulated run time in seconds.
ROOT::RDF::RNode MakeCut(const TRestCut *cut)
This function applies a TRestCut to the dataframe and returns a dataframe with the applied cuts....
void GenerateDataSet()
This function generates the data frame with the filelist and column names (or observables) that have ...
Bool_t fMT
A flag to enable Multithreading during dataframe generation.
TRestCut * fCut
Parameter cuts over the selected dataset.
void Export(const std::string &filename, std::vector< std::string > excludeColumns={})
It will generate an output file with the dataset compilation. Only the selected branches and the file...
std::string fFilterStartTime
All the selected runs will have a starting date after fStartTime.
Bool_t Merge(const TRestDataSet &dS)
This function merge different TRestDataSet metadata in current dataSet.
std::vector< std::string > fFilterMetadata
A list of metadata members where filters will be applied.
std::vector< std::string > fFileSelection
A list populated by the FileSelection method using the conditions of the dataset.
std::vector< std::string > GetFileSelection()
It returns a list of the files that have been finally selected.
std::string fFilterEndTime
All the selected runs will have an ending date before fEndTime.
Double_t fStartTime
TimeStamp for the start time of the first file.
std::vector< std::string > fObservablesList
It contains a list of the observables that will be added to the final tree or exported file.
TTree * GetTree() const
Gives access to the tree.
Bool_t fMergedDataset
It keeps track if the generated dataset is a pure dataset or a merged one.
void Initialize() override
This function initialize different parameters from the TRestDataSet.
void RegenerateTree(std::vector< std::string > finalList={})
It regenerates the tree so that it is an exact copy of the present DataFrame.
std::vector< std::string > fImportedFiles
The list of dataset files imported.
Double_t fTotalDuration
The total integrated run time of selected files.
std::string fFilePattern
A glob file pattern that must be satisfied by all files.
size_t GetNumberOfColumns()
Number of variables (or observables)
std::vector< Double_t > fFilterGreaterThan
If the corresponding element is not empty it will check if the metadata member is greater.
ROOT::RDF::RNode ApplyRange(size_t from, size_t to)
This method reduces the number of samples inside the dataset by selecting a range.
std::vector< Double_t > fFilterEqualsTo
If the corresponding element is not empty it will check if the metadata member is equal.
void InitFromConfigFile() override
Initialization of specific TRestDataSet members through an RML file.
TRestDataSet & operator=(TRestDataSet &dS)
Operator to copy TRestDataSet metadata.
~TRestDataSet()
Default destructor.
A base class for any REST metadata class.
endl_t RESTendl
Termination flag object for TRestStringOutput.
time_t StringToTimeStamp(std::string time)
A method to convert a date/time formatted string to a timestamp.
std::string metadata
The associated metadata member used to register the relevant quantity.
std::string description
A user given description that can be used to define the relevant quantity.
std::string strategy
It determines how to produce the relevant quantity (accumulate/unique/last/max/min)
std::string value
The quantity value.