23 #ifndef REST_TRestDataSet
24 #define REST_TRestDataSet
26 #include <TTimeStamp.h>
28 #include <ROOT/RDataFrame.hxx>
31 #include "TRestMetadata.h"
112 Bool_t fExternal =
false;
130 if (!fExternal &&
fTree ==
nullptr)
131 RESTWarning <<
"DataFrame has not been yet initialized" <<
RESTendl;
135 void EnableMultiThreading(Bool_t enable =
true) {
fMT = enable; }
139 if (
fTree ==
nullptr && fExternal) {
140 RESTInfo <<
"The tree is not accessible. Only GetDataFrame can be used in an externally "
143 RESTInfo <<
"You may write a tree using GetDataFrame()->Snapshot(\"MyTree\", \"output.root\");"
148 if (
fTree ==
nullptr) {
149 RESTError <<
"Tree has not been yet initialized" <<
RESTendl;
150 RESTError <<
"You should invoke TRestDataSet::GenerateDataSet() or " <<
RESTendl;
151 RESTError <<
"TRestDataSet::Import( fname ) before trying to access the tree" <<
RESTendl;
170 inline auto GetStartTime()
const {
return fStartTime; }
171 inline auto GetEndTime()
const {
return fEndTime; }
172 inline auto GetFilePattern()
const {
return fFilePattern; }
181 inline auto GetQuantity()
const {
return fQuantity; }
183 inline auto GetCut()
const {
return fCut; }
186 inline void SetObservablesList(
const std::vector<std::string>& obsList) {
fObservablesList = obsList; }
187 inline void SetFilePattern(
const std::string& pattern) {
fFilePattern = pattern; }
188 inline void SetQuantity(
const std::map<std::string, RelevantQuantity>& quantity) {
fQuantity = quantity; }
190 void SetTotalTimeInSeconds(Double_t seconds) {
fTotalDuration = seconds; }
191 void SetDataFrame(
const ROOT::RDF::RNode& dS) {
198 void Import(
const std::string& fileName);
199 void Import(std::vector<std::string> fileNames);
200 void Export(
const std::string& filename, std::vector<std::string> excludeColumns = {});
203 ROOT::RDF::RNode
ApplyRange(
size_t from,
size_t to);
204 ROOT::RDF::RNode
Range(
size_t from,
size_t to);
205 ROOT::RDF::RNode
DefineColumn(
const std::string& columnName,
const std::string& formula);
215 TRestDataSet(
const char* cfgFileName,
const std::string& name =
"");
A class to help on cuts definitions. To be used with TRestAnalysisTree.
It allows to group a number of runs that satisfy given metadata conditions.
std::vector< std::string > fFilterContains
If not empty it will check if the metadata member contains the string.
virtual std::vector< std::string > FileSelection()
Function to determine the filenames that satisfy the dataset conditions.
std::vector< Double_t > fFilterLowerThan
If the corresponding element is not empty it will check if the metadata member is lower.
void PrintMetadata() override
Prints on screen the information about the metadata members of TRestDataSet.
TChain * fTree
A pointer to the generated tree.
std::vector< std::string > fProcessObservablesList
It contains a list of the process where all observables should be added.
void Import(const std::string &fileName)
This function imports metadata from a root file it import metadata info from the previous dataSet whi...
std::map< std::string, RelevantQuantity > fQuantity
The properties of a relevant quantity that we want to store together with the dataset.
ROOT::RDF::RNode Range(size_t from, size_t to)
This method returns a RDataFrame node with the number of samples inside the dataset by selecting a ra...
std::vector< std::pair< std::string, std::string > > fColumnNameExpressions
A list of new columns together with its corresponding expressions added to the dataset.
ROOT::RDF::RNode DefineColumn(const std::string &columnName, const std::string &formula)
This function will add a new column to the RDataFrame using the same scheme as the usual RDF::Define ...
Double_t fEndTime
TimeStamp for the end time of the last file.
ROOT::RDF::RNode fDataFrame
The resulting RDF::RNode object after initialization.
size_t GetNumberOfBranches()
Number of variables (or observables)
size_t GetEntries()
It returns the number of entries found inside fDataFrame and prints out a warning if the number of en...
TRestDataSet()
Default constructor.
ROOT::RDF::RNode GetDataFrame() const
Gives access to the RDataFrame.
Double_t GetTotalTimeInSeconds() const
It returns the accumulated run time in seconds.
ROOT::RDF::RNode MakeCut(const TRestCut *cut)
This function applies a TRestCut to the dataframe and returns a dataframe with the applied cuts....
void GenerateDataSet()
This function generates the data frame with the filelist and column names (or observables) that have ...
Bool_t fMT
A flag to enable Multithreading during dataframe generation.
TRestCut * fCut
Parameter cuts over the selected dataset.
void Export(const std::string &filename, std::vector< std::string > excludeColumns={})
It will generate an output file with the dataset compilation. Only the selected branches and the file...
std::string fFilterStartTime
All the selected runs will have a starting date after fStartTime.
Bool_t Merge(const TRestDataSet &dS)
This function merge different TRestDataSet metadata in current dataSet.
std::vector< std::string > GetFileSelection()
It returns a list of the files that have been finally selected.
std::vector< std::string > fFilterMetadata
A list of metadata members where filters will be applied.
std::vector< std::string > fFileSelection
A list populated by the FileSelection method using the conditions of the dataset.
std::string fFilterEndTime
All the selected runs will have an ending date before fEndTime.
Double_t fStartTime
TimeStamp for the start time of the first file.
std::vector< std::string > fObservablesList
It contains a list of the observables that will be added to the final tree or exported file.
Bool_t fMergedDataset
It keeps track if the generated dataset is a pure dataset or a merged one.
void Initialize() override
This function initialize different parameters from the TRestDataSet.
void RegenerateTree(std::vector< std::string > finalList={})
It regenerates the tree so that it is an exact copy of the present DataFrame.
std::vector< std::string > fImportedFiles
The list of dataset files imported.
Double_t fTotalDuration
The total integrated run time of selected files.
std::string fFilePattern
A glob file pattern that must be satisfied by all files.
TTree * GetTree() const
Gives access to the tree.
size_t GetNumberOfColumns()
Number of variables (or observables)
std::vector< Double_t > fFilterGreaterThan
If the corresponding element is not empty it will check if the metadata member is greater.
ROOT::RDF::RNode ApplyRange(size_t from, size_t to)
This method reduces the number of samples inside the dataset by selecting a range.
std::vector< Double_t > fFilterEqualsTo
If the corresponding element is not empty it will check if the metadata member is equal.
void InitFromConfigFile() override
Initialization of specific TRestDataSet members through an RML file.
TRestDataSet & operator=(TRestDataSet &dS)
Operator to copy TRestDataSet metadata.
~TRestDataSet()
Default destructor.
time_t StringToTimeStamp(std::string time)
A method to convert a date/time formatted string to a timestamp.
std::string metadata
The associated metadata member used to register the relevant quantity.
std::string description
A user given description that can be used to define the relevant quantity.
std::string strategy
It determines how to produce the relevant quantity (accumulate/unique/last/max/min)
std::string value
The quantity value.