85 #include "TRestComponentDataSet.h"
144 RESTMetadata <<
" == Dataset filenames ==" <<
RESTendl;
157 RESTMetadata <<
"This component has no nodes!" <<
RESTendl;
158 RESTMetadata <<
" Use: LoadDataSets() to initialize the nodes" <<
RESTendl;
163 RESTMetadata <<
" == Weights ==" <<
RESTendl;
170 RESTMetadata <<
" Use : PrintStatistics() to check node statistics" <<
RESTendl;
180 if (!
HasNodes() && !IsDataSetLoaded()) {
181 RESTWarning <<
"TRestComponentDataSet::PrintStatistics. Empty nodes and no dataset loaded!"
183 RESTWarning <<
"Invoking TRestComponentDataSet::Initialize() might solve the problem" <<
RESTendl;
188 RESTInfo <<
"Total counts : " << result <<
RESTendl;
189 std::cout << std::endl;
206 while (ele !=
nullptr) {
225 <<
"TRestComponentDataSet::FillHistograms. Trying to fill histograms but no variables found!"
232 if (!IsDataSetLoaded()) {
233 RESTError <<
"TRestComponentDataSet::FillHistograms. Dataset has not been initialized!" <<
RESTendl;
238 RESTWarning <<
"Nodes have not been defined" <<
RESTendl;
239 RESTWarning <<
"The full dataset will be used to generate the density distribution" <<
RESTendl;
243 RESTInfo <<
"Generating N-dim histograms" <<
RESTendl;
254 ROOT::RDF::RNode df = ROOT::RDataFrame(0);
258 RESTInfo <<
"Creating component with no parameters (full dataset used)" <<
RESTendl;
271 Int_t* bins =
new Int_t[
fNbins.size()];
272 Double_t* xmin =
new Double_t[
fNbins.size()];
273 Double_t* xmax =
new Double_t[
fNbins.size()];
275 for (
size_t n = 0; n <
fNbins.size(); n++) {
284 std::vector<std::string> varsAndWeight =
fVariables;
287 std::string weightsStr =
"";
288 for (
size_t n = 0; n <
fWeights.size(); n++) {
289 if (n > 0) weightsStr +=
"*";
293 df = df.Define(
"componentWeight", weightsStr);
294 varsAndWeight.push_back(
"componentWeight");
297 auto hn = df.HistoND({hName, hName, (int)
fNbins.size(), bins, xmin, xmax}, varsAndWeight);
298 THnD* hNd =
new THnD(*hn);
317 RESTError <<
"TRestComponentDataSet::RegenerateActiveNode. Active node undefined!" <<
RESTendl;
329 Double_t node = GetActiveNodeValue();
332 ROOT::RDF::RNode df = ROOT::RDataFrame(0);
339 Int_t* bins =
new Int_t[
fNbins.size()];
340 Double_t* xmin =
new Double_t[
fNbins.size()];
341 Double_t* xmax =
new Double_t[
fNbins.size()];
343 for (
size_t n = 0; n <
fNbins.size(); n++) {
352 std::vector<std::string> varsAndWeight =
fVariables;
355 std::string weightsStr =
"";
356 for (
size_t n = 0; n <
fWeights.size(); n++) {
357 if (n > 0) weightsStr +=
"*";
361 df = df.Define(
"componentWeight", weightsStr);
362 varsAndWeight.push_back(
"componentWeight");
365 auto hn = df.HistoND({hName, hName, (int)
fNbins.size(), bins, xmin, xmax}, varsAndWeight);
366 THnD* hNd =
new THnD(*hn);
382 RESTInfo <<
"Extracting parameterization nodes" <<
RESTendl;
384 std::vector<double> vs;
385 if (!IsDataSetLoaded()) {
386 RESTError <<
"TRestComponentDataSet::ExtractParameterizationNodes. Dataset has not been initialized!"
391 auto GetUniqueElements = [](
const std::vector<double>& vec) {
392 std::set<double> uniqueSet(vec.begin(), vec.end());
393 return std::vector<double>(uniqueSet.begin(), uniqueSet.end());
399 std::vector<double> uniqueVec = GetUniqueElements(*parValues);
400 vs.insert(vs.end(), uniqueVec.begin(), uniqueVec.end());
421 std::vector<Int_t> stats;
422 if (!IsDataSetLoaded()) {
423 RESTError <<
"TRestComponentDataSet::ExtractNodeStatistics. Dataset has not been initialized!"
428 RESTInfo <<
"Counting statistics for each node ..." <<
RESTendl;
438 RESTInfo <<
"Total entries for " <<
fParameter <<
":" << p <<
" = " << *nEv <<
RESTendl;
444 RESTWarning <<
"The number of requested samples (" <<
fSamples
445 <<
") is higher than the number of dataset entries (" << *nEv <<
")" <<
RESTendl;
447 RESTInfo <<
"Samples to be used for " <<
fParameter <<
":" << p <<
" = " << *nEv <<
RESTendl;
448 stats.push_back(*nEv);
464 RESTInfo <<
"Loading datasets" <<
RESTendl;
466 std::vector<std::string> fullFileNames;
474 if (fileName.empty()) {
475 RESTError <<
"TRestComponentDataSet::LoadDataSet. Error loading file : " << name <<
RESTendl;
476 RESTError <<
"Does the file exist?" <<
RESTendl;
477 RESTError <<
"You may use `<globals> <searchPath ...` to indicate the path location" <<
RESTendl;
480 fullFileNames.push_back(fileName);
490 RESTError <<
"Problem loading dataset from file list :" <<
RESTendl;
499 RESTInfo <<
"Filling histograms" <<
RESTendl;
515 if (std::count(cNames.begin(), cNames.end(), var) == 0) {
516 RESTError <<
"Variable ---> " << var <<
" <--- NOT found on dataset" <<
RESTendl;
530 if (!
isANumber(var) && std::count(cNames.begin(), cNames.end(), var) == 0) {
531 RESTError <<
"Weight ---> " << var <<
" <--- NOT found on dataset" <<
RESTendl;
543 if (!IsDataSetLoaded()) {
544 RESTWarning <<
"TRestComponentDataSet::ValidDataSet. Dataset has not been loaded" <<
RESTendl;
545 RESTWarning <<
"Try calling TRestComponentDataSet::Initialize()" <<
RESTendl;
547 RESTInfo <<
"Trying to load datasets" <<
RESTendl;
549 if (IsDataSetLoaded()) {
552 RESTError <<
"Failed loading datasets" <<
RESTendl;
558 RESTError <<
"TRestComponentDataSet::ValidDataSet. Active node has not been defined" <<
RESTendl;
It defines a background/signal model distribution in a given parameter space (typically x,...
long long unsigned int fSplitEntries
It helps to split large datasets when extracting the parameterization nodes.
Bool_t ValidDataSet()
Takes care of initializing the datasets if they have not been initialized. On success it returns true.
TRestDataSet fDataSet
The dataset used to initialize the distribution.
void PrintMetadata() override
Prints on screen the information about the metadata members of TRestComponentDataSet.
Bool_t fDataSetLoaded
It is true if the dataset was loaded without issues.
TVector2 fDFRange
It creates a sample subset using a range definition.
void FillHistograms() override
It will produce a histogram with the distribution defined using the variables and the weights for eac...
std::vector< Int_t > fNSimPerNode
std::vector< std::string > fWeights
A list with the dataset columns used to weight the distribution density and define rate.
Bool_t LoadDataSets()
A method responsible for importing a list of TRestDataSet into fDataSet and checking that the variables and ...
Bool_t VariablesOk()
It returns true if all variables have been found inside TRestDataSet.
void PrintStatistics()
It prints out the statistics available for each parametric node.
TRestComponentDataSet()
Default constructor.
~TRestComponentDataSet()
Default destructor.
void RegenerateActiveNodeDensity() override
It will regenerate the density histogram for the active node. It is practical in the case when the nu...
std::vector< Int_t > fTotalSamples
It defines the total number of entries for each parameterization node (Initialized by the dataset)
std::vector< Double_t > ExtractParameterizationNodes()
It returns a vector with all the different values found on the dataset column for the user given para...
void Initialize() override
It will initialize the data frame with the filelist and column names (or observables) that have been ...
Bool_t WeightsOk()
It returns true if all weights have been found inside TRestDataSet.
std::vector< std::string > fDataSetFileNames
The filename of the dataset used.
std::vector< Int_t > ExtractNodeStatistics()
It returns a vector with the number of entries found for each parameterization node.
void InitFromConfigFile() override
It customizes the retrieval of XML data values of this class.
It defines a background/signal model distribution in a given parameter space (typically x,...
void InitFromConfigFile() override
It customizes the retrieval of XML data values of this class.
void PrintMetadata() override
Prints on screen the information about the metadata members of TRestAxionSolarFlux.
Int_t fActiveNode
It is used to define the node that will be accessed for rate retrieval.
Int_t fSamples
It introduces a fixed number of samples (if 0 it will take all available samples)
std::string fParameter
It is used to parameterize a set of distribution densities (e.g. WIMP or axion mass)
Float_t fPrecision
A precision used to select the node value with a given range defined as a fraction of the value.
std::vector< Int_t > fNbins
The number of bins in which we should divide each variable.
void Initialize() override
It initializes the random number generator. We avoid defining the section name here since we will never define...
std::vector< TVector2 > fRanges
The range of each of the variables used to create the PDF distribution.
std::vector< Double_t > fParameterizationNodes
It defines the nodes of the parameterization (Initialized by the dataset)
Bool_t HasNodes()
It returns true if any nodes have been defined.
TRandom3 * fRandom
Internal process random generator.
std::vector< std::string > fVariables
A list with the branches that will be used to create the distribution space.
std::vector< THnD * > fNodeDensity
The generated N-dimensional variable space density for a given node.
void PrintMetadata() override
Prints on screen the information about the metadata members of TRestDataSet.
void Import(const std::string &fileName)
This function imports metadata from a ROOT file; it imports the metadata info from the previous dataSet whi...
ROOT::RDF::RNode Range(size_t from, size_t to)
This method returns a RDataFrame node with the number of samples inside the dataset by selecting a ra...
size_t GetEntries()
It returns the number of entries found inside fDataFrame and prints out a warning if the number of en...
ROOT::RDF::RNode GetDataFrame() const
Gives access to the RDataFrame.
TTree * GetTree() const
Gives access to the tree.
ROOT::RDF::RNode ApplyRange(size_t from, size_t to)
This method reduces the number of samples inside the dataset by selecting a range.
@ REST_Info
+show most of the information for each step
std::string DoubleToString(Double_t d, std::string format="%8.6e")
Gets a string from a double.
Int_t isANumber(std::string in)
Returns 1 only if a valid number is found in the string in. If not it returns 0.