REST-for-Physics  v2.3
Rare Event Searches ToolKit for Physics
Loading...
Searching...
No Matches
TRestDataSet.cxx
1/*************************************************************************
2 * This file is part of the REST software framework. *
3 * *
4 * Copyright (C) 2016 GIFNA/TREX (University of Zaragoza) *
5 * For more information see https://gifna.unizar.es/trex *
6 * *
7 * REST is free software: you can redistribute it and/or modify *
8 * it under the terms of the GNU General Public License as published by *
9 * the Free Software Foundation, either version 3 of the License, or *
10 * (at your option) any later version. *
11 * *
12 * REST is distributed in the hope that it will be useful, *
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of *
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
15 * GNU General Public License for more details. *
16 * *
17 * You should have a copy of the GNU General Public License along with *
18 * REST in $REST_PATH/LICENSE. *
19 * If not, see https://www.gnu.org/licenses/. *
20 * For the list of contributors see $REST_PATH/CREDITS. *
21 *************************************************************************/
22
293#include "TRestDataSet.h"
294
295#include "TRestRun.h"
296#include "TRestTools.h"
297
298ClassImp(TRestDataSet);
299
304
319TRestDataSet::TRestDataSet(const char* cfgFileName, const std::string& name) : TRestMetadata(cfgFileName) {
321}
322
327
332void TRestDataSet::Initialize() { SetSectionName(this->ClassName()); }
333
339 if (fTree != nullptr) {
340 RESTWarning << "Tree has already been loaded. Skipping TRestDataSet::GenerateDataSet ... "
341 << RESTendl;
342 return;
343 }
344
345 if (fFileSelection.empty()) FileSelection();
346
347 // We are not ready yet
348 if (fFileSelection.empty()) {
349 RESTError << "File selection is empty " << RESTendl;
350 return;
351 }
352
354 TRestRun run(fFileSelection.front());
355 std::vector<std::string> finalList;
356 finalList.push_back("runOrigin");
357 finalList.push_back("eventID");
358 finalList.push_back("timeStamp");
359
360 auto obsNames = run.GetAnalysisTree()->GetObservableNames();
361 for (const auto& obs : fObservablesList) {
362 if (std::find(obsNames.begin(), obsNames.end(), obs) != obsNames.end()) {
363 finalList.push_back(obs);
364 } else {
365 RESTWarning << " Observable " << obs << " not found in observable list, skipping..." << RESTendl;
366 }
367 }
368
369 for (const auto& name : obsNames) {
370 for (const auto& pcs : fProcessObservablesList) {
371 if (name.find(pcs) == 0) finalList.push_back(name);
372 }
373 }
374
375 // Remove duplicated observables if any
376 std::sort(finalList.begin(), finalList.end());
377 finalList.erase(std::unique(finalList.begin(), finalList.end()), finalList.end());
378
379 if (fMT)
380 ROOT::EnableImplicitMT();
381 else
382 ROOT::DisableImplicitMT();
383
384 RESTInfo << "Initializing dataset" << RESTendl;
385 fDataFrame = ROOT::RDataFrame("AnalysisTree", fFileSelection);
386
387 RESTInfo << "Making cuts" << RESTendl;
389
390 // Adding new user columns added to the dataset
391 for (const auto& [cName, cExpression] : fColumnNameExpressions) {
392 RESTInfo << "Adding column to dataset: " << cName << RESTendl;
393 finalList.emplace_back(cName);
394 fDataFrame = DefineColumn(cName, cExpression);
395 }
396
397 RegenerateTree(finalList);
398
399 RESTInfo << " - Dataset generated!" << RESTendl;
400}
401
405void TRestDataSet::RegenerateTree(std::vector<std::string> finalList) {
406 RESTInfo << "Generating snapshot." << RESTendl;
407 std::string user = getenv("USER");
408 std::string fOutName = "/tmp/rest_output_" + user + ".root";
409 if (!finalList.empty())
410 fDataFrame.Snapshot("AnalysisTree", fOutName, finalList);
411 else
412 fDataFrame.Snapshot("AnalysisTree", fOutName);
413
414 RESTInfo << "Re-importing analysis tree." << RESTendl;
415 fDataFrame = ROOT::RDataFrame("AnalysisTree", fOutName);
416
417 TFile* f = TFile::Open(fOutName.c_str());
418 fTree = (TChain*)f->Get("AnalysisTree");
419}
420
424std::vector<std::string> TRestDataSet::FileSelection() {
425 fFileSelection.clear();
426
427 std::time_t time_stamp_start = REST_StringHelper::StringToTimeStamp(fFilterStartTime);
428 std::time_t time_stamp_end = REST_StringHelper::StringToTimeStamp(fFilterEndTime);
429
430 if (!time_stamp_end || !time_stamp_start) {
431 RESTError << "TRestDataSet::FileSelect. Start or end dates not properly formed. Please, check "
432 "REST_StringHelper::StringToTimeStamp documentation for valid formats"
433 << RESTendl;
434 return fFileSelection;
435 }
436
437 std::vector<std::string> fileNames = TRestTools::GetFilesMatchingPattern(fFilePattern);
438
439 RESTInfo << "TRestDataSet::FileSelection. Starting file selection." << RESTendl;
440 RESTInfo << "Total files : " << fileNames.size() << RESTendl;
441 RESTInfo << "This process may take long computation time in case there are many files." << RESTendl;
442
443 fTotalDuration = 0;
444 std::cout << "Processing file selection.";
445 int cnt = 1;
446 for (const auto& file : fileNames) {
447 if (cnt % 100 == 0) {
448 std::cout << std::endl;
449 std::cout << "Files processed: " << cnt << " ." << std::flush;
450 }
451 cnt++;
452 TRestRun run(file);
453 std::cout << "." << std::flush;
454 double runStart = run.GetStartTimestamp();
455 double runEnd = run.GetEndTimestamp();
456
457 if (runStart < time_stamp_start || runEnd > time_stamp_end) {
458 RESTInfo << "Rejecting file out of date range: " << file << RESTendl;
459 continue;
460 }
461
462 int n = 0;
463 bool accept = true;
464 for (const auto& md : fFilterMetadata) {
465 std::string mdValue = run.GetMetadataMember(md);
466
467 if (!fFilterContains[n].empty())
468 if (mdValue.find(fFilterContains[n]) == std::string::npos) accept = false;
469
470 if (fFilterGreaterThan[n] != -1) {
471 if (StringToDouble(mdValue) <= fFilterGreaterThan[n]) accept = false;
472 }
473
474 if (fFilterLowerThan[n] != -1)
475 if (StringToDouble(mdValue) >= fFilterLowerThan[n]) accept = false;
476
477 if (fFilterEqualsTo[n] != -1)
478 if (StringToDouble(mdValue) != fFilterEqualsTo[n]) accept = false;
479
480 n++;
481 }
482
483 if (!accept) continue;
484
485 Double_t acc = 0;
486 for (auto& [name, properties] : fQuantity) {
487 std::string value = run.ReplaceMetadataMembers(properties.metadata);
488 const Double_t val = REST_StringHelper::StringToDouble(value);
489
490 if (properties.strategy == "accumulate") {
491 acc += val;
492 properties.value = StringWithPrecision(val, 2);
493 }
494
495 if (properties.strategy == "max")
496 if (properties.value.empty() || REST_StringHelper::StringToDouble(properties.value) < val)
497 properties.value = value;
498
499 if (properties.strategy == "min")
500 if (properties.value.empty() || REST_StringHelper::StringToDouble(properties.value) > val)
501 properties.value = value;
502
503 if (properties.strategy == "unique") {
504 if (properties.value.empty())
505 properties.value = value;
506 else if (properties.value != value) {
507 RESTWarning << "TRestDataSet::FileSelection. Relevant quantity retrieval." << RESTendl;
508 RESTWarning << "A unique metadata member used for the `" << name
509 << "` quantity is not unique!!" << RESTendl;
510 RESTWarning << "Pre-registered value : " << properties.value << " New value : " << value
511 << RESTendl;
512 }
513 }
514
515 if (properties.strategy == "last") properties.value = value;
516 }
517
518 if (run.GetStartTimestamp() < fStartTime) fStartTime = run.GetStartTimestamp();
519
520 if (run.GetEndTimestamp() > fEndTime) fEndTime = run.GetEndTimestamp();
521
522 fTotalDuration += run.GetEndTimestamp() - run.GetStartTimestamp();
523 fFileSelection.push_back(file);
524 }
525 std::cout << std::endl;
526
527 return fFileSelection;
528}
529
536ROOT::RDF::RNode TRestDataSet::Range(size_t from, size_t to) { return fDataFrame.Range(from, to); }
537
542ROOT::RDF::RNode TRestDataSet::ApplyRange(size_t from, size_t to) {
543 fDataFrame = fDataFrame.Range(from, to);
545 return fDataFrame;
546}
547
554ROOT::RDF::RNode TRestDataSet::MakeCut(const TRestCut* cut) {
555 auto df = fDataFrame;
556
557 if (cut == nullptr) return df;
558
559 auto paramCut = cut->GetParamCut();
560 auto obsList = df.GetColumnNames();
561 for (const auto& [param, condition] : paramCut) {
562 if (std::find(obsList.begin(), obsList.end(), param) != obsList.end()) {
563 std::string pCut = param + condition;
564 RESTDebug << "Applying cut " << pCut << RESTendl;
565 df = df.Filter(pCut);
566 } else {
567 RESTWarning << " Cut observable " << param << " not found in observable list, skipping..."
568 << RESTendl;
569 }
570 }
571
572 auto cutString = cut->GetCutStrings();
573 for (const auto& pCut : cutString) {
574 bool added = false;
575 for (const auto& obs : obsList) {
576 if (pCut.find(obs) != std::string::npos) {
577 RESTDebug << "Applying cut " << pCut << RESTendl;
578 df = df.Filter(pCut);
579 added = true;
580 break;
581 }
582 }
583
584 if (!added) {
585 RESTWarning << " Cut string " << pCut << " not found in observable list, skipping..." << RESTendl;
586 }
587 }
588
589 return df;
590}
591
598 auto nEntries = fDataFrame.Count();
599 if (*nEntries == (long long unsigned int)GetTree()->GetEntries()) return *nEntries;
600 RESTWarning << "TRestDataSet::GetEntries. Number of tree entries is not the same as RDataFrame entries."
601 << RESTendl;
602 RESTWarning << "Returning RDataFrame entries" << RESTendl;
603 return *nEntries;
604}
605
618ROOT::RDF::RNode TRestDataSet::DefineColumn(const std::string& columnName, const std::string& formula) {
619 auto df = fDataFrame;
620
621 std::string evalFormula = formula;
622 for (auto const& [name, properties] : fQuantity)
623 evalFormula = REST_StringHelper::Replace(evalFormula, name, properties.value);
624
625 df = df.Define(columnName, evalFormula);
626
627 return df;
628}
629
635
636 RESTMetadata << " - StartTime : " << REST_StringHelper::ToDateTimeString(fStartTime) << RESTendl;
637 RESTMetadata << " - EndTime : " << REST_StringHelper::ToDateTimeString(fEndTime) << RESTendl;
638 RESTMetadata << " - Path : " << TRestTools::SeparatePathAndName(fFilePattern).first << RESTendl;
639 RESTMetadata << " - File pattern : " << TRestTools::SeparatePathAndName(fFilePattern).second << RESTendl;
640 RESTMetadata << " " << RESTendl;
641 RESTMetadata << " - Accumulated run time (seconds) : " << fTotalDuration << RESTendl;
642 RESTMetadata << " - Accumulated run time (hours) : " << fTotalDuration / 3600. << RESTendl;
643 RESTMetadata << " - Accumulated run time (days) : " << fTotalDuration / 3600. / 24. << RESTendl;
644
645 RESTMetadata << " " << RESTendl;
646
647 if (!fObservablesList.empty()) {
648 RESTMetadata << " Single observables added:" << RESTendl;
649 RESTMetadata << " -------------------------" << RESTendl;
650 for (const auto& l : fObservablesList) RESTMetadata << " - " << l << RESTendl;
651
652 RESTMetadata << " " << RESTendl;
653 }
654
655 if (!fProcessObservablesList.empty()) {
656 RESTMetadata << " Process observables added: " << RESTendl;
657 RESTMetadata << " -------------------------- " << RESTendl;
658 for (const auto& l : fProcessObservablesList) RESTMetadata << " - " << l << RESTendl;
659
660 RESTMetadata << " " << RESTendl;
661 }
662
663 if (!fFilterMetadata.empty()) {
664 RESTMetadata << " Metadata filters: " << RESTendl;
665 RESTMetadata << " ----------------- " << RESTendl;
666 RESTMetadata << " - StartTime : " << fFilterStartTime << RESTendl;
667 RESTMetadata << " - EndTime : " << fFilterEndTime << RESTendl;
668 int n = 0;
669 for (const auto& mdFilter : fFilterMetadata) {
670 RESTMetadata << " - " << mdFilter << ".";
671
672 if (!fFilterContains[n].empty()) RESTMetadata << " Contains: " << fFilterContains[n];
673 if (fFilterGreaterThan[n] != -1) RESTMetadata << " Greater than: " << fFilterGreaterThan[n];
674 if (fFilterLowerThan[n] != -1) RESTMetadata << " Lower than: " << fFilterLowerThan[n];
675 if (fFilterEqualsTo[n] != -1) RESTMetadata << " Equals to: " << fFilterEqualsTo[n];
676
677 RESTMetadata << RESTendl;
678 n++;
679 }
680
681 RESTMetadata << " " << RESTendl;
682 }
683
684 if (!fQuantity.empty()) {
685 RESTMetadata << " Relevant quantities: " << RESTendl;
686 RESTMetadata << " -------------------- " << RESTendl;
687
688 for (auto const& [name, properties] : fQuantity) {
689 RESTMetadata << " - Name : " << name << ". Value : " << properties.value
690 << ". Strategy: " << properties.strategy << RESTendl;
691 RESTMetadata << " - Metadata: " << properties.metadata << RESTendl;
692 RESTMetadata << " - Description: " << properties.description << RESTendl;
693 RESTMetadata << " " << RESTendl;
694 }
695 }
696
697 if (!fColumnNameExpressions.empty()) {
698 RESTMetadata << " New columns added to generated dataframe: " << RESTendl;
699 RESTMetadata << " ---------------------------------------- " << RESTendl;
700 for (const auto& [cName, cExpression] : fColumnNameExpressions) {
701 RESTMetadata << " - Name : " << cName << RESTendl;
702 RESTMetadata << " - Expression: " << cExpression << RESTendl;
703 RESTMetadata << " " << RESTendl;
704 }
705 }
706
707 if (fMergedDataset) {
708 RESTMetadata << " " << RESTendl;
709 RESTMetadata << "This is a combined dataset." << RESTendl;
710 RESTMetadata << " -------------------- " << RESTendl;
711 RESTMetadata << " - Relevant quantities have been removed!" << RESTendl;
712 RESTMetadata << " - Dataset metadata properties correspond to the first file in the list."
713 << RESTendl;
714 RESTMetadata << " " << RESTendl;
715 RESTMetadata << "List of imported files: " << RESTendl;
716 RESTMetadata << " -------------------- " << RESTendl;
717 for (const auto& fn : fImportedFiles) RESTMetadata << " - " << fn << RESTendl;
718 }
719
720 RESTMetadata << " " << RESTendl;
721 if (fMT)
722 RESTMetadata << " - Multithreading was enabled" << RESTendl;
723 else
724 RESTMetadata << " - Multithreading was NOT enabled" << RESTendl;
725
726 RESTMetadata << "----" << RESTendl;
727}
728
734
736 TiXmlElement* filterDefinition = GetElement("filter");
737 while (filterDefinition != nullptr) {
738 std::string metadata = GetFieldValue("metadata", filterDefinition);
739 if (metadata.empty() || metadata == "Not defined") {
740 RESTError << "Filter key defined without metadata member!" << RESTendl;
741 exit(1);
742 }
743
744 fFilterMetadata.push_back(metadata);
745
746 std::string contains = GetFieldValue("contains", filterDefinition);
747 if (contains == "Not defined") contains = "";
748 Double_t greaterThan = StringToDouble(GetFieldValue("greaterThan", filterDefinition));
749 Double_t lowerThan = StringToDouble(GetFieldValue("lowerThan", filterDefinition));
750 Double_t equalsTo = StringToDouble(GetFieldValue("equalsTo", filterDefinition));
751
752 fFilterContains.push_back(contains);
753 fFilterGreaterThan.push_back(greaterThan);
754 fFilterLowerThan.push_back(lowerThan);
755 fFilterEqualsTo.push_back(equalsTo);
756
757 filterDefinition = GetNextElement(filterDefinition);
758 }
759
761 TiXmlElement* observablesDefinition = GetElement("observables");
762 while (observablesDefinition != nullptr) {
763 std::string observables = GetFieldValue("list", observablesDefinition);
764 if (observables.empty() || observables == "Not defined") {
765 RESTError << "<observables key does not contain a list!" << RESTendl;
766 exit(1);
767 }
768
769 std::vector<std::string> obsList = REST_StringHelper::Split(observables, ",");
770
771 fObservablesList.insert(fObservablesList.end(), obsList.begin(), obsList.end());
772
773 observablesDefinition = GetNextElement(observablesDefinition);
774 }
775
777 TiXmlElement* obsProcessDefinition = GetElement("processObservables");
778 while (obsProcessDefinition != nullptr) {
779 std::string observables = GetFieldValue("list", obsProcessDefinition);
780 if (observables.empty() || observables == "Not defined") {
781 RESTError << "<processObservables key does not contain a list!" << RESTendl;
782 exit(1);
783 }
784
785 std::vector<std::string> obsList = REST_StringHelper::Split(observables, ",");
786
787 for (const auto& l : obsList) fProcessObservablesList.push_back(l);
788
789 obsProcessDefinition = GetNextElement(obsProcessDefinition);
790 }
791
793 TiXmlElement* quantityDefinition = GetElement("quantity");
794 while (quantityDefinition != nullptr) {
795 std::string name = GetFieldValue("name", quantityDefinition);
796 if (name.empty() || name == "Not defined") {
797 RESTError << "<quantity key does not contain a name!" << RESTendl;
798 exit(1);
799 }
800
801 std::string metadata = GetFieldValue("metadata", quantityDefinition);
802 if (metadata.empty() || metadata == "Not defined") {
803 RESTError << "<quantity key does not contain a metadata value!" << RESTendl;
804 exit(1);
805 }
806
807 std::string strategy = GetFieldValue("strategy", quantityDefinition);
808 if (strategy.empty() || strategy == "Not defined") {
809 strategy = "unique";
810 }
811
812 std::string description = GetFieldValue("description", quantityDefinition);
813
814 RelevantQuantity quantity;
815 quantity.metadata = metadata;
816 quantity.strategy = strategy;
817 quantity.description = description;
818 quantity.value = "";
819
820 fQuantity[name] = quantity;
821
822 quantityDefinition = GetNextElement(quantityDefinition);
823 }
824
826 TiXmlElement* columnDefinition = GetElement("addColumn");
827 while (columnDefinition != nullptr) {
828 std::string name = GetFieldValue("name", columnDefinition);
829 if (name.empty() || name == "Not defined") {
830 RESTError << "<define key does not contain a name name!" << RESTendl;
831 exit(1);
832 }
833
834 std::string expression = GetFieldValue("expression", columnDefinition);
835 if (expression.empty() || expression == "Not defined") {
836 RESTError << "<addColumn key does not contain a expression value!" << RESTendl;
837 exit(1);
838 }
839
840 fColumnNameExpressions.push_back({name, expression});
841
842 columnDefinition = GetNextElement(columnDefinition);
843 }
844
845 fCut = (TRestCut*)InstantiateChildMetadata("TRestCut");
846}
847
861void TRestDataSet::Export(const std::string& filename, std::vector<std::string> excludeColumns) {
862 RESTInfo << "Exporting dataset" << RESTendl;
863
864 std::vector<std::string> columns = fDataFrame.GetColumnNames();
865 if (!excludeColumns.empty()) {
866 columns.erase(std::remove_if(columns.begin(), columns.end(),
867 [&excludeColumns](std::string elem) {
868 return std::find(excludeColumns.begin(), excludeColumns.end(),
869 elem) != excludeColumns.end();
870 }),
871 columns.end());
872
873 RESTInfo << "Re-Generating snapshot." << RESTendl;
874 std::string user = getenv("USER");
875 std::string fOutName = "/tmp/rest_output_" + user + ".root";
876 fDataFrame.Snapshot("AnalysisTree", fOutName, columns);
877
878 RESTInfo << "Re-importing analysis tree." << RESTendl;
879 fDataFrame = ROOT::RDataFrame("AnalysisTree", fOutName);
880
881 TFile* f = TFile::Open(fOutName.c_str());
882 fTree = (TChain*)f->Get("AnalysisTree");
883 }
884
885 if (TRestTools::GetFileNameExtension(filename) == "txt" ||
886 TRestTools::GetFileNameExtension(filename) == "csv") {
887 if (excludeColumns.empty()) {
888 RESTInfo << "Re-Generating snapshot." << RESTendl;
889 std::string user = getenv("USER");
890 std::string fOutName = "/tmp/rest_output_" + user + ".root";
891 fDataFrame.Snapshot("AnalysisTree", fOutName);
892
893 TFile* f = TFile::Open(fOutName.c_str());
894 fTree = (TChain*)f->Get("AnalysisTree");
895 }
896
897 std::vector<std::string> dataTypes;
898 for (int n = 0; n < fTree->GetListOfBranches()->GetEntries(); n++) {
899 std::string bName = fTree->GetListOfBranches()->At(n)->GetName();
900 std::string type = fTree->GetLeaf((TString)bName)->GetTypeName();
901 dataTypes.push_back(type);
902 if (type != "Double_t" && type != "Int_t") {
903 RESTError << "Branch name : " << bName << " is type : " << type << RESTendl;
904 RESTError << "Only Int_t and Double_t types are allowed for "
905 "exporting to ASCII table"
906 << RESTendl;
907 RESTError << "File will not be generated" << RESTendl;
908 return;
909 }
910 }
911
912 FILE* f = fopen(filename.c_str(), "wt");
914 fprintf(f, "### TRestDataSet generated file\n");
915 fprintf(f, "### \n");
916 fprintf(f, "### StartTime : %s\n", fFilterStartTime.c_str());
917 fprintf(f, "### EndTime : %s\n", fFilterEndTime.c_str());
918 fprintf(f, "###\n");
919 fprintf(f, "### Accumulated run time (seconds) : %lf\n", fTotalDuration);
920 fprintf(f, "### Accumulated run time (hours) : %lf\n", fTotalDuration / 3600.);
921 fprintf(f, "### Accumulated run time (days) : %lf\n", fTotalDuration / 3600. / 24.);
922 fprintf(f, "###\n");
923 fprintf(f, "### Data path : %s\n", TRestTools::SeparatePathAndName(fFilePattern).first.c_str());
924 fprintf(f, "### File pattern : %s\n", TRestTools::SeparatePathAndName(fFilePattern).second.c_str());
925 fprintf(f, "###\n");
926 if (!fFilterMetadata.empty()) {
927 fprintf(f, "### Metadata filters : \n");
928 int n = 0;
929 for (const auto& md : fFilterMetadata) {
930 fprintf(f, "### - %s.", md.c_str());
931 if (!fFilterContains[n].empty()) fprintf(f, " Contains: %s.", fFilterContains[n].c_str());
932 if (fFilterGreaterThan[n] != -1) fprintf(f, " Greater than: %6.3lf.", fFilterGreaterThan[n]);
933 if (fFilterLowerThan[n] != -1) fprintf(f, " Lower than: %6.3lf.", fFilterLowerThan[n]);
934 if (fFilterEqualsTo[n] != -1) fprintf(f, " Equals to: %6.3lf.", fFilterLowerThan[n]);
935 fprintf(f, "\n");
936 n++;
937 }
938 }
939 fprintf(f, "###\n");
940 fprintf(f, "### Relevant quantities: \n");
941 for (auto& [name, properties] : fQuantity) {
942 fprintf(f, "### - %s : %s - %s\n", name.c_str(), properties.value.c_str(),
943 properties.description.c_str());
944 }
945 fprintf(f, "###\n");
946 fprintf(f, "### Observables list: ");
947 for (int n = 0; n < fTree->GetListOfBranches()->GetEntries(); n++) {
948 std::string bName = fTree->GetListOfBranches()->At(n)->GetName();
949 fprintf(f, " %s", bName.c_str());
950 }
951 fprintf(f, "\n");
952 fprintf(f, "###\n");
953 fprintf(f, "### Data starts here\n");
954
955 auto obsNames = fDataFrame.GetColumnNames();
956 std::string obsListStr = "";
957 for (const auto& l : obsNames) {
958 if (!obsListStr.empty()) obsListStr += ":";
959 obsListStr += l;
960 }
961
962 // We do this so that later we can recover the values using TTree::GetVal
963 fTree->Draw((TString)obsListStr, "", "goff");
964
965 for (unsigned int n = 0; n < fTree->GetEntries(); n++) {
966 for (unsigned int m = 0; m < GetNumberOfBranches(); m++) {
967 std::string bName = fTree->GetListOfBranches()->At(m)->GetName();
968 if (m > 0) fprintf(f, "\t");
969 if (dataTypes[m] == "Double_t")
970 if (bName == "timeStamp")
971 fprintf(f, "%010.0lf", fTree->GetVal(m)[n]);
972 else
973 fprintf(f, "%05.3e", fTree->GetVal(m)[n]);
974 else
975 fprintf(f, "%06d", (Int_t)fTree->GetVal(m)[n]);
976 }
977 fprintf(f, "\n");
978 }
979 fclose(f);
980
981 return;
982 } else if (TRestTools::GetFileNameExtension(filename) == "root") {
983 fDataFrame.Snapshot("AnalysisTree", filename);
984
985 TFile* f = TFile::Open(filename.c_str(), "UPDATE");
986 std::string name = this->GetName();
987 if (name.empty()) name = "mock";
988 this->Write(name.c_str());
989 f->Close();
990 } else {
991 RESTWarning << "TRestDataSet::Export. Extension " << TRestTools::GetFileNameExtension(filename)
992 << " not recognized" << RESTendl;
993 }
994 RESTInfo << "Dataset generated: " << filename << RESTendl;
995}
996
1001 SetName(dS.GetName());
1002 fFilterStartTime = dS.GetFilterStartTime();
1003 fFilterEndTime = dS.GetFilterEndTime();
1004 fStartTime = dS.GetStartTime();
1005 fEndTime = dS.GetEndTime();
1006 fFilePattern = dS.GetFilePattern();
1007 fObservablesList = dS.GetObservablesList();
1009 fProcessObservablesList = dS.GetProcessObservablesList();
1010 fFilterMetadata = dS.GetFilterMetadata();
1011 fFilterContains = dS.GetFilterContains();
1012 fFilterGreaterThan = dS.GetFilterGreaterThan();
1013 fFilterLowerThan = dS.GetFilterLowerThan();
1014 fFilterEqualsTo = dS.GetFilterEqualsTo();
1015 fQuantity = dS.GetQuantity();
1016 fColumnNameExpressions = dS.GetAddedColumns();
1018 fCut = dS.GetCut();
1019
1020 return *this;
1021}
1022
1028 auto obsNames = GetObservablesList();
1029 for (const auto& obs : fObservablesList) {
1030 if (std::find(obsNames.begin(), obsNames.end(), obs) != obsNames.end()) {
1031 RESTError << "Cannot merge dataSets with different observable list " << RESTendl;
1032 return false;
1033 }
1034 }
1035
1036 if (fStartTime > dS.GetStartTime()) fStartTime = dS.GetStartTime();
1037 if (fEndTime < dS.GetEndTime()) fEndTime = dS.GetEndTime();
1038
1039 auto fileSelection = dS.GetFileSelection();
1040 fFileSelection.insert(fFileSelection.end(), fileSelection.begin(), fileSelection.end());
1041
1043
1044 return true;
1045}
1046
1052void TRestDataSet::Import(const std::string& fileName) {
1053 if (TRestTools::GetFileNameExtension(fileName) != "root") {
1054 RESTError << "Datasets can only be imported from root files" << RESTendl;
1055 return;
1056 }
1057
1058 TRestDataSet* dS = nullptr;
1059 TFile* file = TFile::Open(fileName.c_str(), "READ");
1060 if (file != nullptr) {
1061 TIter nextkey(file->GetListOfKeys());
1062 TKey* key;
1063 while ((key = (TKey*)nextkey())) {
1064 std::string kName = key->GetClassName();
1065 if (REST_Reflection::GetClassQuick(kName.c_str()) != nullptr &&
1066 REST_Reflection::GetClassQuick(kName.c_str())->InheritsFrom("TRestDataSet")) {
1067 dS = file->Get<TRestDataSet>(key->GetName());
1068 *this = *dS;
1069 }
1070 }
1071 }
1072
1073 if (dS == nullptr) {
1074 RESTError << fileName << " is not a valid dataSet" << RESTendl;
1075 return;
1076 }
1077
1078 if (fMT)
1079 ROOT::EnableImplicitMT();
1080 else
1081 ROOT::DisableImplicitMT();
1082
1083 fDataFrame = ROOT::RDataFrame("AnalysisTree", fileName);
1084
1085 fTree = (TChain*)file->Get("AnalysisTree");
1086}
1087
1097void TRestDataSet::Import(std::vector<std::string> fileNames) {
1098 for (const auto& fN : fileNames)
1099 if (TRestTools::GetFileNameExtension(fN) != "root") {
1100 RESTError << "Datasets can only be imported from root files" << RESTendl;
1101 return;
1102 }
1103
1104 int count = 0;
1105 auto it = fileNames.begin();
1106 while (it != fileNames.end()) {
1107 std::string fileName = *it;
1108 TFile* file = TFile::Open(fileName.c_str(), "READ");
1109 bool isValid = false;
1110 if (file != nullptr) {
1111 TIter nextkey(file->GetListOfKeys());
1112 TKey* key;
1113 while ((key = (TKey*)nextkey())) {
1114 std::string kName = key->GetClassName();
1115 if (REST_Reflection::GetClassQuick(kName.c_str()) != nullptr &&
1116 REST_Reflection::GetClassQuick(kName.c_str())->InheritsFrom("TRestDataSet")) {
1117 TRestDataSet* dS = file->Get<TRestDataSet>(key->GetName());
1119 dS->PrintMetadata();
1120
1121 if (count == 0) {
1122 *this = *dS;
1123 isValid = true;
1124 } else {
1125 isValid = Merge(*dS);
1126 }
1127
1128 if (isValid) count++;
1129 }
1130 }
1131 } else {
1132 RESTError << "Cannot open " << fileName << RESTendl;
1133 }
1134
1135 if (!isValid) {
1136 RESTError << fileName << " is not a valid dataSet skipping..." << RESTendl;
1137 it = fileNames.erase(it);
1138 } else {
1139 ++it;
1140 }
1141 }
1142
1143 if (fileNames.empty()) {
1144 RESTError << "File selection is empty, dataSet will not be imported " << RESTendl;
1145 return;
1146 }
1147
1148 RESTInfo << "Opening list of files. First file: " << fileNames[0] << RESTendl;
1149 fDataFrame = ROOT::RDataFrame("AnalysisTree", fileNames);
1150
1151 if (fTree != nullptr) {
1152 delete fTree;
1153 fTree = nullptr;
1154 }
1155 fTree = new TChain("AnalysisTree");
1156
1157 for (const auto& fN : fileNames) fTree->Add((TString)fN);
1158
1159 fMergedDataset = true;
1160 fImportedFiles = fileNames;
1161
1162 fQuantity.clear();
1163}
std::vector< std::string > GetObservableNames()
It returns a vector with strings containing all the observables that exist in the analysis tree.
A class to help on cuts definitions. To be used with TRestAnalysisTree.
Definition TRestCut.h:31
It allows to group a number of runs that satisfy given metadata conditions.
std::vector< std::string > fFilterContains
If not empty it will check if the metadata member contains the string.
virtual std::vector< std::string > FileSelection()
Function to determine the filenames that satisfy the dataset conditions.
std::vector< Double_t > fFilterLowerThan
If the corresponding element is not empty it will check if the metadata member is lower.
void PrintMetadata() override
Prints on screen the information about the metadata members of TRestDataSet.
TChain * fTree
A pointer to the generated tree.
std::vector< std::string > fProcessObservablesList
It contains a list of the process where all observables should be added.
void Import(const std::string &fileName)
This function imports metadata from a root file it import metadata info from the previous dataSet whi...
std::map< std::string, RelevantQuantity > fQuantity
The properties of a relevant quantity that we want to store together with the dataset.
ROOT::RDF::RNode Range(size_t from, size_t to)
This method returns a RDataFrame node with the number of samples inside the dataset by selecting a ra...
std::vector< std::pair< std::string, std::string > > fColumnNameExpressions
A list of new columns together with its corresponding expressions added to the dataset.
ROOT::RDF::RNode DefineColumn(const std::string &columnName, const std::string &formula)
This function will add a new column to the RDataFrame using the same scheme as the usual RDF::Define ...
Double_t fEndTime
TimeStamp for the end time of the last file.
ROOT::RDF::RNode fDataFrame
The resulting RDF::RNode object after initialization.
size_t GetNumberOfBranches()
Number of variables (or observables)
size_t GetEntries()
It returns the number of entries found inside fDataFrame and prints out a warning if the number of en...
TRestDataSet()
Default constructor.
Double_t GetTotalTimeInSeconds() const
It returns the accumulated run time in seconds.
ROOT::RDF::RNode MakeCut(const TRestCut *cut)
This function applies a TRestCut to the dataframe and returns a dataframe with the applied cuts....
void GenerateDataSet()
This function generates the data frame with the filelist and column names (or observables) that have ...
Bool_t fMT
A flag to enable Multithreading during dataframe generation.
TRestCut * fCut
Parameter cuts over the selected dataset.
void Export(const std::string &filename, std::vector< std::string > excludeColumns={})
It will generate an output file with the dataset compilation. Only the selected branches and the file...
std::string fFilterStartTime
All the selected runs will have a starting date after fStartTime.
Bool_t Merge(const TRestDataSet &dS)
This function merge different TRestDataSet metadata in current dataSet.
std::vector< std::string > fFilterMetadata
A list of metadata members where filters will be applied.
std::vector< std::string > fFileSelection
A list populated by the FileSelection method using the conditions of the dataset.
std::vector< std::string > GetFileSelection()
It returns a list of the files that have been finally selected.
std::string fFilterEndTime
All the selected runs will have an ending date before fEndTime.
Double_t fStartTime
TimeStamp for the start time of the first file.
std::vector< std::string > fObservablesList
It contains a list of the observables that will be added to the final tree or exported file.
TTree * GetTree() const
Gives access to the tree.
Bool_t fMergedDataset
It keeps track if the generated dataset is a pure dataset or a merged one.
void Initialize() override
This function initialize different parameters from the TRestDataSet.
void RegenerateTree(std::vector< std::string > finalList={})
It regenerates the tree so that it is an exact copy of the present DataFrame.
std::vector< std::string > fImportedFiles
The list of dataset files imported.
Double_t fTotalDuration
The total integrated run time of selected files.
std::string fFilePattern
A glob file pattern that must be satisfied by all files.
std::vector< Double_t > fFilterGreaterThan
If the corresponding element is not empty it will check if the metadata member is greater.
ROOT::RDF::RNode ApplyRange(size_t from, size_t to)
This method reduces the number of samples inside the dataset by selecting a range.
std::vector< Double_t > fFilterEqualsTo
If the corresponding element is not empty it will check if the metadata member is equal.
void InitFromConfigFile() override
Initialization of specific TRestDataSet members through an RML file.
TRestDataSet & operator=(TRestDataSet &dS)
Operator to copy TRestDataSet metadata.
~TRestDataSet()
Default destructor.
A base class for any REST metadata class.
virtual void PrintMetadata()
Implemented it in the derived metadata class to print out specific metadata information.
endl_t RESTendl
Termination flag object for TRestStringOutput.
TiXmlElement * GetElement(std::string eleDeclare, TiXmlElement *e=nullptr)
Get an xml element from a given parent element, according to its declaration.
Int_t LoadConfigFromFile(const std::string &configFilename, const std::string &sectionName="")
Give the file name, find out the corresponding section. Then call the main starter.
TRestMetadata * InstantiateChildMetadata(int index, std::string pattern="")
This method will retrieve a new TRestMetadata instance of a child element of the present TRestMetadat...
virtual void InitFromConfigFile()
To make settings from rml file. This method must be implemented in the derived class.
TRestStringOutput::REST_Verbose_Level GetVerboseLevel()
returns the verboselevel in type of REST_Verbose_Level enumerator
std::string GetFieldValue(std::string parName, TiXmlElement *e)
Returns the field value of an xml element which has the specified name.
void SetSectionName(std::string sName)
set the section name, clear the section content
std::string fConfigFileName
Full name of the rml file.
virtual Int_t Write(const char *name=nullptr, Int_t option=0, Int_t bufsize=0)
overwriting the write() method with fStore considered
TiXmlElement * GetNextElement(TiXmlElement *e)
Get the next sibling xml element of this element, with same eleDeclare.
Data provider and manager in REST.
Definition TRestRun.h:18
std::string ReplaceMetadataMembers(const std::string &instr, Int_t precision=8)
It will replace the data members contained inside the string given as input. The data members in the ...
@ REST_Info
+show most of the information for each steps
static std::pair< std::string, std::string > SeparatePathAndName(const std::string &fullname)
Separate path and filename in a full path+filename string, returns a pair of string.
static std::string GetFileNameExtension(const std::string &fullname)
Gets the file extension as the substring found after the latest ".".
static std::vector< std::string > GetFilesMatchingPattern(std::string pattern, bool unlimited=false)
Returns a list of files whose name match the pattern string. Key word is "*". e.g....
TClass * GetClassQuick()
Get the type of a "class" object, returning the wrapped type identifier "TClass".
time_t StringToTimeStamp(std::string time)
A method to convert a date/time formatted string to a timestamp.
std::vector< std::string > Split(std::string in, std::string separator, bool allowBlankString=false, bool removeWhiteSpaces=false, int startPos=-1)
Split the input string according to the given separator. Returning a vector of fragments.
Double_t StringToDouble(std::string in)
Gets a double from a string.
std::string ToDateTimeString(time_t time)
Format time_t into string.
std::string Replace(std::string in, std::string thisString, std::string byThisString, size_t fromPosition=0, Int_t N=0)
Replace any occurences of thisSring by byThisString inside string in.
std::string metadata
The associated metadata member used to register the relevant quantity.
std::string description
A user given description that can be used to define the relevant quantity.
std::string strategy
It determines how to produce the relevant quantity (accumulate/unique/last/max/min)
std::string value
The quantity value.