REST-for-Physics  v2.3
Rare Event Searches ToolKit for Physics
Loading...
Searching...
No Matches
TRestDataSet.cxx
1/*************************************************************************
2 * This file is part of the REST software framework. *
3 * *
4 * Copyright (C) 2016 GIFNA/TREX (University of Zaragoza) *
5 * For more information see https://gifna.unizar.es/trex *
6 * *
7 * REST is free software: you can redistribute it and/or modify *
8 * it under the terms of the GNU General Public License as published by *
9 * the Free Software Foundation, either version 3 of the License, or *
10 * (at your option) any later version. *
11 * *
12 * REST is distributed in the hope that it will be useful, *
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of *
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
15 * GNU General Public License for more details. *
16 * *
17 * You should have a copy of the GNU General Public License along with *
18 * REST in $REST_PATH/LICENSE. *
19 * If not, see https://www.gnu.org/licenses/. *
20 * For the list of contributors see $REST_PATH/CREDITS. *
21 *************************************************************************/
22
300#include "TRestDataSet.h"
301
302#include "TRestRun.h"
303#include "TRestTools.h"
304
305ClassImp(TRestDataSet);
306
311
326TRestDataSet::TRestDataSet(const char* cfgFileName, const std::string& name) : TRestMetadata(cfgFileName) {
328}
329
334
339void TRestDataSet::Initialize() { SetSectionName(this->ClassName()); }
340
346 if (fTree != nullptr) {
347 RESTWarning << "Tree has already been loaded. Skipping TRestDataSet::GenerateDataSet ... "
348 << RESTendl;
349 return;
350 }
351
352 if (fFileSelection.empty()) FileSelection();
353
354 // We are not ready yet
355 if (fFileSelection.empty()) {
356 RESTError << "File selection is empty " << RESTendl;
357 return;
358 }
359
361 TRestRun run(fFileSelection.front());
362 std::set<std::string> finalList;
363 finalList.insert("runOrigin");
364 finalList.insert("eventID");
365 finalList.insert("timeStamp");
366
367 auto obsNames = run.GetAnalysisTree()->GetObservableNames();
368 auto obsFromList = TRestTools::GetMatchingStrings(obsNames, fObservablesList);
369 finalList.insert(obsFromList.begin(), obsFromList.end());
370
371 if (fMT)
372 ROOT::EnableImplicitMT();
373 else
374 ROOT::DisableImplicitMT();
375
376 RESTInfo << "Initializing dataset" << RESTendl;
377 fDataFrame = ROOT::RDataFrame("AnalysisTree", fFileSelection);
378
379 RESTInfo << "Making cuts" << RESTendl;
381
382 // Adding new user columns added to the dataset
383 for (const auto& [cName, cExpression] : fColumnNameExpressions) {
384 RESTInfo << "Adding column to dataset: " << cName << RESTendl;
385 finalList.emplace(cName);
386 fDataFrame = DefineColumn(cName, cExpression);
387 }
388
389 RegenerateTree(std::vector<std::string>(finalList.begin(), finalList.end()));
390
391 RESTInfo << " - Dataset generated!" << RESTendl;
392}
393
397void TRestDataSet::RegenerateTree(std::vector<std::string> finalList) {
398 RESTInfo << "Generating snapshot." << RESTendl;
399 std::string user = getenv("USER");
400 std::string fOutName = "/tmp/rest_output_" + user + ".root";
401 if (!finalList.empty())
402 fDataFrame.Snapshot("AnalysisTree", fOutName, finalList);
403 else
404 fDataFrame.Snapshot("AnalysisTree", fOutName);
405
406 RESTInfo << "Re-importing analysis tree." << RESTendl;
407 fDataFrame = ROOT::RDataFrame("AnalysisTree", fOutName);
408
409 TFile* f = TFile::Open(fOutName.c_str());
410 fTree = (TChain*)f->Get("AnalysisTree");
411}
412
416std::vector<std::string> TRestDataSet::FileSelection() {
417 fFileSelection.clear();
418
419 std::time_t time_stamp_start = REST_StringHelper::StringToTimeStamp(fFilterStartTime);
420 std::time_t time_stamp_end = REST_StringHelper::StringToTimeStamp(fFilterEndTime);
421
422 if (!time_stamp_end || !time_stamp_start) {
423 RESTError << "TRestDataSet::FileSelect. Start or end dates not properly formed. Please, check "
424 "REST_StringHelper::StringToTimeStamp documentation for valid formats"
425 << RESTendl;
426 return fFileSelection;
427 }
428
429 std::vector<std::string> fileNames = TRestTools::GetFilesMatchingPattern(fFilePattern);
430
431 RESTInfo << "TRestDataSet::FileSelection. Starting file selection." << RESTendl;
432 RESTInfo << "Total files : " << fileNames.size() << RESTendl;
433 RESTInfo << "This process may take long computation time in case there are many files." << RESTendl;
434
435 fTotalDuration = 0;
436 std::cout << "Processing file selection.";
437 int cnt = 1;
438 for (const auto& file : fileNames) {
439 if (cnt % 100 == 0) {
440 std::cout << std::endl;
441 std::cout << "Files processed: " << cnt << " ." << std::flush;
442 }
443 cnt++;
444 TRestRun run(file);
445 std::cout << "." << std::flush;
446 double runStart = run.GetStartTimestamp();
447 double runEnd = run.GetEndTimestamp();
448
449 if (runStart < time_stamp_start || runEnd > time_stamp_end) {
450 RESTInfo << "Rejecting file out of date range: " << file << RESTendl;
451 continue;
452 }
453
454 int n = 0;
455 bool accept = true;
456 for (const auto& md : fFilterMetadata) {
457 std::string mdValue = run.GetMetadataMember(md);
458
459 if (!fFilterContains[n].empty())
460 if (mdValue.find(fFilterContains[n]) == std::string::npos) accept = false;
461
462 if (fFilterGreaterThan[n] != -1) {
463 if (StringToDouble(mdValue) <= fFilterGreaterThan[n]) accept = false;
464 }
465
466 if (fFilterLowerThan[n] != -1)
467 if (StringToDouble(mdValue) >= fFilterLowerThan[n]) accept = false;
468
469 if (fFilterEqualsTo[n] != -1)
470 if (StringToDouble(mdValue) != fFilterEqualsTo[n]) accept = false;
471
472 n++;
473 }
474
475 if (!accept) continue;
476
477 Double_t acc = 0;
478 for (auto& [name, properties] : fQuantity) {
479 std::string value = run.ReplaceMetadataMembers(properties.metadata);
480 const Double_t val = REST_StringHelper::StringToDouble(value);
481
482 if (properties.strategy == "accumulate") {
483 acc += val;
484 properties.value = StringWithPrecision(val, 2);
485 }
486
487 if (properties.strategy == "max")
488 if (properties.value.empty() || REST_StringHelper::StringToDouble(properties.value) < val)
489 properties.value = value;
490
491 if (properties.strategy == "min")
492 if (properties.value.empty() || REST_StringHelper::StringToDouble(properties.value) > val)
493 properties.value = value;
494
495 if (properties.strategy == "unique") {
496 if (properties.value.empty())
497 properties.value = value;
498 else if (properties.value != value) {
499 RESTWarning << "TRestDataSet::FileSelection. Relevant quantity retrieval." << RESTendl;
500 RESTWarning << "A unique metadata member used for the `" << name
501 << "` quantity is not unique!!" << RESTendl;
502 RESTWarning << "Pre-registered value : " << properties.value << " New value : " << value
503 << RESTendl;
504 }
505 }
506
507 if (properties.strategy == "last") properties.value = value;
508
509 if (properties.strategy.find("append") != std::string::npos) {
510 // Get appender from "append" til the end of string. E.g. "append," -> "," or "append" -> ""
511 std::string appender = properties.strategy.substr(properties.strategy.find("append") + 6);
512 if (properties.value.empty())
513 properties.value = value;
514 else
515 // do not append if value already contains the value to append
516 if (properties.value.find(value) == std::string::npos)
517 properties.value += appender + value;
518 }
519
520 if (properties.strategy.find("extend") != std::string::npos) {
521 // Get appender from "extend" til the end of string. E.g. "extend," -> "," or "extend" -> ""
522 std::string appender = properties.strategy.substr(properties.strategy.find("extend") + 6);
523 if (properties.value.empty())
524 properties.value = value;
525 else
526 properties.value += appender + value;
527 }
528 }
529
530 if (run.GetStartTimestamp() < fStartTime) fStartTime = run.GetStartTimestamp();
531
532 if (run.GetEndTimestamp() > fEndTime) fEndTime = run.GetEndTimestamp();
533
534 fTotalDuration += run.GetEndTimestamp() - run.GetStartTimestamp();
535 fFileSelection.push_back(file);
536 }
537 std::cout << std::endl;
538
539 return fFileSelection;
540}
541
548ROOT::RDF::RNode TRestDataSet::Range(size_t from, size_t to) { return fDataFrame.Range(from, to); }
549
554ROOT::RDF::RNode TRestDataSet::ApplyRange(size_t from, size_t to) {
555 fDataFrame = fDataFrame.Range(from, to);
557 return fDataFrame;
558}
559
566ROOT::RDF::RNode TRestDataSet::MakeCut(const TRestCut* cut) {
567 auto df = fDataFrame;
568
569 if (cut == nullptr) return df;
570
571 auto paramCut = cut->GetParamCut();
572 auto obsList = df.GetColumnNames();
573 for (const auto& [param, condition] : paramCut) {
574 if (std::find(obsList.begin(), obsList.end(), param) != obsList.end()) {
575 std::string pCut = param + condition;
576 RESTDebug << "Applying cut " << pCut << RESTendl;
577 df = df.Filter(pCut);
578 } else {
579 RESTWarning << " Cut observable " << param << " not found in observable list, skipping..."
580 << RESTendl;
581 }
582 }
583
584 auto cutString = cut->GetCutStrings();
585 for (const auto& pCut : cutString) {
586 bool added = false;
587 for (const auto& obs : obsList) {
588 if (pCut.find(obs) != std::string::npos) {
589 RESTDebug << "Applying cut " << pCut << RESTendl;
590 df = df.Filter(pCut);
591 added = true;
592 break;
593 }
594 }
595
596 if (!added) {
597 RESTWarning << " Cut string " << pCut << " not found in observable list, skipping..." << RESTendl;
598 }
599 }
600
601 return df;
602}
603
610 auto nEntries = fDataFrame.Count();
611 if (*nEntries == (long long unsigned int)GetTree()->GetEntries()) return *nEntries;
612 RESTWarning << "TRestDataSet::GetEntries. Number of tree entries is not the same as RDataFrame entries."
613 << RESTendl;
614 RESTWarning << "Returning RDataFrame entries" << RESTendl;
615 return *nEntries;
616}
617
630ROOT::RDF::RNode TRestDataSet::DefineColumn(const std::string& columnName, const std::string& formula) {
631 auto df = fDataFrame;
632
633 std::string evalFormula = formula;
634 for (auto const& [name, properties] : fQuantity)
635 evalFormula = REST_StringHelper::Replace(evalFormula, name, properties.value);
636
637 df = df.Define(columnName, evalFormula);
638
639 return df;
640}
641
647
648 RESTMetadata << " - StartTime : " << REST_StringHelper::ToDateTimeString(fStartTime) << RESTendl;
649 RESTMetadata << " - EndTime : " << REST_StringHelper::ToDateTimeString(fEndTime) << RESTendl;
650 RESTMetadata << " - Path : " << TRestTools::SeparatePathAndName(fFilePattern).first << RESTendl;
651 RESTMetadata << " - File pattern : " << TRestTools::SeparatePathAndName(fFilePattern).second << RESTendl;
652 RESTMetadata << " " << RESTendl;
653 RESTMetadata << " - Accumulated run time (seconds) : " << fTotalDuration << RESTendl;
654 RESTMetadata << " - Accumulated run time (hours) : " << fTotalDuration / 3600. << RESTendl;
655 RESTMetadata << " - Accumulated run time (days) : " << fTotalDuration / 3600. / 24. << RESTendl;
656
657 RESTMetadata << " " << RESTendl;
658
659 if (!fObservablesList.empty()) {
660 RESTMetadata << " Observables added:" << RESTendl;
661 RESTMetadata << " -------------------------" << RESTendl;
662 for (const auto& l : fObservablesList) RESTMetadata << " - " << l << RESTendl;
663
664 RESTMetadata << " " << RESTendl;
665 }
666
667 if (!fFilterMetadata.empty()) {
668 RESTMetadata << " Metadata filters: " << RESTendl;
669 RESTMetadata << " ----------------- " << RESTendl;
670 RESTMetadata << " - StartTime : " << fFilterStartTime << RESTendl;
671 RESTMetadata << " - EndTime : " << fFilterEndTime << RESTendl;
672 int n = 0;
673 for (const auto& mdFilter : fFilterMetadata) {
674 RESTMetadata << " - " << mdFilter << ".";
675
676 if (!fFilterContains[n].empty()) RESTMetadata << " Contains: " << fFilterContains[n];
677 if (fFilterGreaterThan[n] != -1) RESTMetadata << " Greater than: " << fFilterGreaterThan[n];
678 if (fFilterLowerThan[n] != -1) RESTMetadata << " Lower than: " << fFilterLowerThan[n];
679 if (fFilterEqualsTo[n] != -1) RESTMetadata << " Equals to: " << fFilterEqualsTo[n];
680
681 RESTMetadata << RESTendl;
682 n++;
683 }
684
685 RESTMetadata << " " << RESTendl;
686 }
687
688 if (!fQuantity.empty()) {
689 RESTMetadata << " Relevant quantities: " << RESTendl;
690 RESTMetadata << " -------------------- " << RESTendl;
691
692 for (auto const& [name, properties] : fQuantity) {
693 RESTMetadata << " - Name : " << name << ". Value : " << properties.value
694 << ". Strategy: " << properties.strategy << RESTendl;
695 RESTMetadata << " - Metadata: " << properties.metadata << RESTendl;
696 RESTMetadata << " - Description: " << properties.description << RESTendl;
697 RESTMetadata << " " << RESTendl;
698 }
699 }
700
701 if (!fColumnNameExpressions.empty()) {
702 RESTMetadata << " New columns added to generated dataframe: " << RESTendl;
703 RESTMetadata << " ---------------------------------------- " << RESTendl;
704 for (const auto& [cName, cExpression] : fColumnNameExpressions) {
705 RESTMetadata << " - Name : " << cName << RESTendl;
706 RESTMetadata << " - Expression: " << cExpression << RESTendl;
707 RESTMetadata << " " << RESTendl;
708 }
709 }
710
711 if (fMergedDataset) {
712 RESTMetadata << " " << RESTendl;
713 RESTMetadata << "This is a combined dataset." << RESTendl;
714 RESTMetadata << " -------------------- " << RESTendl;
715 RESTMetadata << " - Relevant quantities have been removed!" << RESTendl;
716 RESTMetadata << " - Dataset metadata properties correspond to the first file in the list."
717 << RESTendl;
718 RESTMetadata << " " << RESTendl;
719 RESTMetadata << "List of imported files: " << RESTendl;
720 RESTMetadata << " -------------------- " << RESTendl;
721 for (const auto& fn : fImportedFiles) RESTMetadata << " - " << fn << RESTendl;
722 }
723
724 RESTMetadata << " " << RESTendl;
725 if (fMT)
726 RESTMetadata << " - Multithreading was enabled" << RESTendl;
727 else
728 RESTMetadata << " - Multithreading was NOT enabled" << RESTendl;
729
730 RESTMetadata << "----" << RESTendl;
731}
732
738
740 TiXmlElement* filterDefinition = GetElement("filter");
741 while (filterDefinition != nullptr) {
742 std::string metadata = GetFieldValue("metadata", filterDefinition);
743 if (metadata.empty() || metadata == "Not defined") {
744 RESTError << "Filter key defined without metadata member!" << RESTendl;
745 exit(1);
746 }
747
748 fFilterMetadata.push_back(metadata);
749
750 std::string contains = GetFieldValue("contains", filterDefinition);
751 if (contains == "Not defined") contains = "";
752 Double_t greaterThan = StringToDouble(GetFieldValue("greaterThan", filterDefinition));
753 Double_t lowerThan = StringToDouble(GetFieldValue("lowerThan", filterDefinition));
754 Double_t equalsTo = StringToDouble(GetFieldValue("equalsTo", filterDefinition));
755
756 fFilterContains.push_back(contains);
757 fFilterGreaterThan.push_back(greaterThan);
758 fFilterLowerThan.push_back(lowerThan);
759 fFilterEqualsTo.push_back(equalsTo);
760
761 filterDefinition = GetNextElement(filterDefinition);
762 }
763
765 TiXmlElement* observablesDefinition = GetElement("observables");
766 while (observablesDefinition != nullptr) {
767 std::string observables = GetFieldValue("list", observablesDefinition);
768 if (observables.empty() || observables == "Not defined") {
769 RESTError << "<observables key does not contain a list!" << RESTendl;
770 exit(1);
771 }
772
773 std::vector<std::string> obsList = REST_StringHelper::Split(observables, ",");
774
775 fObservablesList.insert(fObservablesList.end(), obsList.begin(), obsList.end());
776
777 observablesDefinition = GetNextElement(observablesDefinition);
778 }
779
781 TiXmlElement* obsProcessDefinition = GetElement("processObservables");
782 while (obsProcessDefinition != nullptr) {
783 std::string observables = GetFieldValue("list", obsProcessDefinition);
784 if (observables.empty() || observables == "Not defined") {
785 RESTError << "<processObservables key does not contain a list!" << RESTendl;
786 exit(1);
787 }
788
789 std::vector<std::string> obsList = REST_StringHelper::Split(observables, ",");
790
791 for (const auto& l : obsList) {
792 std::string processObsPattern = l + "_*";
793 fObservablesList.push_back(processObsPattern);
794 }
795
796 obsProcessDefinition = GetNextElement(obsProcessDefinition);
797 }
798
800 TiXmlElement* quantityDefinition = GetElement("quantity");
801 while (quantityDefinition != nullptr) {
802 std::string name = GetFieldValue("name", quantityDefinition);
803 if (name.empty() || name == "Not defined") {
804 RESTError << "<quantity key does not contain a name!" << RESTendl;
805 exit(1);
806 }
807
808 std::string metadata = GetFieldValue("metadata", quantityDefinition);
809 if (metadata.empty() || metadata == "Not defined") {
810 RESTError << "<quantity key does not contain a metadata value!" << RESTendl;
811 exit(1);
812 }
813
814 std::string strategy = GetFieldValue("strategy", quantityDefinition);
815 if (strategy.empty() || strategy == "Not defined") {
816 strategy = "unique";
817 }
818
819 std::string description = GetFieldValue("description", quantityDefinition);
820
821 RelevantQuantity quantity;
822 quantity.metadata = metadata;
823 quantity.strategy = strategy;
824 quantity.description = description;
825 quantity.value = "";
826
827 fQuantity[name] = quantity;
828
829 quantityDefinition = GetNextElement(quantityDefinition);
830 }
831
833 TiXmlElement* columnDefinition = GetElement("addColumn");
834 while (columnDefinition != nullptr) {
835 std::string name = GetFieldValue("name", columnDefinition);
836 if (name.empty() || name == "Not defined") {
837 RESTError << "<define key does not contain a name name!" << RESTendl;
838 exit(1);
839 }
840
841 std::string expression = GetFieldValue("expression", columnDefinition);
842 if (expression.empty() || expression == "Not defined") {
843 RESTError << "<addColumn key does not contain a expression value!" << RESTendl;
844 exit(1);
845 }
846
847 fColumnNameExpressions.push_back({name, expression});
848
849 columnDefinition = GetNextElement(columnDefinition);
850 }
851
852 fCut = (TRestCut*)InstantiateChildMetadata("TRestCut");
853}
854
868void TRestDataSet::Export(const std::string& filename, std::vector<std::string> excludeColumns) {
869 RESTInfo << "Exporting dataset" << RESTendl;
870
871 std::vector<std::string> columns = fDataFrame.GetColumnNames();
872 if (!excludeColumns.empty()) {
873 columns.erase(std::remove_if(columns.begin(), columns.end(),
874 [&excludeColumns](std::string elem) {
875 return std::find(excludeColumns.begin(), excludeColumns.end(),
876 elem) != excludeColumns.end();
877 }),
878 columns.end());
879
880 RESTInfo << "Re-Generating snapshot." << RESTendl;
881 std::string user = getenv("USER");
882 std::string fOutName = "/tmp/rest_output_" + user + ".root";
883 fDataFrame.Snapshot("AnalysisTree", fOutName, columns);
884
885 RESTInfo << "Re-importing analysis tree." << RESTendl;
886 fDataFrame = ROOT::RDataFrame("AnalysisTree", fOutName);
887
888 TFile* f = TFile::Open(fOutName.c_str());
889 fTree = (TChain*)f->Get("AnalysisTree");
890 }
891
892 if (TRestTools::GetFileNameExtension(filename) == "txt" ||
893 TRestTools::GetFileNameExtension(filename) == "csv") {
894 if (excludeColumns.empty()) {
895 RESTInfo << "Re-Generating snapshot." << RESTendl;
896 std::string user = getenv("USER");
897 std::string fOutName = "/tmp/rest_output_" + user + ".root";
898 fDataFrame.Snapshot("AnalysisTree", fOutName);
899
900 TFile* f = TFile::Open(fOutName.c_str());
901 fTree = (TChain*)f->Get("AnalysisTree");
902 }
903
904 std::vector<std::string> dataTypes;
905 for (int n = 0; n < fTree->GetListOfBranches()->GetEntries(); n++) {
906 std::string bName = fTree->GetListOfBranches()->At(n)->GetName();
907 std::string type = fTree->GetLeaf((TString)bName)->GetTypeName();
908 dataTypes.push_back(type);
909 if (type != "Double_t" && type != "Int_t") {
910 RESTError << "Branch name : " << bName << " is type : " << type << RESTendl;
911 RESTError << "Only Int_t and Double_t types are allowed for "
912 "exporting to ASCII table"
913 << RESTendl;
914 RESTError << "File will not be generated" << RESTendl;
915 return;
916 }
917 }
918
919 FILE* f = fopen(filename.c_str(), "wt");
921 fprintf(f, "### TRestDataSet generated file\n");
922 fprintf(f, "### \n");
923 fprintf(f, "### StartTime : %s\n", fFilterStartTime.c_str());
924 fprintf(f, "### EndTime : %s\n", fFilterEndTime.c_str());
925 fprintf(f, "###\n");
926 fprintf(f, "### Accumulated run time (seconds) : %lf\n", fTotalDuration);
927 fprintf(f, "### Accumulated run time (hours) : %lf\n", fTotalDuration / 3600.);
928 fprintf(f, "### Accumulated run time (days) : %lf\n", fTotalDuration / 3600. / 24.);
929 fprintf(f, "###\n");
930 fprintf(f, "### Data path : %s\n", TRestTools::SeparatePathAndName(fFilePattern).first.c_str());
931 fprintf(f, "### File pattern : %s\n", TRestTools::SeparatePathAndName(fFilePattern).second.c_str());
932 fprintf(f, "###\n");
933 if (!fFilterMetadata.empty()) {
934 fprintf(f, "### Metadata filters : \n");
935 int n = 0;
936 for (const auto& md : fFilterMetadata) {
937 fprintf(f, "### - %s.", md.c_str());
938 if (!fFilterContains[n].empty()) fprintf(f, " Contains: %s.", fFilterContains[n].c_str());
939 if (fFilterGreaterThan[n] != -1) fprintf(f, " Greater than: %6.3lf.", fFilterGreaterThan[n]);
940 if (fFilterLowerThan[n] != -1) fprintf(f, " Lower than: %6.3lf.", fFilterLowerThan[n]);
941 if (fFilterEqualsTo[n] != -1) fprintf(f, " Equals to: %6.3lf.", fFilterLowerThan[n]);
942 fprintf(f, "\n");
943 n++;
944 }
945 }
946 fprintf(f, "###\n");
947 fprintf(f, "### Relevant quantities: \n");
948 for (auto& [name, properties] : fQuantity) {
949 fprintf(f, "### - %s : %s - %s\n", name.c_str(), properties.value.c_str(),
950 properties.description.c_str());
951 }
952 fprintf(f, "###\n");
953 fprintf(f, "### Observables list: ");
954 for (int n = 0; n < fTree->GetListOfBranches()->GetEntries(); n++) {
955 std::string bName = fTree->GetListOfBranches()->At(n)->GetName();
956 fprintf(f, " %s", bName.c_str());
957 }
958 fprintf(f, "\n");
959 fprintf(f, "###\n");
960 fprintf(f, "### Data starts here\n");
961
962 auto obsNames = fDataFrame.GetColumnNames();
963 std::string obsListStr = "";
964 for (const auto& l : obsNames) {
965 if (!obsListStr.empty()) obsListStr += ":";
966 obsListStr += l;
967 }
968
969 // We do this so that later we can recover the values using TTree::GetVal
970 fTree->Draw((TString)obsListStr, "", "goff");
971
972 for (unsigned int n = 0; n < fTree->GetEntries(); n++) {
973 for (unsigned int m = 0; m < GetNumberOfBranches(); m++) {
974 std::string bName = fTree->GetListOfBranches()->At(m)->GetName();
975 if (m > 0) fprintf(f, "\t");
976 if (dataTypes[m] == "Double_t")
977 if (bName == "timeStamp")
978 fprintf(f, "%010.0lf", fTree->GetVal(m)[n]);
979 else
980 fprintf(f, "%05.3e", fTree->GetVal(m)[n]);
981 else
982 fprintf(f, "%06d", (Int_t)fTree->GetVal(m)[n]);
983 }
984 fprintf(f, "\n");
985 }
986 fclose(f);
987
988 return;
989 } else if (TRestTools::GetFileNameExtension(filename) == "root") {
990 fDataFrame.Snapshot("AnalysisTree", filename);
991
992 TFile* f = TFile::Open(filename.c_str(), "UPDATE");
993 std::string name = this->GetName();
994 if (name.empty()) name = "mock";
995 this->Write(name.c_str());
996 f->Close();
997 } else {
998 RESTWarning << "TRestDataSet::Export. Extension " << TRestTools::GetFileNameExtension(filename)
999 << " not recognized" << RESTendl;
1000 }
1001 RESTInfo << "Dataset generated: " << filename << RESTendl;
1002}
1003
1008 SetName(dS.GetName());
1009 fFilterStartTime = dS.GetFilterStartTime();
1010 fFilterEndTime = dS.GetFilterEndTime();
1011 fStartTime = dS.GetStartTime();
1012 fEndTime = dS.GetEndTime();
1013 fFilePattern = dS.GetFilePattern();
1014 fObservablesList = dS.GetObservablesList();
1016 fFilterMetadata = dS.GetFilterMetadata();
1017 fFilterContains = dS.GetFilterContains();
1018 fFilterGreaterThan = dS.GetFilterGreaterThan();
1019 fFilterLowerThan = dS.GetFilterLowerThan();
1020 fFilterEqualsTo = dS.GetFilterEqualsTo();
1021 fQuantity = dS.GetQuantity();
1022 fColumnNameExpressions = dS.GetAddedColumns();
1024 fCut = dS.GetCut();
1025
1026 return *this;
1027}
1028
1034 auto obsNames = GetObservablesList();
1035 for (const auto& obs : fObservablesList) {
1036 if (std::find(obsNames.begin(), obsNames.end(), obs) != obsNames.end()) {
1037 RESTError << "Cannot merge dataSets with different observable list " << RESTendl;
1038 return false;
1039 }
1040 }
1041
1042 if (fStartTime > dS.GetStartTime()) fStartTime = dS.GetStartTime();
1043 if (fEndTime < dS.GetEndTime()) fEndTime = dS.GetEndTime();
1044
1045 auto fileSelection = dS.GetFileSelection();
1046 fFileSelection.insert(fFileSelection.end(), fileSelection.begin(), fileSelection.end());
1047
1049
1050 return true;
1051}
1052
1058void TRestDataSet::Import(const std::string& fileName) {
1059 if (TRestTools::GetFileNameExtension(fileName) != "root") {
1060 RESTError << "Datasets can only be imported from root files" << RESTendl;
1061 return;
1062 }
1063
1064 TRestDataSet* dS = nullptr;
1065 TFile* file = TFile::Open(fileName.c_str(), "READ");
1066 if (file != nullptr) {
1067 TIter nextkey(file->GetListOfKeys());
1068 TKey* key;
1069 while ((key = (TKey*)nextkey())) {
1070 std::string kName = key->GetClassName();
1071 if (REST_Reflection::GetClassQuick(kName.c_str()) != nullptr &&
1072 REST_Reflection::GetClassQuick(kName.c_str())->InheritsFrom("TRestDataSet")) {
1073 dS = file->Get<TRestDataSet>(key->GetName());
1074 *this = *dS;
1075 }
1076 }
1077 }
1078
1079 if (dS == nullptr) {
1080 RESTError << fileName << " is not a valid dataSet" << RESTendl;
1081 return;
1082 }
1083
1084 if (fMT)
1085 ROOT::EnableImplicitMT();
1086 else
1087 ROOT::DisableImplicitMT();
1088
1089 fDataFrame = ROOT::RDataFrame("AnalysisTree", fileName);
1090
1091 fTree = (TChain*)file->Get("AnalysisTree");
1092}
1093
1103void TRestDataSet::Import(std::vector<std::string> fileNames) {
1104 for (const auto& fN : fileNames)
1105 if (TRestTools::GetFileNameExtension(fN) != "root") {
1106 RESTError << "Datasets can only be imported from root files" << RESTendl;
1107 return;
1108 }
1109
1110 int count = 0;
1111 auto it = fileNames.begin();
1112 while (it != fileNames.end()) {
1113 std::string fileName = *it;
1114 TFile* file = TFile::Open(fileName.c_str(), "READ");
1115 bool isValid = false;
1116 if (file != nullptr) {
1117 TIter nextkey(file->GetListOfKeys());
1118 TKey* key;
1119 while ((key = (TKey*)nextkey())) {
1120 std::string kName = key->GetClassName();
1121 if (REST_Reflection::GetClassQuick(kName.c_str()) != nullptr &&
1122 REST_Reflection::GetClassQuick(kName.c_str())->InheritsFrom("TRestDataSet")) {
1123 TRestDataSet* dS = file->Get<TRestDataSet>(key->GetName());
1125 dS->PrintMetadata();
1126
1127 if (count == 0) {
1128 *this = *dS;
1129 isValid = true;
1130 } else {
1131 isValid = Merge(*dS);
1132 }
1133
1134 if (isValid) count++;
1135 }
1136 }
1137 } else {
1138 RESTError << "Cannot open " << fileName << RESTendl;
1139 }
1140
1141 if (!isValid) {
1142 RESTError << fileName << " is not a valid dataSet skipping..." << RESTendl;
1143 it = fileNames.erase(it);
1144 } else {
1145 ++it;
1146 }
1147 }
1148
1149 if (fileNames.empty()) {
1150 RESTError << "File selection is empty, dataSet will not be imported " << RESTendl;
1151 return;
1152 }
1153
1154 RESTInfo << "Opening list of files. First file: " << fileNames[0] << RESTendl;
1155 fDataFrame = ROOT::RDataFrame("AnalysisTree", fileNames);
1156
1157 if (fTree != nullptr) {
1158 delete fTree;
1159 fTree = nullptr;
1160 }
1161 fTree = new TChain("AnalysisTree");
1162
1163 for (const auto& fN : fileNames) fTree->Add((TString)fN);
1164
1165 fMergedDataset = true;
1166 fImportedFiles = fileNames;
1167
1168 fQuantity.clear();
1169}
std::vector< std::string > GetObservableNames()
It returns a vector with strings containing all the observables that exist in the analysis tree.
A class to help on cuts definitions. To be used with TRestAnalysisTree.
Definition TRestCut.h:31
It allows to group a number of runs that satisfy given metadata conditions.
std::vector< std::string > fFilterContains
If not empty it will check if the metadata member contains the string.
virtual std::vector< std::string > FileSelection()
Function to determine the filenames that satisfy the dataset conditions.
std::vector< Double_t > fFilterLowerThan
If the corresponding element is not empty it will check if the metadata member is lower.
void PrintMetadata() override
Prints on screen the information about the metadata members of TRestDataSet.
TChain * fTree
A pointer to the generated tree.
void Import(const std::string &fileName)
This function imports metadata from a root file it import metadata info from the previous dataSet whi...
std::map< std::string, RelevantQuantity > fQuantity
The properties of a relevant quantity that we want to store together with the dataset.
ROOT::RDF::RNode Range(size_t from, size_t to)
This method returns a RDataFrame node with the number of samples inside the dataset by selecting a ra...
std::vector< std::pair< std::string, std::string > > fColumnNameExpressions
A list of new columns together with its corresponding expressions added to the dataset.
ROOT::RDF::RNode DefineColumn(const std::string &columnName, const std::string &formula)
This function will add a new column to the RDataFrame using the same scheme as the usual RDF::Define ...
Double_t fEndTime
TimeStamp for the end time of the last file.
ROOT::RDF::RNode fDataFrame
The resulting RDF::RNode object after initialization.
size_t GetNumberOfBranches()
Number of variables (or observables)
size_t GetEntries()
It returns the number of entries found inside fDataFrame and prints out a warning if the number of en...
TRestDataSet()
Default constructor.
Double_t GetTotalTimeInSeconds() const
It returns the accumulated run time in seconds.
ROOT::RDF::RNode MakeCut(const TRestCut *cut)
This function applies a TRestCut to the dataframe and returns a dataframe with the applied cuts....
void GenerateDataSet()
This function generates the data frame with the filelist and column names (or observables) that have ...
Bool_t fMT
A flag to enable Multithreading during dataframe generation.
TRestCut * fCut
Parameter cuts over the selected dataset.
void Export(const std::string &filename, std::vector< std::string > excludeColumns={})
It will generate an output file with the dataset compilation. Only the selected branches and the file...
std::string fFilterStartTime
All the selected runs will have a starting date after fStartTime.
Bool_t Merge(const TRestDataSet &dS)
This function merge different TRestDataSet metadata in current dataSet.
std::vector< std::string > fFilterMetadata
A list of metadata members where filters will be applied.
std::vector< std::string > fFileSelection
A list populated by the FileSelection method using the conditions of the dataset.
std::vector< std::string > GetFileSelection()
It returns a list of the files that have been finally selected.
std::string fFilterEndTime
All the selected runs will have an ending date before fEndTime.
Double_t fStartTime
TimeStamp for the start time of the first file.
std::vector< std::string > fObservablesList
It contains a list of the observables that will be added to the final tree or exported file.
TTree * GetTree() const
Gives access to the tree.
Bool_t fMergedDataset
It keeps track if the generated dataset is a pure dataset or a merged one.
void Initialize() override
This function initialize different parameters from the TRestDataSet.
void RegenerateTree(std::vector< std::string > finalList={})
It regenerates the tree so that it is an exact copy of the present DataFrame.
std::vector< std::string > fImportedFiles
The list of dataset files imported.
Double_t fTotalDuration
The total integrated run time of selected files.
std::string fFilePattern
A glob file pattern that must be satisfied by all files.
std::vector< Double_t > fFilterGreaterThan
If the corresponding element is not empty it will check if the metadata member is greater.
ROOT::RDF::RNode ApplyRange(size_t from, size_t to)
This method reduces the number of samples inside the dataset by selecting a range.
std::vector< Double_t > fFilterEqualsTo
If the corresponding element is not empty it will check if the metadata member is equal.
void InitFromConfigFile() override
Initialization of specific TRestDataSet members through an RML file.
TRestDataSet & operator=(TRestDataSet &dS)
Operator to copy TRestDataSet metadata.
~TRestDataSet()
Default destructor.
A base class for any REST metadata class.
virtual void PrintMetadata()
Implemented it in the derived metadata class to print out specific metadata information.
endl_t RESTendl
Termination flag object for TRestStringOutput.
TiXmlElement * GetElement(std::string eleDeclare, TiXmlElement *e=nullptr)
Get an xml element from a given parent element, according to its declaration.
Int_t LoadConfigFromFile(const std::string &configFilename, const std::string &sectionName="")
Give the file name, find out the corresponding section. Then call the main starter.
TRestMetadata * InstantiateChildMetadata(int index, std::string pattern="")
This method will retrieve a new TRestMetadata instance of a child element of the present TRestMetadat...
virtual void InitFromConfigFile()
To make settings from rml file. This method must be implemented in the derived class.
TRestStringOutput::REST_Verbose_Level GetVerboseLevel()
returns the verboselevel in type of REST_Verbose_Level enumerator
std::string GetFieldValue(std::string parName, TiXmlElement *e)
Returns the field value of an xml element which has the specified name.
void SetSectionName(std::string sName)
set the section name, clear the section content
std::string fConfigFileName
Full name of the rml file.
virtual Int_t Write(const char *name=nullptr, Int_t option=0, Int_t bufsize=0)
overwriting the write() method with fStore considered
TiXmlElement * GetNextElement(TiXmlElement *e)
Get the next sibling xml element of this element, with same eleDeclare.
Data provider and manager in REST.
Definition TRestRun.h:18
std::string ReplaceMetadataMembers(const std::string &instr, Int_t precision=8)
It will replace the data members contained inside the string given as input. The data members in the ...
@ REST_Info
+show most of the information for each steps
static std::pair< std::string, std::string > SeparatePathAndName(const std::string &fullname)
Separate path and filename in a full path+filename string, returns a pair of string.
static std::string GetFileNameExtension(const std::string &fullname)
Gets the file extension as the substring found after the latest ".".
static std::set< std::string > GetMatchingStrings(const std::vector< std::string > &stack, const std::vector< std::string > &wantedStrings)
Returns a set of strings that match the wanted strings from the stack. The wanted strings can contain...
static std::vector< std::string > GetFilesMatchingPattern(std::string pattern, bool unlimited=false)
Returns a list of files whose name match the pattern string. Key word is "*". e.g....
TClass * GetClassQuick()
Get the type of a "class" object, returning the wrapped type identifier "TClass".
time_t StringToTimeStamp(std::string time)
A method to convert a date/time formatted string to a timestamp.
std::vector< std::string > Split(std::string in, std::string separator, bool allowBlankString=false, bool removeWhiteSpaces=false, int startPos=-1)
Split the input string according to the given separator. Returning a vector of fragments.
Double_t StringToDouble(std::string in)
Gets a double from a string.
std::string ToDateTimeString(time_t time)
Format time_t into string.
std::string Replace(std::string in, std::string thisString, std::string byThisString, size_t fromPosition=0, Int_t N=0)
Replace any occurences of thisSring by byThisString inside string in.
std::string metadata
The associated metadata member used to register the relevant quantity.
std::string description
A user given description that can be used to define the relevant quantity.
std::string strategy
It determines how to produce the relevant quantity (accumulate/unique/last/max/min)
std::string value
The quantity value.