REST-for-Physics  v2.3
Rare Event Searches ToolKit for Physics
Loading...
Searching...
No Matches
TRestDataSet.cxx
1/*************************************************************************
2 * This file is part of the REST software framework. *
3 * *
4 * Copyright (C) 2016 GIFNA/TREX (University of Zaragoza) *
5 * For more information see https://gifna.unizar.es/trex *
6 * *
7 * REST is free software: you can redistribute it and/or modify *
8 * it under the terms of the GNU General Public License as published by *
9 * the Free Software Foundation, either version 3 of the License, or *
10 * (at your option) any later version. *
11 * *
12 * REST is distributed in the hope that it will be useful, *
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of *
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
15 * GNU General Public License for more details. *
16 * *
17 * You should have a copy of the GNU General Public License along with *
18 * REST in $REST_PATH/LICENSE. *
19 * If not, see https://www.gnu.org/licenses/. *
20 * For the list of contributors see $REST_PATH/CREDITS. *
21 *************************************************************************/
22
300#include "TRestDataSet.h"
301
302#include <TLeaf.h>
303
304#include "TRestRun.h"
305#include "TRestTools.h"
306
307ClassImp(TRestDataSet);
308
313
328TRestDataSet::TRestDataSet(const char* cfgFileName, const std::string& name) : TRestMetadata(cfgFileName) {
330}
331
336
341void TRestDataSet::Initialize() { SetSectionName(this->ClassName()); }
342
348 if (fTree != nullptr) {
349 RESTWarning << "Tree has already been loaded. Skipping TRestDataSet::GenerateDataSet ... "
350 << RESTendl;
351 return;
352 }
353
354 if (fFileSelection.empty()) FileSelection();
355
356 // We are not ready yet
357 if (fFileSelection.empty()) {
358 RESTError << "File selection is empty " << RESTendl;
359 return;
360 }
361
363 TRestRun run(fFileSelection.front());
364 std::set<std::string> finalList;
365 finalList.insert("runOrigin");
366 finalList.insert("eventID");
367 finalList.insert("timeStamp");
368
369 auto obsNames = run.GetAnalysisTree()->GetObservableNames();
370 auto obsFromList = TRestTools::GetMatchingStrings(obsNames, fObservablesList);
371 finalList.insert(obsFromList.begin(), obsFromList.end());
372
373 if (fMT)
374 ROOT::EnableImplicitMT();
375 else
376 ROOT::DisableImplicitMT();
377
378 RESTInfo << "Initializing dataset" << RESTendl;
379 fDataFrame = ROOT::RDataFrame("AnalysisTree", fFileSelection);
380
381 RESTInfo << "Making cuts" << RESTendl;
383
384 // Adding new user columns added to the dataset
385 for (const auto& [cName, cExpression] : fColumnNameExpressions) {
386 RESTInfo << "Adding column to dataset: " << cName << RESTendl;
387 finalList.emplace(cName);
388 fDataFrame = DefineColumn(cName, cExpression);
389 }
390
391 RegenerateTree(std::vector<std::string>(finalList.begin(), finalList.end()));
392
393 RESTInfo << " - Dataset generated!" << RESTendl;
394}
395
399void TRestDataSet::RegenerateTree(std::vector<std::string> finalList) {
400 RESTInfo << "Generating snapshot." << RESTendl;
401 std::string user = getenv("USER");
402 std::string fOutName = "/tmp/rest_output_" + user + ".root";
403 if (!finalList.empty())
404 fDataFrame.Snapshot("AnalysisTree", fOutName, finalList);
405 else
406 fDataFrame.Snapshot("AnalysisTree", fOutName);
407
408 RESTInfo << "Re-importing analysis tree." << RESTendl;
409 fDataFrame = ROOT::RDataFrame("AnalysisTree", fOutName);
410
411 TFile* f = TFile::Open(fOutName.c_str());
412 fTree = (TChain*)f->Get("AnalysisTree");
413}
414
418std::vector<std::string> TRestDataSet::FileSelection() {
419 fFileSelection.clear();
420
421 std::time_t time_stamp_start = REST_StringHelper::StringToTimeStamp(fFilterStartTime);
422 std::time_t time_stamp_end = REST_StringHelper::StringToTimeStamp(fFilterEndTime);
423
424 if (!time_stamp_end || !time_stamp_start) {
425 RESTError << "TRestDataSet::FileSelect. Start or end dates not properly formed. Please, check "
426 "REST_StringHelper::StringToTimeStamp documentation for valid formats"
427 << RESTendl;
428 return fFileSelection;
429 }
430
431 std::vector<std::string> fileNames = TRestTools::GetFilesMatchingPattern(fFilePattern);
432
433 RESTInfo << "TRestDataSet::FileSelection. Starting file selection." << RESTendl;
434 RESTInfo << "Total files : " << fileNames.size() << RESTendl;
435 RESTInfo << "This process may take long computation time in case there are many files." << RESTendl;
436
437 fTotalDuration = 0;
438 std::cout << "Processing file selection.";
439 int cnt = 1;
440 for (const auto& file : fileNames) {
441 if (cnt % 100 == 0) {
442 std::cout << std::endl;
443 std::cout << "Files processed: " << cnt << " ." << std::flush;
444 }
445 cnt++;
446 TRestRun run(file);
447 std::cout << "." << std::flush;
448 double runStart = run.GetStartTimestamp();
449 double runEnd = run.GetEndTimestamp();
450
451 if (runStart < time_stamp_start || runEnd > time_stamp_end) {
452 RESTInfo << "Rejecting file out of date range: " << file << RESTendl;
453 continue;
454 }
455
456 int n = 0;
457 bool accept = true;
458 for (const auto& md : fFilterMetadata) {
459 std::string mdValue = run.GetMetadataMember(md);
460
461 if (!fFilterContains[n].empty())
462 if (mdValue.find(fFilterContains[n]) == std::string::npos) accept = false;
463
464 if (fFilterGreaterThan[n] != -1) {
465 if (StringToDouble(mdValue) <= fFilterGreaterThan[n]) accept = false;
466 }
467
468 if (fFilterLowerThan[n] != -1)
469 if (StringToDouble(mdValue) >= fFilterLowerThan[n]) accept = false;
470
471 if (fFilterEqualsTo[n] != -1)
472 if (StringToDouble(mdValue) != fFilterEqualsTo[n]) accept = false;
473
474 n++;
475 }
476
477 if (!accept) continue;
478
479 Double_t acc = 0;
480 for (auto& [name, properties] : fQuantity) {
481 std::string value = run.ReplaceMetadataMembers(properties.metadata);
482 const Double_t val = REST_StringHelper::StringToDouble(value);
483
484 if (properties.strategy == "accumulate") {
485 acc += val;
486 properties.value = StringWithPrecision(val, 2);
487 }
488
489 if (properties.strategy == "max")
490 if (properties.value.empty() || REST_StringHelper::StringToDouble(properties.value) < val)
491 properties.value = value;
492
493 if (properties.strategy == "min")
494 if (properties.value.empty() || REST_StringHelper::StringToDouble(properties.value) > val)
495 properties.value = value;
496
497 if (properties.strategy == "unique") {
498 if (properties.value.empty())
499 properties.value = value;
500 else if (properties.value != value) {
501 RESTWarning << "TRestDataSet::FileSelection. Relevant quantity retrieval." << RESTendl;
502 RESTWarning << "A unique metadata member used for the `" << name
503 << "` quantity is not unique!!" << RESTendl;
504 RESTWarning << "Pre-registered value : " << properties.value << " New value : " << value
505 << RESTendl;
506 }
507 }
508
509 if (properties.strategy == "last") properties.value = value;
510
511 if (properties.strategy.find("append") != std::string::npos) {
512 // Get appender from "append" til the end of string. E.g. "append," -> "," or "append" -> ""
513 std::string appender = properties.strategy.substr(properties.strategy.find("append") + 6);
514 if (properties.value.empty())
515 properties.value = value;
516 else
517 // do not append if value already contains the value to append
518 if (properties.value.find(value) == std::string::npos)
519 properties.value += appender + value;
520 }
521
522 if (properties.strategy.find("extend") != std::string::npos) {
523 // Get appender from "extend" til the end of string. E.g. "extend," -> "," or "extend" -> ""
524 std::string appender = properties.strategy.substr(properties.strategy.find("extend") + 6);
525 if (properties.value.empty())
526 properties.value = value;
527 else
528 properties.value += appender + value;
529 }
530 }
531
532 if (run.GetStartTimestamp() < fStartTime) fStartTime = run.GetStartTimestamp();
533
534 if (run.GetEndTimestamp() > fEndTime) fEndTime = run.GetEndTimestamp();
535
536 fTotalDuration += run.GetEndTimestamp() - run.GetStartTimestamp();
537 fFileSelection.push_back(file);
538 }
539 std::cout << std::endl;
540
541 return fFileSelection;
542}
543
550ROOT::RDF::RNode TRestDataSet::Range(size_t from, size_t to) { return fDataFrame.Range(from, to); }
551
556ROOT::RDF::RNode TRestDataSet::ApplyRange(size_t from, size_t to) {
557 fDataFrame = fDataFrame.Range(from, to);
559 return fDataFrame;
560}
561
568ROOT::RDF::RNode TRestDataSet::MakeCut(const TRestCut* cut) {
569 auto df = fDataFrame;
570
571 if (cut == nullptr) return df;
572
573 auto paramCut = cut->GetParamCut();
574 auto obsList = df.GetColumnNames();
575 for (const auto& [param, condition] : paramCut) {
576 if (std::find(obsList.begin(), obsList.end(), param) != obsList.end()) {
577 std::string pCut = param + condition;
578 RESTDebug << "Applying cut " << pCut << RESTendl;
579 df = df.Filter(pCut);
580 } else {
581 RESTWarning << " Cut observable " << param << " not found in observable list, skipping..."
582 << RESTendl;
583 }
584 }
585
586 auto cutString = cut->GetCutStrings();
587 for (const auto& pCut : cutString) {
588 bool added = false;
589 for (const auto& obs : obsList) {
590 if (pCut.find(obs) != std::string::npos) {
591 RESTDebug << "Applying cut " << pCut << RESTendl;
592 df = df.Filter(pCut);
593 added = true;
594 break;
595 }
596 }
597
598 if (!added) {
599 RESTWarning << " Cut string " << pCut << " not found in observable list, skipping..." << RESTendl;
600 }
601 }
602
603 return df;
604}
605
612 auto nEntries = fDataFrame.Count();
613 if (*nEntries == (long long unsigned int)GetTree()->GetEntries()) return *nEntries;
614 RESTWarning << "TRestDataSet::GetEntries. Number of tree entries is not the same as RDataFrame entries."
615 << RESTendl;
616 RESTWarning << "Returning RDataFrame entries" << RESTendl;
617 return *nEntries;
618}
619
632ROOT::RDF::RNode TRestDataSet::DefineColumn(const std::string& columnName, const std::string& formula) {
633 auto df = fDataFrame;
634
635 std::string evalFormula = formula;
636 for (auto const& [name, properties] : fQuantity)
637 evalFormula = REST_StringHelper::Replace(evalFormula, name, properties.value);
638
639 df = df.Define(columnName, evalFormula);
640
641 return df;
642}
643
649
650 RESTMetadata << " - StartTime : " << REST_StringHelper::ToDateTimeString(fStartTime) << RESTendl;
651 RESTMetadata << " - EndTime : " << REST_StringHelper::ToDateTimeString(fEndTime) << RESTendl;
652 RESTMetadata << " - Path : " << TRestTools::SeparatePathAndName(fFilePattern).first << RESTendl;
653 RESTMetadata << " - File pattern : " << TRestTools::SeparatePathAndName(fFilePattern).second << RESTendl;
654 RESTMetadata << " " << RESTendl;
655 RESTMetadata << " - Accumulated run time (seconds) : " << fTotalDuration << RESTendl;
656 RESTMetadata << " - Accumulated run time (hours) : " << fTotalDuration / 3600. << RESTendl;
657 RESTMetadata << " - Accumulated run time (days) : " << fTotalDuration / 3600. / 24. << RESTendl;
658
659 RESTMetadata << " " << RESTendl;
660
661 if (!fObservablesList.empty()) {
662 RESTMetadata << " Observables added:" << RESTendl;
663 RESTMetadata << " -------------------------" << RESTendl;
664 for (const auto& l : fObservablesList) RESTMetadata << " - " << l << RESTendl;
665
666 RESTMetadata << " " << RESTendl;
667 }
668
669 if (!fFilterMetadata.empty()) {
670 RESTMetadata << " Metadata filters: " << RESTendl;
671 RESTMetadata << " ----------------- " << RESTendl;
672 RESTMetadata << " - StartTime : " << fFilterStartTime << RESTendl;
673 RESTMetadata << " - EndTime : " << fFilterEndTime << RESTendl;
674 int n = 0;
675 for (const auto& mdFilter : fFilterMetadata) {
676 RESTMetadata << " - " << mdFilter << ".";
677
678 if (!fFilterContains[n].empty()) RESTMetadata << " Contains: " << fFilterContains[n];
679 if (fFilterGreaterThan[n] != -1) RESTMetadata << " Greater than: " << fFilterGreaterThan[n];
680 if (fFilterLowerThan[n] != -1) RESTMetadata << " Lower than: " << fFilterLowerThan[n];
681 if (fFilterEqualsTo[n] != -1) RESTMetadata << " Equals to: " << fFilterEqualsTo[n];
682
683 RESTMetadata << RESTendl;
684 n++;
685 }
686
687 RESTMetadata << " " << RESTendl;
688 }
689
690 if (!fQuantity.empty()) {
691 RESTMetadata << " Relevant quantities: " << RESTendl;
692 RESTMetadata << " -------------------- " << RESTendl;
693
694 for (auto const& [name, properties] : fQuantity) {
695 RESTMetadata << " - Name : " << name << ". Value : " << properties.value
696 << ". Strategy: " << properties.strategy << RESTendl;
697 RESTMetadata << " - Metadata: " << properties.metadata << RESTendl;
698 RESTMetadata << " - Description: " << properties.description << RESTendl;
699 RESTMetadata << " " << RESTendl;
700 }
701 }
702
703 if (!fColumnNameExpressions.empty()) {
704 RESTMetadata << " New columns added to generated dataframe: " << RESTendl;
705 RESTMetadata << " ---------------------------------------- " << RESTendl;
706 for (const auto& [cName, cExpression] : fColumnNameExpressions) {
707 RESTMetadata << " - Name : " << cName << RESTendl;
708 RESTMetadata << " - Expression: " << cExpression << RESTendl;
709 RESTMetadata << " " << RESTendl;
710 }
711 }
712
713 if (fMergedDataset) {
714 RESTMetadata << " " << RESTendl;
715 RESTMetadata << "This is a combined dataset." << RESTendl;
716 RESTMetadata << " -------------------- " << RESTendl;
717 RESTMetadata << " - Relevant quantities have been removed!" << RESTendl;
718 RESTMetadata << " - Dataset metadata properties correspond to the first file in the list."
719 << RESTendl;
720 RESTMetadata << " " << RESTendl;
721 RESTMetadata << "List of imported files: " << RESTendl;
722 RESTMetadata << " -------------------- " << RESTendl;
723 for (const auto& fn : fImportedFiles) RESTMetadata << " - " << fn << RESTendl;
724 }
725
726 RESTMetadata << " " << RESTendl;
727 if (fMT)
728 RESTMetadata << " - Multithreading was enabled" << RESTendl;
729 else
730 RESTMetadata << " - Multithreading was NOT enabled" << RESTendl;
731
732 RESTMetadata << "----" << RESTendl;
733}
734
740
742 TiXmlElement* filterDefinition = GetElement("filter");
743 while (filterDefinition != nullptr) {
744 std::string metadata = GetFieldValue("metadata", filterDefinition);
745 if (metadata.empty() || metadata == "Not defined") {
746 RESTError << "Filter key defined without metadata member!" << RESTendl;
747 exit(1);
748 }
749
750 fFilterMetadata.push_back(metadata);
751
752 std::string contains = GetFieldValue("contains", filterDefinition);
753 if (contains == "Not defined") contains = "";
754 Double_t greaterThan = StringToDouble(GetFieldValue("greaterThan", filterDefinition));
755 Double_t lowerThan = StringToDouble(GetFieldValue("lowerThan", filterDefinition));
756 Double_t equalsTo = StringToDouble(GetFieldValue("equalsTo", filterDefinition));
757
758 fFilterContains.push_back(contains);
759 fFilterGreaterThan.push_back(greaterThan);
760 fFilterLowerThan.push_back(lowerThan);
761 fFilterEqualsTo.push_back(equalsTo);
762
763 filterDefinition = GetNextElement(filterDefinition);
764 }
765
767 TiXmlElement* observablesDefinition = GetElement("observables");
768 while (observablesDefinition != nullptr) {
769 std::string observables = GetFieldValue("list", observablesDefinition);
770 if (observables.empty() || observables == "Not defined") {
771 RESTError << "<observables key does not contain a list!" << RESTendl;
772 exit(1);
773 }
774
775 std::vector<std::string> obsList = REST_StringHelper::Split(observables, ",");
776
777 fObservablesList.insert(fObservablesList.end(), obsList.begin(), obsList.end());
778
779 observablesDefinition = GetNextElement(observablesDefinition);
780 }
781
783 TiXmlElement* obsProcessDefinition = GetElement("processObservables");
784 while (obsProcessDefinition != nullptr) {
785 std::string observables = GetFieldValue("list", obsProcessDefinition);
786 if (observables.empty() || observables == "Not defined") {
787 RESTError << "<processObservables key does not contain a list!" << RESTendl;
788 exit(1);
789 }
790
791 std::vector<std::string> obsList = REST_StringHelper::Split(observables, ",");
792
793 for (const auto& l : obsList) {
794 std::string processObsPattern = l + "_*";
795 fObservablesList.push_back(processObsPattern);
796 }
797
798 obsProcessDefinition = GetNextElement(obsProcessDefinition);
799 }
800
802 TiXmlElement* quantityDefinition = GetElement("quantity");
803 while (quantityDefinition != nullptr) {
804 std::string name = GetFieldValue("name", quantityDefinition);
805 if (name.empty() || name == "Not defined") {
806 RESTError << "<quantity key does not contain a name!" << RESTendl;
807 exit(1);
808 }
809
810 std::string metadata = GetFieldValue("metadata", quantityDefinition);
811 if (metadata.empty() || metadata == "Not defined") {
812 RESTError << "<quantity key does not contain a metadata value!" << RESTendl;
813 exit(1);
814 }
815
816 std::string strategy = GetFieldValue("strategy", quantityDefinition);
817 if (strategy.empty() || strategy == "Not defined") {
818 strategy = "unique";
819 }
820
821 std::string description = GetFieldValue("description", quantityDefinition);
822
823 RelevantQuantity quantity;
824 quantity.metadata = metadata;
825 quantity.strategy = strategy;
826 quantity.description = description;
827 quantity.value = "";
828
829 fQuantity[name] = quantity;
830
831 quantityDefinition = GetNextElement(quantityDefinition);
832 }
833
835 TiXmlElement* columnDefinition = GetElement("addColumn");
836 while (columnDefinition != nullptr) {
837 std::string name = GetFieldValue("name", columnDefinition);
838 if (name.empty() || name == "Not defined") {
839 RESTError << "<define key does not contain a name name!" << RESTendl;
840 exit(1);
841 }
842
843 std::string expression = GetFieldValue("expression", columnDefinition);
844 if (expression.empty() || expression == "Not defined") {
845 RESTError << "<addColumn key does not contain a expression value!" << RESTendl;
846 exit(1);
847 }
848
849 fColumnNameExpressions.push_back({name, expression});
850
851 columnDefinition = GetNextElement(columnDefinition);
852 }
853
854 fCut = (TRestCut*)InstantiateChildMetadata("TRestCut");
855}
856
870void TRestDataSet::Export(const std::string& filename, std::vector<std::string> excludeColumns) {
871 RESTInfo << "Exporting dataset" << RESTendl;
872
873 std::vector<std::string> columns = fDataFrame.GetColumnNames();
874 if (!excludeColumns.empty()) {
875 columns.erase(std::remove_if(columns.begin(), columns.end(),
876 [&excludeColumns](std::string elem) {
877 return std::find(excludeColumns.begin(), excludeColumns.end(),
878 elem) != excludeColumns.end();
879 }),
880 columns.end());
881
882 RESTInfo << "Re-Generating snapshot." << RESTendl;
883 std::string user = getenv("USER");
884 std::string fOutName = "/tmp/rest_output_" + user + ".root";
885 fDataFrame.Snapshot("AnalysisTree", fOutName, columns);
886
887 RESTInfo << "Re-importing analysis tree." << RESTendl;
888 fDataFrame = ROOT::RDataFrame("AnalysisTree", fOutName);
889
890 TFile* f = TFile::Open(fOutName.c_str());
891 fTree = (TChain*)f->Get("AnalysisTree");
892 }
893
894 if (TRestTools::GetFileNameExtension(filename) == "txt" ||
895 TRestTools::GetFileNameExtension(filename) == "csv") {
896 if (excludeColumns.empty()) {
897 RESTInfo << "Re-Generating snapshot." << RESTendl;
898 std::string user = getenv("USER");
899 std::string fOutName = "/tmp/rest_output_" + user + ".root";
900 fDataFrame.Snapshot("AnalysisTree", fOutName);
901
902 TFile* f = TFile::Open(fOutName.c_str());
903 fTree = (TChain*)f->Get("AnalysisTree");
904 }
905
906 std::vector<std::string> dataTypes;
907 for (int n = 0; n < fTree->GetListOfBranches()->GetEntries(); n++) {
908 std::string bName = fTree->GetListOfBranches()->At(n)->GetName();
909 std::string type = fTree->GetLeaf((TString)bName)->GetTypeName();
910 dataTypes.push_back(type);
911 if (type != "Double_t" && type != "Int_t") {
912 RESTError << "Branch name : " << bName << " is type : " << type << RESTendl;
913 RESTError << "Only Int_t and Double_t types are allowed for "
914 "exporting to ASCII table"
915 << RESTendl;
916 RESTError << "File will not be generated" << RESTendl;
917 return;
918 }
919 }
920
921 FILE* f = fopen(filename.c_str(), "wt");
923 fprintf(f, "### TRestDataSet generated file\n");
924 fprintf(f, "### \n");
925 fprintf(f, "### StartTime : %s\n", fFilterStartTime.c_str());
926 fprintf(f, "### EndTime : %s\n", fFilterEndTime.c_str());
927 fprintf(f, "###\n");
928 fprintf(f, "### Accumulated run time (seconds) : %lf\n", fTotalDuration);
929 fprintf(f, "### Accumulated run time (hours) : %lf\n", fTotalDuration / 3600.);
930 fprintf(f, "### Accumulated run time (days) : %lf\n", fTotalDuration / 3600. / 24.);
931 fprintf(f, "###\n");
932 fprintf(f, "### Data path : %s\n", TRestTools::SeparatePathAndName(fFilePattern).first.c_str());
933 fprintf(f, "### File pattern : %s\n", TRestTools::SeparatePathAndName(fFilePattern).second.c_str());
934 fprintf(f, "###\n");
935 if (!fFilterMetadata.empty()) {
936 fprintf(f, "### Metadata filters : \n");
937 int n = 0;
938 for (const auto& md : fFilterMetadata) {
939 fprintf(f, "### - %s.", md.c_str());
940 if (!fFilterContains[n].empty()) fprintf(f, " Contains: %s.", fFilterContains[n].c_str());
941 if (fFilterGreaterThan[n] != -1) fprintf(f, " Greater than: %6.3lf.", fFilterGreaterThan[n]);
942 if (fFilterLowerThan[n] != -1) fprintf(f, " Lower than: %6.3lf.", fFilterLowerThan[n]);
943 if (fFilterEqualsTo[n] != -1) fprintf(f, " Equals to: %6.3lf.", fFilterLowerThan[n]);
944 fprintf(f, "\n");
945 n++;
946 }
947 }
948 fprintf(f, "###\n");
949 fprintf(f, "### Relevant quantities: \n");
950 for (auto& [name, properties] : fQuantity) {
951 fprintf(f, "### - %s : %s - %s\n", name.c_str(), properties.value.c_str(),
952 properties.description.c_str());
953 }
954 fprintf(f, "###\n");
955 fprintf(f, "### Observables list: ");
956 for (int n = 0; n < fTree->GetListOfBranches()->GetEntries(); n++) {
957 std::string bName = fTree->GetListOfBranches()->At(n)->GetName();
958 fprintf(f, " %s", bName.c_str());
959 }
960 fprintf(f, "\n");
961 fprintf(f, "###\n");
962 fprintf(f, "### Data starts here\n");
963
964 auto obsNames = fDataFrame.GetColumnNames();
965 std::string obsListStr = "";
966 for (const auto& l : obsNames) {
967 if (!obsListStr.empty()) obsListStr += ":";
968 obsListStr += l;
969 }
970
971 // We do this so that later we can recover the values using TTree::GetVal
972 fTree->Draw((TString)obsListStr, "", "goff");
973
974 for (unsigned int n = 0; n < fTree->GetEntries(); n++) {
975 for (unsigned int m = 0; m < GetNumberOfBranches(); m++) {
976 std::string bName = fTree->GetListOfBranches()->At(m)->GetName();
977 if (m > 0) fprintf(f, "\t");
978 if (dataTypes[m] == "Double_t")
979 if (bName == "timeStamp")
980 fprintf(f, "%010.0lf", fTree->GetVal(m)[n]);
981 else
982 fprintf(f, "%05.3e", fTree->GetVal(m)[n]);
983 else
984 fprintf(f, "%06d", (Int_t)fTree->GetVal(m)[n]);
985 }
986 fprintf(f, "\n");
987 }
988 fclose(f);
989
990 return;
991 } else if (TRestTools::GetFileNameExtension(filename) == "root") {
992 fDataFrame.Snapshot("AnalysisTree", filename);
993
994 TFile* f = TFile::Open(filename.c_str(), "UPDATE");
995 std::string name = this->GetName();
996 if (name.empty()) name = "mock";
997 this->Write(name.c_str());
998 f->Close();
999 } else {
1000 RESTWarning << "TRestDataSet::Export. Extension " << TRestTools::GetFileNameExtension(filename)
1001 << " not recognized" << RESTendl;
1002 }
1003 RESTInfo << "Dataset generated: " << filename << RESTendl;
1004}
1005
1010 SetName(dS.GetName());
1011 fFilterStartTime = dS.GetFilterStartTime();
1012 fFilterEndTime = dS.GetFilterEndTime();
1013 fStartTime = dS.GetStartTime();
1014 fEndTime = dS.GetEndTime();
1015 fFilePattern = dS.GetFilePattern();
1016 fObservablesList = dS.GetObservablesList();
1018 fFilterMetadata = dS.GetFilterMetadata();
1019 fFilterContains = dS.GetFilterContains();
1020 fFilterGreaterThan = dS.GetFilterGreaterThan();
1021 fFilterLowerThan = dS.GetFilterLowerThan();
1022 fFilterEqualsTo = dS.GetFilterEqualsTo();
1023 fQuantity = dS.GetQuantity();
1024 fColumnNameExpressions = dS.GetAddedColumns();
1026 fCut = dS.GetCut();
1027
1028 return *this;
1029}
1030
1036 auto obsNames = GetObservablesList();
1037 for (const auto& obs : fObservablesList) {
1038 if (std::find(obsNames.begin(), obsNames.end(), obs) != obsNames.end()) {
1039 RESTError << "Cannot merge dataSets with different observable list " << RESTendl;
1040 return false;
1041 }
1042 }
1043
1044 if (fStartTime > dS.GetStartTime()) fStartTime = dS.GetStartTime();
1045 if (fEndTime < dS.GetEndTime()) fEndTime = dS.GetEndTime();
1046
1047 auto fileSelection = dS.GetFileSelection();
1048 fFileSelection.insert(fFileSelection.end(), fileSelection.begin(), fileSelection.end());
1049
1051
1052 return true;
1053}
1054
1060void TRestDataSet::Import(const std::string& fileName) {
1061 if (TRestTools::GetFileNameExtension(fileName) != "root") {
1062 RESTError << "Datasets can only be imported from root files" << RESTendl;
1063 return;
1064 }
1065
1066 TRestDataSet* dS = nullptr;
1067 TFile* file = TFile::Open(fileName.c_str(), "READ");
1068 if (file != nullptr) {
1069 TIter nextkey(file->GetListOfKeys());
1070 TKey* key;
1071 while ((key = (TKey*)nextkey())) {
1072 std::string kName = key->GetClassName();
1073 if (REST_Reflection::GetClassQuick(kName.c_str()) != nullptr &&
1074 REST_Reflection::GetClassQuick(kName.c_str())->InheritsFrom("TRestDataSet")) {
1075 dS = file->Get<TRestDataSet>(key->GetName());
1076 *this = *dS;
1077 }
1078 }
1079 }
1080
1081 if (dS == nullptr) {
1082 RESTError << fileName << " is not a valid dataSet" << RESTendl;
1083 return;
1084 }
1085
1086 if (fMT)
1087 ROOT::EnableImplicitMT();
1088 else
1089 ROOT::DisableImplicitMT();
1090
1091 fDataFrame = ROOT::RDataFrame("AnalysisTree", fileName);
1092
1093 fTree = (TChain*)file->Get("AnalysisTree");
1094}
1095
1105void TRestDataSet::Import(std::vector<std::string> fileNames) {
1106 for (const auto& fN : fileNames)
1107 if (TRestTools::GetFileNameExtension(fN) != "root") {
1108 RESTError << "Datasets can only be imported from root files" << RESTendl;
1109 return;
1110 }
1111
1112 int count = 0;
1113 auto it = fileNames.begin();
1114 while (it != fileNames.end()) {
1115 std::string fileName = *it;
1116 TFile* file = TFile::Open(fileName.c_str(), "READ");
1117 bool isValid = false;
1118 if (file != nullptr) {
1119 TIter nextkey(file->GetListOfKeys());
1120 TKey* key;
1121 while ((key = (TKey*)nextkey())) {
1122 std::string kName = key->GetClassName();
1123 if (REST_Reflection::GetClassQuick(kName.c_str()) != nullptr &&
1124 REST_Reflection::GetClassQuick(kName.c_str())->InheritsFrom("TRestDataSet")) {
1125 TRestDataSet* dS = file->Get<TRestDataSet>(key->GetName());
1127 dS->PrintMetadata();
1128
1129 if (count == 0) {
1130 *this = *dS;
1131 isValid = true;
1132 } else {
1133 isValid = Merge(*dS);
1134 }
1135
1136 if (isValid) count++;
1137 }
1138 }
1139 } else {
1140 RESTError << "Cannot open " << fileName << RESTendl;
1141 }
1142
1143 if (!isValid) {
1144 RESTError << fileName << " is not a valid dataSet skipping..." << RESTendl;
1145 it = fileNames.erase(it);
1146 } else {
1147 ++it;
1148 }
1149 }
1150
1151 if (fileNames.empty()) {
1152 RESTError << "File selection is empty, dataSet will not be imported " << RESTendl;
1153 return;
1154 }
1155
1156 RESTInfo << "Opening list of files. First file: " << fileNames[0] << RESTendl;
1157 fDataFrame = ROOT::RDataFrame("AnalysisTree", fileNames);
1158
1159 if (fTree != nullptr) {
1160 delete fTree;
1161 fTree = nullptr;
1162 }
1163 fTree = new TChain("AnalysisTree");
1164
1165 for (const auto& fN : fileNames) fTree->Add((TString)fN);
1166
1167 fMergedDataset = true;
1168 fImportedFiles = fileNames;
1169
1170 fQuantity.clear();
1171}
std::vector< std::string > GetObservableNames()
It returns a vector with strings containing all the observables that exist in the analysis tree.
A class to help on cuts definitions. To be used with TRestAnalysisTree.
Definition TRestCut.h:31
It allows to group a number of runs that satisfy given metadata conditions.
std::vector< std::string > fFilterContains
If not empty it will check if the metadata member contains the string.
virtual std::vector< std::string > FileSelection()
Function to determine the filenames that satisfy the dataset conditions.
std::vector< Double_t > fFilterLowerThan
If the corresponding element is not empty it will check if the metadata member is lower.
void PrintMetadata() override
Prints on screen the information about the metadata members of TRestDataSet.
TChain * fTree
A pointer to the generated tree.
void Import(const std::string &fileName)
This function imports metadata from a root file it import metadata info from the previous dataSet whi...
std::map< std::string, RelevantQuantity > fQuantity
The properties of a relevant quantity that we want to store together with the dataset.
ROOT::RDF::RNode Range(size_t from, size_t to)
This method returns a RDataFrame node with the number of samples inside the dataset by selecting a ra...
std::vector< std::pair< std::string, std::string > > fColumnNameExpressions
A list of new columns together with its corresponding expressions added to the dataset.
ROOT::RDF::RNode DefineColumn(const std::string &columnName, const std::string &formula)
This function will add a new column to the RDataFrame using the same scheme as the usual RDF::Define ...
Double_t fEndTime
TimeStamp for the end time of the last file.
ROOT::RDF::RNode fDataFrame
The resulting RDF::RNode object after initialization.
size_t GetNumberOfBranches()
Number of variables (or observables)
size_t GetEntries()
It returns the number of entries found inside fDataFrame and prints out a warning if the number of en...
TRestDataSet()
Default constructor.
Double_t GetTotalTimeInSeconds() const
It returns the accumulated run time in seconds.
ROOT::RDF::RNode MakeCut(const TRestCut *cut)
This function applies a TRestCut to the dataframe and returns a dataframe with the applied cuts....
void GenerateDataSet()
This function generates the data frame with the filelist and column names (or observables) that have ...
Bool_t fMT
A flag to enable Multithreading during dataframe generation.
TRestCut * fCut
Parameter cuts over the selected dataset.
void Export(const std::string &filename, std::vector< std::string > excludeColumns={})
It will generate an output file with the dataset compilation. Only the selected branches and the file...
std::string fFilterStartTime
All the selected runs will have a starting date after fStartTime.
Bool_t Merge(const TRestDataSet &dS)
This function merge different TRestDataSet metadata in current dataSet.
std::vector< std::string > fFilterMetadata
A list of metadata members where filters will be applied.
std::vector< std::string > fFileSelection
A list populated by the FileSelection method using the conditions of the dataset.
std::vector< std::string > GetFileSelection()
It returns a list of the files that have been finally selected.
std::string fFilterEndTime
All the selected runs will have an ending date before fEndTime.
Double_t fStartTime
TimeStamp for the start time of the first file.
std::vector< std::string > fObservablesList
It contains a list of the observables that will be added to the final tree or exported file.
TTree * GetTree() const
Gives access to the tree.
Bool_t fMergedDataset
It keeps track if the generated dataset is a pure dataset or a merged one.
void Initialize() override
This function initialize different parameters from the TRestDataSet.
void RegenerateTree(std::vector< std::string > finalList={})
It regenerates the tree so that it is an exact copy of the present DataFrame.
std::vector< std::string > fImportedFiles
The list of dataset files imported.
Double_t fTotalDuration
The total integrated run time of selected files.
std::string fFilePattern
A glob file pattern that must be satisfied by all files.
std::vector< Double_t > fFilterGreaterThan
If the corresponding element is not empty it will check if the metadata member is greater.
ROOT::RDF::RNode ApplyRange(size_t from, size_t to)
This method reduces the number of samples inside the dataset by selecting a range.
std::vector< Double_t > fFilterEqualsTo
If the corresponding element is not empty it will check if the metadata member is equal.
void InitFromConfigFile() override
Initialization of specific TRestDataSet members through an RML file.
TRestDataSet & operator=(TRestDataSet &dS)
Operator to copy TRestDataSet metadata.
~TRestDataSet()
Default destructor.
A base class for any REST metadata class.
virtual void PrintMetadata()
Implemented it in the derived metadata class to print out specific metadata information.
endl_t RESTendl
Termination flag object for TRestStringOutput.
TiXmlElement * GetElement(std::string eleDeclare, TiXmlElement *e=nullptr)
Get an xml element from a given parent element, according to its declaration.
Int_t LoadConfigFromFile(const std::string &configFilename, const std::string &sectionName="")
Give the file name, find out the corresponding section. Then call the main starter.
TRestMetadata * InstantiateChildMetadata(int index, std::string pattern="")
This method will retrieve a new TRestMetadata instance of a child element of the present TRestMetadat...
virtual void InitFromConfigFile()
To make settings from rml file. This method must be implemented in the derived class.
TRestStringOutput::REST_Verbose_Level GetVerboseLevel()
returns the verboselevel in type of REST_Verbose_Level enumerator
std::string GetFieldValue(std::string parName, TiXmlElement *e)
Returns the field value of an xml element which has the specified name.
void SetSectionName(std::string sName)
set the section name, clear the section content
std::string fConfigFileName
Full name of the rml file.
virtual Int_t Write(const char *name=nullptr, Int_t option=0, Int_t bufsize=0)
overwriting the write() method with fStore considered
TiXmlElement * GetNextElement(TiXmlElement *e)
Get the next sibling xml element of this element, with same eleDeclare.
Data provider and manager in REST.
Definition TRestRun.h:18
std::string ReplaceMetadataMembers(const std::string &instr, Int_t precision=8)
It will replace the data members contained inside the string given as input. The data members in the ...
@ REST_Info
+show most of the information for each steps
static std::pair< std::string, std::string > SeparatePathAndName(const std::string &fullname)
Separate path and filename in a full path+filename string, returns a pair of string.
static std::string GetFileNameExtension(const std::string &fullname)
Gets the file extension as the substring found after the latest ".".
static std::set< std::string > GetMatchingStrings(const std::vector< std::string > &stack, const std::vector< std::string > &wantedStrings)
Returns a set of strings that match the wanted strings from the stack. The wanted strings can contain...
static std::vector< std::string > GetFilesMatchingPattern(std::string pattern, bool unlimited=false)
Returns a list of files whose name match the pattern string. Key word is "*". e.g....
TClass * GetClassQuick()
Get the type of a "class" object, returning the wrapped type identifier "TClass".
time_t StringToTimeStamp(std::string time)
A method to convert a date/time formatted string to a timestamp.
std::vector< std::string > Split(std::string in, std::string separator, bool allowBlankString=false, bool removeWhiteSpaces=false, int startPos=-1)
Split the input string according to the given separator. Returning a vector of fragments.
Double_t StringToDouble(std::string in)
Gets a double from a string.
std::string ToDateTimeString(time_t time)
Format time_t into string.
std::string Replace(std::string in, std::string thisString, std::string byThisString, size_t fromPosition=0, Int_t N=0)
Replace any occurences of thisSring by byThisString inside string in.
std::string metadata
The associated metadata member used to register the relevant quantity.
std::string description
A user given description that can be used to define the relevant quantity.
std::string strategy
It determines how to produce the relevant quantity (accumulate/unique/last/max/min)
std::string value
The quantity value.