82 TFile* TMVA::Factory::fgTargetFile = 0;
86 #define RECREATE_METHODS kTRUE 98 fDataSetManager (
NULL ),
100 fTransformations (
"I" ),
102 fJobName ( jobName ),
103 fDataAssignType ( kAssignEvents ),
104 fATreeEvent (
NULL ),
105 fAnalysisType (
Types::kClassification )
134 DeclareOptionRef( color,
"Color",
"Flag for coloured screen output (default: True, if in batch mode: False)" );
135 DeclareOptionRef(
fTransformations,
"Transformations",
"List of transformations to test; formatting example: \"Transformations=I;D;P;U;G,D\", for identity, decorrelation, PCA, Uniform and Gaussianisation followed by decorrelation transformations" );
136 DeclareOptionRef( silent,
"Silent",
"Batch mode: boolean silent flag inhibiting any output from TMVA after the creation of the factory class object (default: False)" );
138 "DrawProgressBar",
"Draw progress bar to display training, testing and evaluation schedule (default: True)" );
142 "AnalysisType",
"Set the analysis type (Classification, Regression, Multiclass, Auto) (default: Auto)" );
184 std::vector<TMVA::VariableTransformBase*>::iterator trfIt =
fDefaultTrfs.begin();
185 for (;trfIt !=
fDefaultTrfs.end(); trfIt++)
delete (*trfIt);
206 MVector::iterator itrMethod =
fMethods.begin();
207 for (; itrMethod !=
fMethods.end(); itrMethod++) {
208 Log() <<
kDEBUG <<
"Delete method: " << (*itrMethod)->GetName() <<
Endl;
234 if (dsi!=0)
return *dsi;
258 for (
UInt_t ivar=0; ivar<vars.size(); ivar++) {
259 TString vname = vars[ivar].GetExpression();
263 for (
UInt_t itgt=0; itgt<tgts.size(); itgt++) {
264 TString vname = tgts[itgt].GetExpression();
268 for (
UInt_t ispc=0; ispc<spec.size(); ispc++) {
269 TString vname = spec[ispc].GetExpression();
270 assignTree->
Branch( vname, &(
fATreeEvent[vars.size()+tgts.size()+ispc]), vname +
"/F" );
328 const std::vector<Double_t>& event,
Double_t weight )
372 for(
UInt_t i=0; i<size; i++) {
393 Log() <<
kFATAL <<
"<AddTree> cannot interpret tree type: \"" << treetype
394 <<
"\" should be \"Training\" or \"Test\" or \"Training and Testing\"" <<
Endl;
396 AddTree( tree, className, weight, cut, tt );
405 Log() <<
kFATAL <<
"Tree does not exist (empty pointer)." <<
Endl;
413 Log() <<
kINFO <<
"Add Tree " << tree->
GetName() <<
" of type " << className
423 AddTree( signal,
"Signal", weight,
TCut(
""), treetype );
432 TTree* signalTree =
new TTree(
"TreeS",
"Tree (S)" );
435 Log() <<
kINFO <<
"Create TTree objects from ASCII input files ... \n- Signal file : \"" 439 AddTree( signalTree,
"Signal", weight,
TCut(
""), treetype );
446 AddTree( signal,
"Signal", weight,
TCut(
""), treetype );
454 AddTree( signal,
"Background", weight,
TCut(
""), treetype );
462 TTree* bkgTree =
new TTree(
"TreeB",
"Tree (B)" );
465 Log() <<
kINFO <<
"Create TTree objects from ASCII input files ... \n- Background file : \"" 469 AddTree( bkgTree,
"Background", weight,
TCut(
""), treetype );
476 AddTree( signal,
"Background", weight,
TCut(
""), treetype );
483 AddTree( tree,
"Signal", weight );
490 AddTree( tree,
"Background", weight );
583 for (std::vector<TString>::iterator it=theVariables->begin();
652 NsigTrain, NbkgTrain, NsigTest, NbkgTest, otherOpt.
Data()) );
666 Ntrain, Ntrain, Ntest, Ntest) );
690 Log() <<
kINFO <<
"Preparing trees for training and testing..." <<
Endl;
691 AddCut( sigcut,
"Signal" );
692 AddCut( bkgcut,
"Background" );
718 Log() <<
kFATAL <<
"Booking failed since method with title <" 719 << methodTitle <<
"> already exists" 730 "Number of times the classifier will be boosted" );
745 Log() <<
"Boost Number is " << boostNum <<
" > 0: train boosted classifier" <<
Endl;
753 Log() <<
kFATAL <<
"Method with type kBoost cannot be casted to MethodCategory. /Factory" <<
Endl;
760 if (method==0)
return 0;
766 Log() <<
kFATAL <<
"Method with type kCategory cannot be casted to MethodCategory. /Factory" <<
Endl;
817 MVector::const_iterator itrMethod =
fMethods.begin();
818 MVector::const_iterator itrMethodEnd =
fMethods.end();
820 for (; itrMethod != itrMethodEnd; itrMethod++) {
884 std::vector<TMVA::TransformationHandler*> trfs;
888 std::vector<TString>::iterator trfsDefIt = trfsDef.begin();
889 for (; trfsDefIt!=trfsDef.end(); trfsDefIt++) {
894 Log() <<
kINFO <<
"current transformation string: '" << trfS.
Data() <<
"'" <<
Endl;
900 if (trfS.
BeginsWith(
'I')) identityTrHandler = trfs.back();
906 std::vector<TMVA::TransformationHandler*>::iterator trfIt = trfs.begin();
908 for (;trfIt != trfs.end(); trfIt++) {
911 (*trfIt)->CalcTransformations(inputEvents);
916 for (trfIt = trfs.begin(); trfIt != trfs.end(); trfIt++)
delete *trfIt;
928 MVector::iterator itrMethod;
931 for( itrMethod =
fMethods.begin(); itrMethod !=
fMethods.end(); ++itrMethod ) {
935 Log() <<
kFATAL <<
"Dynamic cast to MethodBase failed" <<
Endl;
941 <<
" not trained (training tree has less entries [" 943 <<
"] than required [" << MinNoTrainingEvents <<
"]" <<
Endl;
952 Log() <<
kINFO <<
"Optimization of tuning paremters finished for Method:"<<mva->
GetName() <<
Endl;
962 Log() <<
kFATAL <<
"No input data for the training provided!" <<
Endl;
966 Log() <<
kFATAL <<
"You want to do regression training without specifying a target." <<
Endl;
969 Log() <<
kFATAL <<
"You want to do classification training, but specified less than two classes." <<
Endl;
984 Log() <<
kINFO <<
"Train all methods for " 988 MVector::iterator itrMethod;
991 for( itrMethod =
fMethods.begin(); itrMethod !=
fMethods.end(); ++itrMethod ) {
998 <<
" not trained (training tree has less entries [" 1000 <<
"] than required [" << MinNoTrainingEvents <<
"]" <<
Endl;
1015 Log() <<
kINFO <<
"Ranking input variables (method specific)..." <<
Endl;
1016 for (itrMethod =
fMethods.begin(); itrMethod !=
fMethods.end(); itrMethod++) {
1021 const Ranking* ranking = (*itrMethod)->CreateRanking();
1022 if (ranking != 0) ranking->
Print();
1023 else Log() <<
kINFO <<
"No variable ranking supplied by classifier: " 1035 Log() <<
kINFO <<
"=== Destroy and recreate all methods via weight files for testing ===" << Endl <<
Endl;
1058 dataSetInfo, weightfile ) );
1061 if( !methCat )
Log() <<
kFATAL <<
"Method with type kCategory cannot be casted to MethodCategory. /Factory" <<
Endl;
1091 MVector::iterator itrMethod =
fMethods.begin();
1092 MVector::iterator itrMethodEnd =
fMethods.end();
1093 for (; itrMethod != itrMethodEnd; itrMethod++) {
1096 if(mva==0)
continue;
1100 (analysisType ==
Types::kMulticlass ?
"Multiclass classification" :
"Classification")) <<
" performance" <<
Endl;
1111 if (methodTitle !=
"") {
1115 Log() <<
kWARNING <<
"<MakeClass> Could not find classifier \"" << methodTitle
1116 <<
"\" in list" <<
Endl;
1122 MVector::const_iterator itrMethod =
fMethods.begin();
1123 MVector::const_iterator itrMethodEnd =
fMethods.end();
1124 for (; itrMethod != itrMethodEnd; itrMethod++) {
1126 if(method==0)
continue;
1139 if (methodTitle !=
"") {
1143 Log() <<
kWARNING <<
"<PrintHelpMessage> Could not find classifier \"" << methodTitle
1144 <<
"\" in list" <<
Endl;
1150 MVector::const_iterator itrMethod =
fMethods.begin();
1151 MVector::const_iterator itrMethodEnd =
fMethods.end();
1152 for (; itrMethod != itrMethodEnd; itrMethod++) {
1154 if(method==0)
continue;
1166 Log() <<
kINFO <<
"Evaluating all variables..." <<
Endl;
1171 if (options.
Contains(
"V")) s +=
":V";
1185 Log() <<
kINFO <<
"...nothing found to evaluate" <<
Endl;
1197 Int_t nmeth_used[2] = {0,0};
1199 std::vector<std::vector<TString> > mname(2);
1200 std::vector<std::vector<Double_t> > sig(2),
sep(2), roc(2);
1201 std::vector<std::vector<Double_t> > eff01(2), eff10(2), eff30(2), effArea(2);
1202 std::vector<std::vector<Double_t> > eff01err(2), eff10err(2), eff30err(2);
1203 std::vector<std::vector<Double_t> > trainEff01(2), trainEff10(2), trainEff30(2);
1205 std::vector<std::vector<Float_t> > multiclass_testEff;
1206 std::vector<std::vector<Float_t> > multiclass_trainEff;
1207 std::vector<std::vector<Float_t> > multiclass_testPur;
1208 std::vector<std::vector<Float_t> > multiclass_trainPur;
1210 std::vector<std::vector<Double_t> > biastrain(1);
1211 std::vector<std::vector<Double_t> > biastest(1);
1212 std::vector<std::vector<Double_t> > devtrain(1);
1213 std::vector<std::vector<Double_t> > devtest(1);
1214 std::vector<std::vector<Double_t> > rmstrain(1);
1215 std::vector<std::vector<Double_t> > rmstest(1);
1216 std::vector<std::vector<Double_t> > minftrain(1);
1217 std::vector<std::vector<Double_t> > minftest(1);
1218 std::vector<std::vector<Double_t> > rhotrain(1);
1219 std::vector<std::vector<Double_t> > rhotest(1);
1222 std::vector<std::vector<Double_t> > biastrainT(1);
1223 std::vector<std::vector<Double_t> > biastestT(1);
1224 std::vector<std::vector<Double_t> > devtrainT(1);
1225 std::vector<std::vector<Double_t> > devtestT(1);
1226 std::vector<std::vector<Double_t> > rmstrainT(1);
1227 std::vector<std::vector<Double_t> > rmstestT(1);
1228 std::vector<std::vector<Double_t> > minftrainT(1);
1229 std::vector<std::vector<Double_t> > minftestT(1);
1238 MVector::iterator itrMethod =
fMethods.begin();
1239 MVector::iterator itrMethodEnd =
fMethods.end();
1240 for (; itrMethod != itrMethodEnd; itrMethod++) {
1243 if(theMethod==0)
continue;
1247 doRegression =
kTRUE;
1255 biastest[0] .push_back( bias );
1256 devtest[0] .push_back( dev );
1257 rmstest[0] .push_back( rms );
1258 minftest[0] .push_back( mInf );
1259 rhotest[0] .push_back( rho );
1260 biastestT[0] .push_back( biasT );
1261 devtestT[0] .push_back( devT );
1262 rmstestT[0] .push_back( rmsT );
1263 minftestT[0] .push_back( mInfT );
1266 biastrain[0] .push_back( bias );
1267 devtrain[0] .push_back( dev );
1268 rmstrain[0] .push_back( rms );
1269 minftrain[0] .push_back( mInf );
1270 rhotrain[0] .push_back( rho );
1271 biastrainT[0].push_back( biasT );
1272 devtrainT[0] .push_back( devT );
1273 rmstrainT[0] .push_back( rmsT );
1274 minftrainT[0].push_back( mInfT );
1279 Log() <<
kINFO <<
"Write evaluation histograms to file" <<
Endl;
1284 doMulticlass =
kTRUE;
1286 Log() <<
kINFO <<
"Write evaluation histograms to file" <<
Endl;
1312 eff01err[isel].push_back( err );
1314 eff10err[isel].push_back( err );
1316 eff30err[isel].push_back( err );
1325 Log() <<
kINFO <<
"Write evaluation histograms to file" <<
Endl;
1332 std::vector<TString> vtemps = mname[0];
1333 std::vector< std::vector<Double_t> > vtmp;
1334 vtmp.push_back( devtest[0] );
1335 vtmp.push_back( devtrain[0] );
1336 vtmp.push_back( biastest[0] );
1337 vtmp.push_back( biastrain[0] );
1338 vtmp.push_back( rmstest[0] );
1339 vtmp.push_back( rmstrain[0] );
1340 vtmp.push_back( minftest[0] );
1341 vtmp.push_back( minftrain[0] );
1342 vtmp.push_back( rhotest[0] );
1343 vtmp.push_back( rhotrain[0] );
1344 vtmp.push_back( devtestT[0] );
1345 vtmp.push_back( devtrainT[0] );
1346 vtmp.push_back( biastestT[0] );
1347 vtmp.push_back( biastrainT[0]);
1348 vtmp.push_back( rmstestT[0] );
1349 vtmp.push_back( rmstrainT[0] );
1350 vtmp.push_back( minftestT[0] );
1351 vtmp.push_back( minftrainT[0]);
1354 devtest[0] = vtmp[0];
1355 devtrain[0] = vtmp[1];
1356 biastest[0] = vtmp[2];
1357 biastrain[0] = vtmp[3];
1358 rmstest[0] = vtmp[4];
1359 rmstrain[0] = vtmp[5];
1360 minftest[0] = vtmp[6];
1361 minftrain[0] = vtmp[7];
1362 rhotest[0] = vtmp[8];
1363 rhotrain[0] = vtmp[9];
1364 devtestT[0] = vtmp[10];
1365 devtrainT[0] = vtmp[11];
1366 biastestT[0] = vtmp[12];
1367 biastrainT[0] = vtmp[13];
1368 rmstestT[0] = vtmp[14];
1369 rmstrainT[0] = vtmp[15];
1370 minftestT[0] = vtmp[16];
1371 minftrainT[0] = vtmp[17];
1373 else if (doMulticlass) {
1379 for (
Int_t k=0; k<2; k++) {
1380 std::vector< std::vector<Double_t> > vtemp;
1381 vtemp.push_back( effArea[k] );
1382 vtemp.push_back( eff10[k] );
1383 vtemp.push_back( eff01[k] );
1384 vtemp.push_back( eff30[k] );
1385 vtemp.push_back( eff10err[k] );
1386 vtemp.push_back( eff01err[k] );
1387 vtemp.push_back( eff30err[k] );
1388 vtemp.push_back( trainEff10[k] );
1389 vtemp.push_back( trainEff01[k] );
1390 vtemp.push_back( trainEff30[k] );
1391 vtemp.push_back( sig[k] );
1392 vtemp.push_back(
sep[k] );
1393 vtemp.push_back( roc[k] );
1394 std::vector<TString> vtemps = mname[k];
1396 effArea[k] = vtemp[0];
1397 eff10[k] = vtemp[1];
1398 eff01[k] = vtemp[2];
1399 eff30[k] = vtemp[3];
1400 eff10err[k] = vtemp[4];
1401 eff01err[k] = vtemp[5];
1402 eff30err[k] = vtemp[6];
1403 trainEff10[k] = vtemp[7];
1404 trainEff01[k] = vtemp[8];
1405 trainEff30[k] = vtemp[9];
1420 const Int_t nmeth = methodsNoCuts.size();
1422 if (!doRegression && !doMulticlass ) {
1428 std::vector<Double_t> rvec;
1436 std::vector<TString>* theVars =
new std::vector<TString>;
1437 std::vector<ResultsClassification*> mvaRes;
1438 for (itrMethod = methodsNoCuts.begin(); itrMethod != methodsNoCuts.end(); itrMethod++, ivar++) {
1443 theVars->back().ReplaceAll(
"MVA_",
"" );
1463 for (
Int_t im=0; im<nmeth; im++) {
1467 Log() <<
kWARNING <<
"Found NaN return value in event: " << ievt
1468 <<
" for method \"" << methodsNoCuts[im]->GetName() <<
"\"" <<
Endl;
1471 else dvec[im] = retval;
1475 else { tpBkg->
AddRow( dvec ); theMat = overlapB; }
1478 for (
Int_t im=0; im<nmeth; im++) {
1479 for (
Int_t jm=im; jm<nmeth; jm++) {
1480 if ((dvec[im] - rvec[im])*(dvec[jm] - rvec[jm]) > 0) {
1482 if (im != jm) (*theMat)(jm,im)++;
1502 if (corrMatS != 0 && corrMatB != 0) {
1507 for (
Int_t im=0; im<nmeth; im++) {
1508 for (
Int_t jm=0; jm<nmeth; jm++) {
1509 mvaMatS(im,jm) = (*corrMatS)(im,jm);
1510 mvaMatB(im,jm) = (*corrMatB)(im,jm);
1515 std::vector<TString> theInputVars;
1518 for (
Int_t iv=0; iv<nvar; iv++) {
1520 for (
Int_t jm=0; jm<nmeth; jm++) {
1521 varmvaMatS(iv,jm) = (*corrMatS)(nmeth+iv,jm);
1522 varmvaMatB(iv,jm) = (*corrMatB)(nmeth+iv,jm);
1528 Log() <<
kINFO <<
"Inter-MVA correlation matrix (signal):" <<
Endl;
1532 Log() <<
kINFO <<
"Inter-MVA correlation matrix (background):" <<
Endl;
1537 Log() <<
kINFO <<
"Correlations between input variables and MVA response (signal):" <<
Endl;
1541 Log() <<
kINFO <<
"Correlations between input variables and MVA response (background):" <<
Endl;
1545 else Log() <<
kWARNING <<
"<TestAllMethods> cannot compute correlation matrices" <<
Endl;
1548 Log() <<
kINFO <<
"The following \"overlap\" matrices contain the fraction of events for which " <<
Endl;
1549 Log() <<
kINFO <<
"the MVAs 'i' and 'j' have returned conform answers about \"signal-likeness\"" <<
Endl;
1550 Log() <<
kINFO <<
"An event is signal-like, if its MVA output exceeds the following value:" <<
Endl;
1552 Log() <<
kINFO <<
"which correspond to the working point: eff(signal) = 1 - eff(background)" <<
Endl;
1556 Log() <<
kINFO <<
"Note: no correlations and overlap with cut method are provided at present" <<
Endl;
1560 Log() <<
kINFO <<
"Inter-MVA overlap matrix (signal):" <<
Endl;
1564 Log() <<
kINFO <<
"Inter-MVA overlap matrix (background):" <<
Endl;
1588 TString hLine =
"-------------------------------------------------------------------------";
1589 Log() <<
kINFO <<
"Evaluation results ranked by smallest RMS on test sample:" <<
Endl;
1590 Log() <<
kINFO <<
"(\"Bias\" quotes the mean deviation of the regression from true target." <<
Endl;
1591 Log() <<
kINFO <<
" \"MutInf\" is the \"Mutual Information\" between regression and target." <<
Endl;
1592 Log() <<
kINFO <<
" Indicated by \"_T\" are the corresponding \"truncated\" quantities ob-" <<
Endl;
1593 Log() <<
kINFO <<
" tained when removing events deviating more than 2sigma from average.)" <<
Endl;
1595 Log() <<
kINFO <<
"MVA Method: <Bias> <Bias_T> RMS RMS_T | MutInf MutInf_T" <<
Endl;
1598 for (
Int_t i=0; i<nmeth_used[0]; i++) {
1599 Log() <<
kINFO <<
Form(
"%-15s:%#9.3g%#9.3g%#9.3g%#9.3g | %#5.3f %#5.3f",
1600 (
const char*)mname[0][i],
1601 biastest[0][i], biastestT[0][i],
1602 rmstest[0][i], rmstestT[0][i],
1603 minftest[0][i], minftestT[0][i] )
1608 Log() <<
kINFO <<
"Evaluation results ranked by smallest RMS on training sample:" <<
Endl;
1611 Log() <<
kINFO <<
"MVA Method: <Bias> <Bias_T> RMS RMS_T | MutInf MutInf_T" <<
Endl;
1614 for (
Int_t i=0; i<nmeth_used[0]; i++) {
1615 Log() <<
kINFO <<
Form(
"%-15s:%#9.3g%#9.3g%#9.3g%#9.3g | %#5.3f %#5.3f",
1616 (
const char*)mname[0][i],
1617 biastrain[0][i], biastrainT[0][i],
1618 rmstrain[0][i], rmstrainT[0][i],
1619 minftrain[0][i], minftrainT[0][i] )
1625 else if( doMulticlass ){
1627 TString hLine =
"--------------------------------------------------------------------------------";
1628 Log() <<
kINFO <<
"Evaluation results ranked by best signal efficiency times signal purity " <<
Endl;
1630 TString header=
"MVA Method ";
1636 for (
Int_t i=0; i<nmeth_used[0]; i++) {
1637 TString res =
Form(
"%-15s",(
const char*)mname[0][i]);
1639 res +=
Form(
"%#1.3f ",(multiclass_testEff[i][icls])*(multiclass_testPur[i][icls]));
1649 TString hLine =
"--------------------------------------------------------------------------------";
1650 Log() <<
kINFO <<
"Evaluation results ranked by best signal efficiency and purity (area)" <<
Endl;
1652 Log() <<
kINFO <<
"MVA Signal efficiency at bkg eff.(error): | Sepa- Signifi- " <<
Endl;
1653 Log() <<
kINFO <<
"Method: @B=0.01 @B=0.10 @B=0.30 ROC-integ. | ration: cance: " <<
Endl;
1655 for (
Int_t k=0; k<2; k++) {
1656 if (k == 1 && nmeth_used[k] > 0) {
1658 Log() <<
kINFO <<
"Input Variables: " << Endl << hLine <<
Endl;
1660 for (
Int_t i=0; i<nmeth_used[k]; i++) {
1661 if (k == 1) mname[k][i].ReplaceAll(
"Variable_",
"" );
1662 if (
sep[k][i] < 0 || sig[k][i] < 0) {
1664 Log() <<
kINFO <<
Form(
"%-15s: %#1.3f(%02i) %#1.3f(%02i) %#1.3f(%02i) %#1.3f | -- --",
1665 (
const char*)mname[k][i],
1666 eff01[k][i],
Int_t(1000*eff01err[k][i]),
1667 eff10[k][i],
Int_t(1000*eff10err[k][i]),
1668 eff30[k][i],
Int_t(1000*eff30err[k][i]),
1669 effArea[k][i]) <<
Endl;
1672 Log() <<
kINFO <<
Form(
"%-15s: %#1.3f(%02i) %#1.3f(%02i) %#1.3f(%02i) %#1.3f | %#1.3f %#1.3f",
1673 (
const char*)mname[k][i],
1674 eff01[k][i],
Int_t(1000*eff01err[k][i]),
1675 eff10[k][i],
Int_t(1000*eff10err[k][i]),
1676 eff30[k][i],
Int_t(1000*eff30err[k][i]),
1678 sep[k][i], sig[k][i]) <<
Endl;
1684 Log() <<
kINFO <<
"Testing efficiency compared to training efficiency (overtraining check)" <<
Endl;
1686 Log() <<
kINFO <<
"MVA Signal efficiency: from test sample (from training sample) " <<
Endl;
1687 Log() <<
kINFO <<
"Method: @B=0.01 @B=0.10 @B=0.30 " <<
Endl;
1689 for (
Int_t k=0; k<2; k++) {
1690 if (k == 1 && nmeth_used[k] > 0) {
1692 Log() <<
kINFO <<
"Input Variables: " << Endl << hLine <<
Endl;
1694 for (
Int_t i=0; i<nmeth_used[k]; i++) {
1695 if (k == 1) mname[k][i].ReplaceAll(
"Variable_",
"" );
1696 Log() <<
kINFO <<
Form(
"%-15s: %#1.3f (%#1.3f) %#1.3f (%#1.3f) %#1.3f (%#1.3f)",
1697 (
const char*)mname[k][i],
1698 eff01[k][i],trainEff01[k][i],
1699 eff10[k][i],trainEff10[k][i],
1700 eff30[k][i],trainEff30[k][i]) <<
Endl;
IMethod * Create(const std::string &name, const TString &job, const TString &title, DataSetInfo &dsi, const TString &option)
creates the method if needed based on the method name using the creator function the factory has stor...
static ClassifierFactory & Instance()
access to the ClassifierFactory singleton creates the instance if needed
DataSetInfo * GetDataSetInfo(const TString &dsiName)
returns datasetinfo object for given name
virtual const char * GetName() const
Returns name of object.
virtual Int_t Write(const char *name=0, Int_t option=0, Int_t bufsize=0)
Write this object to the current directory.
virtual void MakeClass(const TString &classFileName=TString("")) const
create reader class for method (classification only at present)
UInt_t GetNVariables() const
Int_t fATreeType
for each class: tmp tree if user wants to assign the events directly
Principal Components Analysis (PCA)
void AddSignalTrainingEvent(const std::vector< Double_t > &event, Double_t weight=1.0)
add signal training event
void SetInputTrees(const TString &signalFileName, const TString &backgroundFileName, Double_t signalWeight=1.0, Double_t backgroundWeight=1.0)
static TDirectory * RootBaseDir()
void OptimizeAllMethods(TString fomType="ROCIntegral", TString fitType="FitGA")
iterates through all booked methods and sees if they use parameter tuning and if so.
static Vc_ALWAYS_INLINE int_v min(const int_v &x, const int_v &y)
MsgLogger & Endl(MsgLogger &ml)
void AddOutput(Types::ETreeType type, Types::EAnalysisType analysisType)
VariableInfo & AddTarget(const TString &expression, const TString &title, const TString &unit, Double_t min, Double_t max, Bool_t normalized=kTRUE, void *external=0)
add a variable (can be a complex expression) to the set of variables used in the MV analysis ...
static void CreateVariableTransforms(const TString &trafoDefinition, TMVA::DataSetInfo &dataInfo, TMVA::TransformationHandler &transformationHandler, TMVA::MsgLogger &log)
create variable transformations
std::vector< TMVA::VariableTransformBase * > fDefaultTrfs
DataInputHandler * fDataInputHandler
ROOT output file.
std::vector< TTree * > fTestAssignTree
for each class: tmp tree if user wants to assign the events directly
std::vector< VariableInfo > & GetSpectatorInfos()
void SetCut(const TCut &cut, const TString &className)
set the cut for the classes
TString & ReplaceAll(const TString &s1, const TString &s2)
virtual void WriteEvaluationHistosToFile(Types::ETreeType treetype)
writes all MVA evaluation histograms to file
void SetSignalWeightExpression(const TString &variable)
virtual std::map< TString, Double_t > OptimizeTuningParameters(TString fomType="ROCIntegral", TString fitType="FitGA")
call the Optimizer with the set of parameters and ranges that are meant to be tuned.
void SetInputVariables(std::vector< TString > *theVariables)
fill input variables in data set
OptionBase * DeclareOptionRef(T &ref, const TString &name, const TString &desc="")
A ROOT file is a suite of consecutive data records (TKey instances) with a well defined format...
Double_t background(Double_t *x, Double_t *par)
Bool_t Verbose(void) const
virtual void MakeClass(const TString &classFileName=TString("")) const =0
void AddBackgroundTrainingEvent(const std::vector< Double_t > &event, Double_t weight=1.0)
add background training event
TString fTransformations
option string given by construction (presently only "V")
void AddSpectator(const TString &expression, const TString &title="", const TString &unit="", Double_t min=0, Double_t max=0)
user inserts spectator in data set info
std::vector< TTree * > fTrainAssignTree
flags for data assigning
void ToLower()
Change string to lower-case.
void AddVariable(const TString &expression, const TString &title, const TString &unit, char type='F', Double_t min=0, Double_t max=0)
user inserts discriminating variable in data set info
void TrainAllMethods()
iterates through all booked methods and calls training
virtual void TestMulticlass()
test multiclass classification
UInt_t GetNClasses() const
DataSetInfo & DefaultDataSetInfo()
default creation
void AddCut(const TCut &cut, const TString &className)
set the cut for the classes
const std::vector< Event * > & GetEventCollection(Types::ETreeType type=Types::kMaxTreeType) const
const TString & GetLabel() const
virtual Bool_t HasAnalysisType(Types::EAnalysisType type, UInt_t numberClasses, UInt_t numberTargets)=0
static TFile * fgTargetFile
void AddTrainingEvent(const TString &className, const std::vector< Double_t > &event, Double_t weight)
add signal training event
virtual Double_t GetROCIntegral(TH1D *histS, TH1D *histB) const
calculate the area (integral) under the ROC curve as a overall quality measure of the classification ...
void AddTarget(const TString &expression, const TString &title="", const TString &unit="", Double_t min=0, Double_t max=0)
user inserts target in data set info
void SetSignalTree(TTree *signal, Double_t weight=1.0)
void WriteDataInformation()
put correlations of input data and a few (default + user selected) transformations into the root file...
MVector fMethods
verbose mode
void AddTestEvent(const TString &className, const std::vector< Double_t > &event, Double_t weight)
add signal test event
void AddBackgroundTree(TTree *background, Double_t weight=1.0, Types::ETreeType treetype=Types::kMaxTreeType)
number of signal events (used to compute significance)
VariableInfo & AddSpectator(const TString &expression, const TString &title, const TString &unit, Double_t min, Double_t max, char type='F', Bool_t normalized=kTRUE, void *external=0)
add a spectator (can be a complex expression) to the set of spectator variables used in the MV analys...
void AddEvent(const TString &className, Types::ETreeType tt, const std::vector< Double_t > &event, Double_t weight)
add event vector event : the order of values is: variables + targets + spectators ...
static Types & Instance()
return the single instance of "Types" if existing already, or create it (Singleton)
static void InhibitOutput()
static void SetIsTraining(Bool_t)
when this static function is called, it sets the flag whether events with negative event weight shoul...
const TString & GetName() const
TH2 * CreateCorrelationMatrixHist(const TMatrixD *m, const TString &hName, const TString &hTitle) const
TTree * GetTree(Types::ETreeType type)
create the test/trainings tree with all the variables, the weights, the classes, the targets...
void ReadStateFromFile()
Function to write options and weights to file.
void EvaluateAllVariables(TString options="")
iterates over all MVA input varables and evaluates them
void PrintHelpMessage() const
prints out method-specific help method
std::vector< std::vector< double > > Data
virtual void ParseOptions()
options parser
void SetupMethod()
setup of methods
DataSetInfo & DataInfo() const
Bool_t DoRegression() const
void SetMinType(EMsgType minType)
TTree * CreateEventAssignTrees(const TString &name)
create the data assignment tree (for event-wise data assignment by user)
void SetDrawProgressBar(Bool_t d)
Long64_t GetNTrainingEvents() const
std::vector< VariableInfo > & GetTargetInfos()
virtual Double_t GetEfficiency(const TString &, Types::ETreeType, Double_t &err)
fill background efficiency (resp.
virtual std::vector< Float_t > GetMulticlassEfficiency(std::vector< std::vector< Float_t > > &purity)
DataInputHandler & DataInput()
A specialized string object used for TTree selections.
TMatrixT< Double_t > TMatrixD
Bool_t DoMulticlass() const
Bool_t UserAssignEvents(UInt_t clIndex)
const Int_t MinNoTrainingEvents
virtual ~Factory()
destructor delete fATreeEvent;
void AddBackgroundTestEvent(const std::vector< Double_t > &event, Double_t weight=1.0)
add background test event
void SetCut(const TString &cut, const TString &className="")
void SetSplitOptions(const TString &so)
virtual Int_t Write(const char *name=0, Int_t option=0, Int_t bufsize=0)
Write this object to the current directory.
DataSetInfo & AddDataSet(DataSetInfo &)
UInt_t GetNTargets() const
Results * GetResults(const TString &, Types::ETreeType type, Types::EAnalysisType analysistype)
TString info(resultsName+"/"); switch(type) { case Types::kTraining: info += "kTraining/"; break; cas...
std::string GetMethodName(TCppMethod_t)
Service class for 2-Dim histogram classes.
MethodBase * BookMethod(TString theMethodName, TString methodTitle, TString theOption="")
Book a classifier or regression method.
virtual void AddRow(const Double_t *x)
Add a data point and update the covariance matrix.
const char * GetName() const
Returns name of object.
Long64_t GetNEvtSigTest()
return number of signal test events in dataset
ClassInfo * GetClassInfo(Int_t clNum) const
const TMatrixD * CorrelationMatrix(const TString &className) const
void EvaluateAllMethods(void)
iterates over all MVAs that have been booked, and calls their evaluation methods
Bool_t BeginsWith(const char *s, ECaseCompare cmp=kExact) const
void SetWeightExpression(const TString &exp, const TString &className="")
set the weight expressions for the classes if class name is specified, set only for this class if cla...
DataSetInfo & AddDataSetInfo(DataSetInfo &dsi)
stores a copy of the dataset info object
char * Form(const char *fmt,...)
DataSetManager * fDataSetManager
const TMatrixD * GetCovarianceMatrix() const
void AddCut(const TString &cut, const TString &className="")
const TString & GetMethodName() const
virtual void MakeClass(const TString &methodTitle="") const
Print predefined help message of classifier iterate over methods and test.
void SetBackgroundWeightExpression(const TString &variable)
void Greetings()
print welcome message options are: kLogoWelcomeMsg, kIsometricWelcomeMsg, kLeanWelcomeMsg ...
virtual void MakePrincipals()
Perform the principal components analysis.
void SetBoostedMethodName(TString methodName)
void SetVerbose(Bool_t v=kTRUE)
void PrintClasses() const
virtual Double_t GetSignificance() const
compute significance of mean difference significance = |<S> - |/Sqrt(RMS_S2 + RMS_B2) ...
Long64_t GetNEvtBkgdTest()
return number of background test events in dataset
TString GetWeightFileName() const
retrieve weight file name
Float_t GetValue(UInt_t ivar) const
return value of i'th variable
virtual void Print() const
get maximum length of variable names
void SetInputTreesFromEventAssignTrees()
assign event-wise local trees to data set
virtual void SetDirectory(TDirectory *dir)
Change the tree's directory.
virtual void CheckSetup()
check may be overridden by derived class (sometimes, eg, fitters are used which can only be implement...
virtual void PrintHelpMessage() const =0
virtual Double_t GetSeparation(TH1 *, TH1 *) const
compute "separation" defined as <s2> = (1/2) Int_-oo..+oo { (S(x) - B(x))^2/(S(x) + B(x)) dx } ...
void AddTree(TTree *tree, const TString &className, Double_t weight=1.0, const TCut &cut="", Types::ETreeType tt=Types::kMaxTreeType)
virtual Long64_t ReadFile(const char *filename, const char *branchDescriptor="", char delimiter=' ')
Create or simply read branches from filename.
static void DestroyInstance()
static function: destroy TMVA instance
void SetCurrentType(Types::ETreeType type) const
Bool_t Contains(const char *pat, ECaseCompare cmp=kExact) const
VariableInfo & GetVariableInfo(Int_t i)
void AddPreDefVal(const T &)
ClassInfo * AddClass(const TString &className)
virtual Long64_t GetEntries() const
void ProcessSetup()
process all options the "CheckForUnusedOptions" is done in an independent call, since it may be overr...
static Vc_ALWAYS_INLINE int_v max(const int_v &x, const int_v &y)
const TString & GetOptions() const
void AddSignalTree(TTree *signal, Double_t weight=1.0, Types::ETreeType treetype=Types::kMaxTreeType)
number of signal events (used to compute significance)
void SetUseColor(Bool_t uc)
void SetConfigName(const char *n)
virtual Int_t Branch(TCollection *list, Int_t bufsize=32000, Int_t splitlevel=99, const char *name="")
Create one branch for each element in the collection.
virtual void TestRegression(Double_t &bias, Double_t &biasT, Double_t &dev, Double_t &devT, Double_t &rms, Double_t &rmsT, Double_t &mInf, Double_t &mInfT, Double_t &corr, Types::ETreeType type)
calculate <sum-of-deviation-squared> of regression output versus "true" value from test sample ...
DataSetManager * fDataSetManager
void SetWeightExpression(const TString &variable, const TString &className="")
Log() << kWarning << DefaultDataSetInfo().GetNClasses() /*fClasses.size()*/ << Endl;.
virtual Bool_t cd(const char *path=0)
Change current directory to "this" directory.
Factory(TString theJobName, TFile *theTargetFile, TString theOption="")
standard constructor jobname : this name will appear in all weight file names produced by the MVAs th...
TString GetMethodTypeName() const
TString fJobName
all MVA methods
Double_t GetSignalReferenceCut() const
void DeleteAllMethods(void)
delete methods
void PrintHelpMessage(const TString &methodTitle="") const
Print predefined help message of classifier iterate over methods and test.
Long64_t GetNEvents(Types::ETreeType type=Types::kMaxTreeType) const
virtual Double_t GetTrainingEfficiency(const TString &)
DataSetManager * fDataSetManager
VariableInfo & AddVariable(const TString &expression, const TString &title="", const TString &unit="", Double_t min=0, Double_t max=0, char varType='F', Bool_t normalized=kTRUE, void *external=0)
add a variable (can be a complex expression) to the set of variables used in the MV analysis ...
void AddSignalTestEvent(const std::vector< Double_t > &event, Double_t weight=1.0)
add signal testing event
Types::EAnalysisType GetAnalysisType() const
A TTree object has a header with a name and a title.
Types::EAnalysisType fAnalysisType
std::vector< IMethod * > MVector
virtual const char * GetName() const
Returns name of object.
const TString & GetTestvarName() const
void SetTestvarName(const TString &v="")
IMethod * GetMethod(const TString &title) const
returns pointer to MVA that corresponds to given method title
DataSet * GetDataSet() const
returns data set
Types::EMVA GetMethodType() const
void SetTree(TTree *tree, const TString &className, Double_t weight)
set background tree
void CheckForUnusedOptions() const
checks for unused options in option string
virtual void TestClassification()
initialization
const Event * GetEvent() const
std::vector< VariableInfo > & GetVariableInfos()
void PrepareTrainingAndTestTree(const TCut &cut, const TString &splitOpt)
prepare the training and test trees -> same cuts for signal and background
virtual void SetAnalysisType(Types::EAnalysisType type)
void SetConfigDescription(const char *d)
Bool_t fVerbose
List of transformations to test.
const char * Data() const
void SetBackgroundTree(TTree *background, Double_t weight=1.0)