103 fMinLinCorrForFisher (1),
104 fUseExclusiveVars (
kTRUE),
110 fUseSearchTree(kFALSE),
112 fPruneMethod (kNoPruning),
113 fNNodesBeforePruning(0),
114 fNodePurityLimit(0.5),
115 fRandomisedTree (kFALSE),
117 fUsePoissonNvars(kFALSE),
122 fAnalysisType (
Types::kClassification),
140 fMinLinCorrForFisher (1),
141 fUseExclusiveVars (
kTRUE),
145 fMinNodeSize (minSize),
149 fPruneMethod (kNoPruning),
150 fNNodesBeforePruning(0),
151 fNodePurityLimit(purityLimit),
152 fRandomisedTree (randomisedTree),
153 fUseNvars (useNvars),
154 fUsePoissonNvars(usePoissonNvars),
156 fMaxDepth (nMaxDepth),
159 fAnalysisType (
Types::kClassification),
160 fDataSetInfo (dataInfo)
162 if (sepType ==
NULL) {
169 Log() <<
kWARNING <<
" You had choosen the training mode using optimal cuts, not\n" 170 <<
" based on a grid of " <<
fNCuts <<
" by setting the option NCuts < 0\n" 171 <<
" as this doesn't exist yet, I set it to " <<
fNCuts <<
" and use the grid" 235 Log() <<
kFATAL <<
"SetParentTreeNodes: started with undefined ROOT node" <<
Endl;
241 Log() <<
kFATAL <<
" Node with only one daughter?? Something went wrong" <<
Endl;
244 Log() <<
kFATAL <<
" Node with only one daughter?? Something went wrong" <<
Endl;
264 std::string
type(
"");
268 dt->
ReadXML( node, tmva_Version_Code );
291 Log() <<
kINFO <<
"The minimal node size MinNodeSize=" <<
fMinNodeSize <<
" fMinNodeSize="<<
fMinNodeSize<<
"% is translated to an actual number of events = "<<
fMinSize<<
" for the training sample size of " << eventSample.size() <<
Endl;
292 Log() <<
kINFO <<
"Note: This number will be taken as absolute minimum in the node, " <<
Endl;
293 Log() <<
kINFO <<
" in terms of 'weighted events' and unweighted ones !! " <<
Endl;
297 UInt_t nevents = eventSample.size();
300 if (
fNvars==0)
fNvars = eventSample[0]->GetNVariables();
303 else Log() <<
kFATAL <<
":<BuildTree> eventsample Size == 0 " <<
Endl;
312 xmin[ivar]=xmax[ivar]=0;
314 for (
UInt_t iev=0; iev<eventSample.size(); iev++) {
331 target2+=weight*tgt*tgt;
336 if (iev==0) xmin[ivar]=xmax[ivar]=val;
337 if (val < xmin[ivar]) xmin[ivar]=val;
338 if (val > xmax[ivar]) xmax[ivar]=val;
344 Log() <<
kWARNING <<
" One of the Decision Tree nodes has negative total number of signal or background events. " 345 <<
"(Nsig="<<s<<
" Nbkg="<<b<<
" Probaby you use a Monte Carlo with negative weights. That should in principle " 346 <<
"be fine as long as on average you end up with something positive. For this you have to make sure that the " 347 <<
"minimul number of (unweighted) events demanded for a tree node (currently you use: MinNodeSize="<<
fMinNodeSize 348 <<
"% of training events, you can set this via the BDT option string when booking the classifier) is large enough " 349 <<
"to allow for reasonable averaging!!!" << Endl
350 <<
" If this does not help.. maybe you want to try the option: NoNegWeightsInTraining which ignores events " 351 <<
"with negative weight in the training." <<
Endl;
353 for (
UInt_t i=0; i<eventSample.size(); i++) {
355 nBkg += eventSample[i]->GetWeight();
356 Log() <<
kDEBUG <<
"Event "<< i<<
" has (original) weight: " << eventSample[i]->GetWeight()/eventSample[i]->GetBoostWeight()
357 <<
" boostWeight: " << eventSample[i]->GetBoostWeight() <<
Endl;
424 std::vector<const TMVA::Event*> leftSample; leftSample.reserve(nevents);
425 std::vector<const TMVA::Event*> rightSample; rightSample.reserve(nevents);
428 Double_t nRightUnBoosted=0, nLeftUnBoosted=0;
430 for (
UInt_t ie=0; ie< nevents ; ie++) {
432 rightSample.push_back(eventSample[ie]);
433 nRight += eventSample[ie]->GetWeight();
434 nRightUnBoosted += eventSample[ie]->GetOriginalWeight();
437 leftSample.push_back(eventSample[ie]);
438 nLeft += eventSample[ie]->GetWeight();
439 nLeftUnBoosted += eventSample[ie]->GetOriginalWeight();
450 if (leftSample.empty() || rightSample.empty()) {
451 Log() <<
kERROR <<
"<TrainNode> all events went to the same branch" << Endl
452 <<
"--- Hence new node == old node ... check" << Endl
453 <<
"--- left:" << leftSample.size()
454 <<
" right:" << rightSample.size() << Endl
455 <<
" while the separation is thought to be " << separationGain
456 <<
kFATAL <<
"--- this should never happen, please write a bug report to Helge.Voss@cern.ch" <<
Endl;
518 for (
UInt_t i=0; i<eventSample.size(); i++) {
552 this->
FillEvent(event,dynamic_cast<TMVA::DecisionTreeNode*>(node->
GetLeft())) ;
621 Log() <<
kFATAL <<
"Selected pruning method not yet implemented " 625 if(!tool)
return 0.0;
629 if(validationSample ==
NULL){
630 Log() <<
kFATAL <<
"Cannot automate the pruning algorithm without an " 631 <<
"independent validation sample!" <<
Endl;
632 }
else if(validationSample->size() == 0) {
633 Log() <<
kFATAL <<
"Cannot automate the pruning algorithm with " 634 <<
"independent validation sample of ZERO events!" <<
Endl;
641 Log() <<
kFATAL <<
"Error pruning tree! Check prune.log for more information." 661 return pruneStrength;
674 for (
UInt_t ievt=0; ievt < validationSample->size(); ievt++) {
690 Log() <<
kFATAL <<
"TestPrunedTreeQuality: started with undefined ROOT node" <<
Endl;
711 else if ( mode == 1 ) {
716 throw std::string(
"Unknown ValidationQualityMode");
730 if (current ==
NULL) {
731 Log() <<
kFATAL <<
"CheckEventWithPrunedTree: started with undefined ROOT node" <<
Endl;
734 while(current !=
NULL) {
763 for( EventConstList::const_iterator it = validationSample->begin();
764 it != validationSample->end(); ++it ) {
765 sumWeights += (*it)->GetWeight();
780 Log() <<
kFATAL <<
"CountLeafNodes: started with undefined ROOT node" <<
Endl;
809 Log() <<
kFATAL <<
"DescendTree: started with undefined ROOT node" <<
Endl;
818 Log() <<
kFATAL <<
" Node with only one daughter?? Something went wrong" <<
Endl;
822 Log() <<
kFATAL <<
" Node with only one daughter?? Something went wrong" <<
Endl;
862 if(node ==
NULL)
return;
879 for (
UInt_t i =0; i < depth; i++) {
902 while (nSelectedVars < useNvars) {
907 if (useVariable[ivar] ==
kTRUE) {
908 mapVariable[nSelectedVars] = ivar;
913 if (nSelectedVars != useNvars) { std::cout <<
"Bug in TrainNode - GetRandisedVariables()... sorry" << std::endl; std::exit(1);}
928 Double_t separationGainTotal = -1, sepTmp;
933 separationGain[ivar]=-1;
939 Int_t nTotS_unWeighted, nTotB_unWeighted;
940 UInt_t nevents = eventSample.size();
948 std::vector<Double_t> fisherCoeff;
956 useVariable[ivar] =
kTRUE;
957 mapVariable[ivar] = ivar;
971 useVarInFisher[ivar] =
kFALSE;
972 mapVarInFisher[ivar] = ivar;
975 std::vector<TMatrixDSym*>* covMatrices;
978 Log() <<
kWARNING <<
" in TrainNodeFast, the covariance Matrices needed for the Fisher-Cuts returned error --> revert to just normal cuts for this node" <<
Endl;
990 useVarInFisher[ivar] =
kTRUE;
991 useVarInFisher[jvar] =
kTRUE;
1002 if (useVarInFisher[ivar] && useVariable[ivar]) {
1003 mapVarInFisher[nFisherVars++]=ivar;
1014 delete [] useVarInFisher;
1015 delete [] mapVarInFisher;
1033 for (
UInt_t ivar=0; ivar<cNvars; ivar++) {
1041 nSelS[ivar] =
new Double_t [nBins[ivar]];
1042 nSelB[ivar] =
new Double_t [nBins[ivar]];
1043 nSelS_unWeighted[ivar] =
new Double_t [nBins[ivar]];
1044 nSelB_unWeighted[ivar] =
new Double_t [nBins[ivar]];
1045 target[ivar] =
new Double_t [nBins[ivar]];
1046 target2[ivar] =
new Double_t [nBins[ivar]];
1047 cutValues[ivar] =
new Double_t [nBins[ivar]];
1054 for (
UInt_t ivar=0; ivar < cNvars; ivar++) {
1061 useVariable[ivar]=
kFALSE;
1069 for (
UInt_t iev=0; iev<nevents; iev++) {
1073 result += fisherCoeff[jvar]*(eventSample[iev])->GetValue(jvar);
1074 if (result > xmax[ivar]) xmax[ivar]=
result;
1075 if (result < xmin[ivar]) xmin[ivar]=
result;
1078 for (
UInt_t ibin=0; ibin<nBins[ivar]; ibin++) {
1079 nSelS[ivar][ibin]=0;
1080 nSelB[ivar][ibin]=0;
1081 nSelS_unWeighted[ivar][ibin]=0;
1082 nSelB_unWeighted[ivar][ibin]=0;
1083 target[ivar][ibin]=0;
1084 target2[ivar][ibin]=0;
1085 cutValues[ivar][ibin]=0;
1090 for (
UInt_t ivar=0; ivar < cNvars; ivar++) {
1092 if ( useVariable[ivar] ) {
1115 for (
UInt_t icut=0; icut<nBins[ivar]-1; icut++) {
1116 cutValues[ivar][icut]=xmin[ivar]+(
Double_t(icut+1))*istepSize;
1123 nTotS_unWeighted=0; nTotB_unWeighted=0;
1124 for (
UInt_t iev=0; iev<nevents; iev++) {
1126 Double_t eventWeight = eventSample[iev]->GetWeight();
1137 for (
UInt_t ivar=0; ivar < cNvars; ivar++) {
1140 if ( useVariable[ivar] ) {
1142 if (ivar <
fNvars) eventData = eventSample[iev]->GetValue(ivar);
1144 eventData = fisherCoeff[
fNvars];
1146 eventData += fisherCoeff[jvar]*(eventSample[iev])->GetValue(jvar);
1150 iBin =
TMath::Min(
Int_t(nBins[ivar]-1),
TMath::Max(0,
int (nBins[ivar]*(eventData-xmin[ivar])/(xmax[ivar]-xmin[ivar]) ) ));
1152 nSelS[ivar][iBin]+=eventWeight;
1153 nSelS_unWeighted[ivar][iBin]++;
1156 nSelB[ivar][iBin]+=eventWeight;
1157 nSelB_unWeighted[ivar][iBin]++;
1160 target[ivar][iBin] +=eventWeight*eventSample[iev]->GetTarget(0);
1161 target2[ivar][iBin]+=eventWeight*eventSample[iev]->GetTarget(0)*eventSample[iev]->GetTarget(0);
1167 for (
UInt_t ivar=0; ivar < cNvars; ivar++) {
1168 if (useVariable[ivar]) {
1169 for (
UInt_t ibin=1; ibin < nBins[ivar]; ibin++) {
1170 nSelS[ivar][ibin]+=nSelS[ivar][ibin-1];
1171 nSelS_unWeighted[ivar][ibin]+=nSelS_unWeighted[ivar][ibin-1];
1172 nSelB[ivar][ibin]+=nSelB[ivar][ibin-1];
1173 nSelB_unWeighted[ivar][ibin]+=nSelB_unWeighted[ivar][ibin-1];
1175 target[ivar][ibin] +=target[ivar][ibin-1] ;
1176 target2[ivar][ibin]+=target2[ivar][ibin-1];
1179 if (nSelS_unWeighted[ivar][nBins[ivar]-1] +nSelB_unWeighted[ivar][nBins[ivar]-1] != eventSample.size()) {
1180 Log() <<
kFATAL <<
"Helge, you have a bug ....nSelS_unw..+nSelB_unw..= " 1181 << nSelS_unWeighted[ivar][nBins[ivar]-1] +nSelB_unWeighted[ivar][nBins[ivar]-1]
1182 <<
" while eventsample size = " << eventSample.size()
1185 double lastBins=nSelS[ivar][nBins[ivar]-1] +nSelB[ivar][nBins[ivar]-1];
1186 double totalSum=nTotS+nTotB;
1187 if (
TMath::Abs(lastBins-totalSum)/totalSum>0.01) {
1188 Log() <<
kFATAL <<
"Helge, you have another bug ....nSelS+nSelB= " 1190 <<
" while total number of events = " << totalSum
1197 for (
UInt_t ivar=0; ivar < cNvars; ivar++) {
1198 if (useVariable[ivar]) {
1199 for (
UInt_t iBin=0; iBin<nBins[ivar]-1; iBin++) {
1211 Double_t sl = nSelS_unWeighted[ivar][iBin];
1212 Double_t bl = nSelB_unWeighted[ivar][iBin];
1230 target[ivar][iBin],target2[ivar][iBin],
1232 target[ivar][nBins[ivar]-1],target2[ivar][nBins[ivar]-1]);
1236 if (separationGain[ivar] < sepTmp) {
1237 separationGain[ivar] = sepTmp;
1238 cutIndex[ivar] = iBin;
1247 for (
UInt_t ivar=0; ivar < cNvars; ivar++) {
1248 if (useVariable[ivar] ) {
1249 if (separationGainTotal < separationGain[ivar]) {
1250 separationGainTotal = separationGain[ivar];
1259 node->
SetResponse(target[0][nBins[mxVar]-1]/(nTotS+nTotB));
1263 node->
SetRMS(
TMath::Sqrt(target2[0][nBins[mxVar]-1]/(nTotS+nTotB) - target[0][nBins[mxVar]-1]/(nTotS+nTotB)*target[0][nBins[mxVar]-1]/(nTotS+nTotB)));
1269 if (nSelS[mxVar][cutIndex[mxVar]]/nTotS > nSelB[mxVar][cutIndex[mxVar]]/nTotB) cutType=
kTRUE;
1274 node->
SetCutValue(cutValues[mxVar][cutIndex[mxVar]]);
1279 fVariableImportance[mxVar] += separationGainTotal*separationGainTotal * (nTotS+nTotB) * (nTotS+nTotB) ;
1290 fVariableImportance[ivar] += fisherCoeff[ivar]*fisherCoeff[ivar]*separationGainTotal*separationGainTotal * (nTotS+nTotB) * (nTotS+nTotB) ;
1296 separationGainTotal = 0;
1313 for (
UInt_t i=0; i<cNvars; i++) {
1316 delete [] nSelS_unWeighted[i];
1317 delete [] nSelB_unWeighted[i];
1318 delete [] target[i];
1319 delete [] target2[i];
1320 delete [] cutValues[i];
1324 delete [] nSelS_unWeighted;
1325 delete [] nSelB_unWeighted;
1328 delete [] cutValues;
1333 delete [] useVariable;
1334 delete [] mapVariable;
1336 delete [] separationGain;
1341 return separationGainTotal;
1351 std::vector<Double_t> fisherCoeff(
fNvars+1);
1374 for (
UInt_t ivar=0; ivar<nFisherVars; ivar++) { sumS[ivar] = sumB[ivar] = 0; }
1376 UInt_t nevents = eventSample.size();
1378 for (
UInt_t ievt=0; ievt<nevents; ievt++) {
1381 const Event * ev = eventSample[ievt];
1386 else sumOfWeightsB += weight;
1389 for (
UInt_t ivar=0; ivar<nFisherVars; ivar++) {
1390 sum[ivar] += ev->
GetValue( mapVarInFisher[ivar] )*weight;
1393 for (
UInt_t ivar=0; ivar<nFisherVars; ivar++) {
1394 (*meanMatx)( ivar, 2 ) = sumS[ivar];
1395 (*meanMatx)( ivar, 0 ) = sumS[ivar]/sumOfWeightsS;
1397 (*meanMatx)( ivar, 2 ) += sumB[ivar];
1398 (*meanMatx)( ivar, 1 ) = sumB[ivar]/sumOfWeightsB;
1401 (*meanMatx)( ivar, 2 ) /= (sumOfWeightsS + sumOfWeightsB);
1413 assert( sumOfWeightsS > 0 && sumOfWeightsB > 0 );
1417 const Int_t nFisherVars2 = nFisherVars*nFisherVars;
1421 memset(sum2Sig,0,nFisherVars2*
sizeof(
Double_t));
1422 memset(sum2Bgd,0,nFisherVars2*
sizeof(
Double_t));
1425 for (
UInt_t ievt=0; ievt<nevents; ievt++) {
1429 const Event* ev = eventSample.at(ievt);
1439 if ( ev->
GetClass() ==
fSigClass ) sum2Sig[k] += ( (xval[
x] - (*meanMatx)(
x, 0))*(xval[
y] - (*meanMatx)(
y, 0)) )*weight;
1440 else sum2Bgd[k] += ( (xval[
x] - (*meanMatx)(
x, 1))*(xval[
y] - (*meanMatx)(
y, 1)) )*weight;
1448 (*with)(
x,
y) = sum2Sig[k]/sumOfWeightsS + sum2Bgd[k]/sumOfWeightsB;
1468 prodSig = ( ((*meanMatx)(
x, 0) - (*meanMatx)(
x, 2))*
1469 ((*meanMatx)(
y, 0) - (*meanMatx)(
y, 2)) );
1470 prodBgd = ( ((*meanMatx)(
x, 1) - (*meanMatx)(
x, 2))*
1471 ((*meanMatx)(
y, 1) - (*meanMatx)(
y, 2)) );
1473 (*betw)(
x,
y) = (sumOfWeightsS*prodSig + sumOfWeightsB*prodBgd) / (sumOfWeightsS + sumOfWeightsB);
1482 (*cov)(
x,
y) = (*with)(
x,
y) + (*betw)(
x,
y);
1497 Log() <<
kWARNING <<
"FisherCoeff matrix is almost singular with deterninant=" 1499 <<
" did you use the variables that are linear combinations or highly correlated?" 1503 Log() <<
kFATAL <<
"FisherCoeff matrix is singular with determinant=" 1505 <<
" did you use the variables that are linear combinations?" 1512 Double_t xfact =
TMath::Sqrt( sumOfWeightsS*sumOfWeightsB ) / (sumOfWeightsS + sumOfWeightsB);
1515 std::vector<Double_t> diffMeans( nFisherVars );
1517 for (
UInt_t ivar=0; ivar<=
fNvars; ivar++) fisherCoeff[ivar] = 0;
1518 for (
UInt_t ivar=0; ivar<nFisherVars; ivar++) {
1519 for (
UInt_t jvar=0; jvar<nFisherVars; jvar++) {
1520 Double_t d = (*meanMatx)(jvar, 0) - (*meanMatx)(jvar, 1);
1521 fisherCoeff[mapVarInFisher[ivar]] += invCov(ivar, jvar)*d;
1525 fisherCoeff[mapVarInFisher[ivar]] *= xfact;
1530 for (
UInt_t ivar=0; ivar<nFisherVars; ivar++){
1531 f0 += fisherCoeff[mapVarInFisher[ivar]]*((*meanMatx)(ivar, 0) + (*meanMatx)(ivar, 1));
1535 fisherCoeff[
fNvars] = f0;
1549 Int_t nTotS_unWeighted = 0, nTotB_unWeighted = 0;
1551 std::vector<TMVA::BDTEventWrapper> bdtEventSample;
1554 std::vector<Double_t> lCutValue(
fNvars, 0.0 );
1555 std::vector<Double_t> lSepGain(
fNvars, -1.0e6 );
1556 std::vector<Char_t> lCutType(
fNvars );
1561 for( std::vector<const TMVA::Event*>::const_iterator it = eventSample.begin(); it != eventSample.end(); ++it ) {
1563 nTotS += (*it)->GetWeight();
1567 nTotB += (*it)->GetWeight();
1573 std::vector<Char_t> useVariable(
fNvars);
1582 Int_t nSelectedVars = 0;
1588 if(useVariable[ivar] ==
Char_t(
kTRUE)) nSelectedVars++;
1597 if(!useVariable[ivar])
continue;
1599 std::sort( bdtEventSample.begin(),bdtEventSample.end() );
1601 Double_t bkgWeightCtr = 0.0, sigWeightCtr = 0.0;
1602 std::vector<TMVA::BDTEventWrapper>::iterator it = bdtEventSample.begin(), it_end = bdtEventSample.end();
1603 for( ; it != it_end; ++it ) {
1605 sigWeightCtr += (**it)->GetWeight();
1607 bkgWeightCtr += (**it)->GetWeight();
1609 it->SetCumulativeWeight(
false,bkgWeightCtr);
1610 it->SetCumulativeWeight(
true,sigWeightCtr);
1616 Double_t separationGain = -1.0, sepTmp = 0.0, cutValue = 0.0, dVal = 0.0,
norm = 0.0;
1618 for( it = bdtEventSample.begin(); it != it_end; ++it ) {
1619 if( index == 0 ) { ++index;
continue; }
1620 if( *(*it) ==
NULL ) {
1621 Log() <<
kFATAL <<
"In TrainNodeFull(): have a null event! Where index=" 1622 << index <<
", and parent node=" << node->
GetParent() <<
Endl;
1625 dVal = bdtEventSample[index].GetVal() - bdtEventSample[index-1].GetVal();
1626 norm =
TMath::Abs(bdtEventSample[index].GetVal() + bdtEventSample[index-1].GetVal());
1630 sepTmp =
fSepType->
GetSeparationGain( it->GetCumulativeWeight(
true), it->GetCumulativeWeight(
false), sigWeightCtr, bkgWeightCtr );
1631 if( sepTmp > separationGain ) {
1632 separationGain = sepTmp;
1633 cutValue = it->GetVal() - 0.5*dVal;
1634 Double_t nSelS = it->GetCumulativeWeight(
true);
1635 Double_t nSelB = it->GetCumulativeWeight(
false);
1638 if( nSelS/sigWeightCtr > nSelB/bkgWeightCtr ) cutType =
kTRUE;
1644 lCutType[ivar] =
Char_t(cutType);
1645 lCutValue[ivar] = cutValue;
1646 lSepGain[ivar] = separationGain;
1650 Int_t iVarIndex = -1;
1652 if( lSepGain[ivar] > separationGain ) {
1654 separationGain = lSepGain[ivar];
1658 if(iVarIndex >= 0) {
1663 fVariableImportance[iVarIndex] += separationGain*separationGain * (nTotS+nTotB) * (nTotS+nTotB);
1666 separationGain = 0.0;
1669 return separationGain;
1697 Log() <<
kFATAL <<
"CheckEvent: started with undefined ROOT node" <<
Endl;
1706 Log() <<
kFATAL <<
"DT::CheckEvent: inconsistent tree structure" <<
Endl;
1725 Double_t sumsig=0, sumbkg=0, sumtot=0;
1726 for (
UInt_t ievt=0; ievt<eventSample.size(); ievt++) {
1727 if (eventSample[ievt]->
GetClass() !=
fSigClass) sumbkg+=eventSample[ievt]->GetWeight();
1728 else sumsig+=eventSample[ievt]->GetWeight();
1729 sumtot+=eventSample[ievt]->GetWeight();
1732 if (sumtot!= (sumsig+sumbkg)){
1733 Log() <<
kFATAL <<
"<SamplePurity> sumtot != sumsig+sumbkg" 1734 << sumtot <<
" " << sumsig <<
" " << sumbkg <<
Endl;
1736 if (sumtot>0)
return sumsig/(sumsig + sumbkg);
1748 std::vector<Double_t> relativeImportance(
fNvars);
1757 relativeImportance[i] /= sum;
1759 relativeImportance[i] = 0;
1761 return relativeImportance;
1770 if (ivar <
fNvars)
return relativeImportance[ivar];
1773 <<
"--- ivar = " << ivar <<
" is out of range " <<
Endl;
void SetNTerminal(Int_t n)
Double_t PruneStrength
quality measure for a pruned subtree T of T_max
DataSetInfo * fDataSetInfo
Random number generator class based on M.
void SetSelector(Short_t i)
MsgLogger & Endl(MsgLogger &ml)
void SetFisherCoeff(Int_t ivar, Double_t coeff)
set fisher coefficients
Float_t GetSumTarget() const
Double_t CheckEvent(const TMVA::Event *, Bool_t UseYesNoLeaf=kFALSE) const
the event e is put into the decision tree (starting at the root node) and the output is NodeType (sig...
Double_t GetNodePurityLimit() const
EPruneMethod fPruneMethod
virtual DecisionTreeNode * GetParent() const
void IncrementNEvents_unweighted()
Float_t GetSumTarget2() const
Double_t GetSeparationGain(const Double_t &nLeft, const Double_t &targetLeft, const Double_t &target2Left, const Double_t &nTot, const Double_t &targetTot, const Double_t &target2Tot)
Separation Gain: the measure of how the quality of separation of the sample increases by splitting th...
void IncrementNEvents(Float_t nev)
Float_t GetSampleMax(UInt_t ivar) const
return the maximum of variable ivar from the training sample that pass/end up in this node ...
Short_t Min(Short_t a, Short_t b)
std::vector< Double_t > GetFisherCoefficients(const EventConstList &eventSample, UInt_t nFisherVars, UInt_t *mapVarInFisher)
calculate the fisher coefficients for the event sample and the variables used
std::vector< DecisionTreeNode * > PruneSequence
the regularization parameter for pruning
Bool_t IsTerminal() const
virtual void SetParentTree(TMVA::BinaryTree *t)
Double_t fNodePurityLimit
virtual void SetRight(Node *r)
virtual ~DecisionTree(void)
destructor
Double_t TestPrunedTreeQuality(const DecisionTreeNode *dt=NULL, Int_t mode=0) const
return the misclassification rate of a pruned tree a "pruned tree" may have set the variable "IsTermi...
Float_t GetNSigEvents(void) const
virtual Double_t Determinant() const
Return the matrix determinant.
virtual DecisionTreeNode * GetRoot() const
void CheckEventWithPrunedTree(const TMVA::Event *) const
pass a single validation event through a pruned decision tree on the way down the tree...
void DeleteNode(Node *)
protected, recursive, function used by the class destructor and when Pruning
void SetNSigEvents_unweighted(Float_t s)
void SetResponse(Float_t r)
void SetNBValidation(Double_t b)
std::vector< Double_t > GetVariableImportance()
Return the relative variable importance, normalized to all variables together having the importance 1...
virtual Double_t GetSeparationIndex(const Double_t &n, const Double_t &target, const Double_t &target2)
Separation Index: a simple Variance.
void SetNFisherCoeff(Int_t nvars)
std::vector< const TMVA::Event * > EventConstList
static const Int_t fgRandomSeed
Float_t GetNBkgEvents(void) const
void FillTree(const EventList &eventSample)
Float_t GetSampleMin(UInt_t ivar) const
return the minimum of variable ivar from the training sample that pass/end up in this node ...
void IncrementNBkgEvents(Float_t b)
Double_t SamplePurity(EventList eventSample)
calculates the purity S/(S+B) of a given event sample
Double_t GetNodeR() const
std::vector< Double_t > fVariableImportance
void SetSeparationGain(Float_t sep)
Double_t GetSumWeights(const EventConstList *validationSample) const
calculate the normalization factor for a pruning validation sample
void ResetValidationData()
temporary stored node values (number of events, etc.) that originate not from the training but from t...
void SetNBkgEvents(Float_t b)
void SetNSValidation(Double_t s)
virtual Double_t Rndm(Int_t i=0)
Machine independent random number generator.
UInt_t CountLeafNodes(TMVA::Node *n=NULL)
return the number of terminal nodes in the sub-tree below Node n
void AddToSumTarget(Float_t t)
Double_t GetWeight() const
return the event weight - depending on whether the flag IgnoreNegWeightsInTraining is or not...
Double_t TrainNodeFast(const EventConstList &eventSample, DecisionTreeNode *node)
Decide how to split a node using one of the variables that gives the best separation of signal/backgr...
void DescendTree(Node *n=NULL)
descend a tree to find all its leaf nodes
TMatrixT< Element > & Invert(Double_t *det=0)
Invert the matrix and calculate its determinant.
void FillEvent(const TMVA::Event &event, TMVA::DecisionTreeNode *node)
fill the existing decision tree structure by filling the event in from the top node and see where the...
UInt_t GetNTargets() const
accessor to the number of targets
void SetNEvents(Float_t nev)
TMatrixT< Double_t > TMatrixD
Bool_t DoRegression() const
Double_t fMinLinCorrForFisher
void SetTotalTreeDepth(Int_t depth)
Float_t GetTarget(UInt_t itgt) const
Int_t GetNodeType(void) const
void SetSubTreeR(Double_t r)
virtual void SetLeft(Node *l)
void SetAlpha(Double_t alpha)
UInt_t CleanTree(DecisionTreeNode *node=NULL)
remove those last splits that result in two leaf nodes that are both of the type (i.e.
void SetSampleMin(UInt_t ivar, Float_t xmin)
set the minimum of variable ivar from the training sample that pass/end up in this node ...
void SetCutValue(Float_t c)
void GetRandomisedVariables(Bool_t *useVariable, UInt_t *variableMap, UInt_t &nVars)
Double_t TrainNodeFull(const EventConstList &eventSample, DecisionTreeNode *node)
void SetParentTreeInNodes(Node *n=NULL)
descend a tree to find all its leaf nodes, fill max depth reached in the tree at the same time...
void SetPurity(void)
return the S/(S+B) (purity) for the node REM: even if nodes with purity 0.01 are very PURE background...
void SetCutType(Bool_t t)
void IncrementNSigEvents_unweighted()
virtual void ReadXML(void *node, UInt_t tmva_Version_Code=TMVA_VERSION_CODE)
read attributes from XML
void PruneNodeInPlace(TMVA::DecisionTreeNode *node)
prune a node temporarily (without actually deleting its descendants, which allows testing the pruned tre...
TMVA::DecisionTreeNode * GetEventNode(const TMVA::Event &e) const
get the pointer to the leaf node where a particular event ends up in...
void ApplyValidationSample(const EventConstList *validationSample) const
run the validation sample through the (pruned) tree and fill in the nodes the variables NSValidation ...
Float_t GetValue(UInt_t ivar) const
return value of i'th variable
static void SetVarIndex(Int_t iVar)
void AddToSumTarget2(Float_t t2)
virtual Double_t GetSeparationGain(const Double_t &nSelS, const Double_t &nSelB, const Double_t &nTotS, const Double_t &nTotB)
Separation Gain: the measure of how the quality of separation of the sample increases by splitting th...
Float_t GetPurity(void) const
void SetSampleMax(UInt_t ivar, Float_t xmax)
set the maximum of variable ivar from the training sample that pass/end up in this node ...
virtual Bool_t GoesRight(const Event &) const
test whether the event descends the tree to the right at this node
Double_t GetNBValidation() const
Node * GetNode(ULong_t sequence, UInt_t depth)
retrieve node from the tree.
void IncrementNSigEvents(Float_t s)
void SetNodeType(Int_t t)
void ClearTree()
clear the tree nodes (their S/N, Nevents etc), just keep the structure of the tree ...
void SetAlphaMinSubtree(Double_t g)
Types::EAnalysisType fAnalysisType
static DecisionTree * CreateFromXML(void *node, UInt_t tmva_Version_Code=TMVA_VERSION_CODE)
re-create a new tree (decision tree or search tree) from XML
void SetNEvents_unboosted(Float_t nev)
UInt_t GetTotalTreeDepth() const
VariableInfo & GetVariableInfo(Int_t i)
void SetNSigEvents_unboosted(Float_t s)
void SetTerminal(Bool_t s=kTRUE)
RegressionVariance * fRegType
void SetNSigEvents(Float_t s)
UInt_t CountNodes(Node *n=NULL)
return the number of nodes in the tree. (make a new count --> takes time)
void SetNBkgEvents_unboosted(Float_t b)
SeparationBase * fSepType
void SetNBkgEvents_unweighted(Float_t b)
void IncrementNBkgEvents_unweighted()
Double_t PruneTree(const EventConstList *validationSample=NULL)
prune (get rid of internal nodes) the decision tree to avoid overtraining; several different pruning ...
Abstract ClassifierFactory template that handles arbitrary types.
Node * GetRightDaughter(Node *n)
get the right daughter node of the current node "n"
Float_t GetResponse(void) const
Double_t GetNSValidation() const
virtual Double_t GetSeparationIndex(const Double_t &s, const Double_t &b)=0
Short_t Max(Short_t a, Short_t b)
Double_t GetOriginalWeight() const
UInt_t BuildTree(const EventConstList &eventSample, DecisionTreeNode *node=NULL)
building the decision tree by recursively calling the splitting of one (root-) node into two daughter...
virtual DecisionTreeNode * GetLeft() const
virtual DecisionTreeNode * GetRight() const
Node * GetLeftDaughter(Node *n)
get the left daughter node of the current node "n"
void ClearNodeAndAllDaughters()
clear the nodes (their S/N, Nevents etc), just keep the structure of the tree
void SetSeparationIndex(Float_t sep)
virtual Int_t Poisson(Double_t mean)
Generates a random integer N according to a Poisson law.
Double_t Sqrt(Double_t x)
DecisionTree(void)
default constructor using the GiniIndex as separation criterion, no restrictions on minimum number of ...
Double_t GetPruneStrength() const
double norm(double *x, double *p)
void SetNEvents_unweighted(Float_t nev)
void PruneNode(TMVA::DecisionTreeNode *node)
prune away the subtree below the node