ROOT  6.06/08
Reference Guide
PDEFoamDecisionTree.cxx
Go to the documentation of this file.
1 // @(#)root/tmva $Id$
2 // Author: Alexander Voigt
3 
4 /**********************************************************************************
5  * Project: TMVA - a Root-integrated toolkit for multivariate data analysis *
6  * Package: TMVA *
7  * Classes: PDEFoamDecisionTree *
8  * Web : http://tmva.sourceforge.net *
9  * *
10  * Description: *
11  * Implementation of decision tree like PDEFoam *
12  * *
13  * Authors (alphabetical): *
14  * Tancredi Carli - CERN, Switzerland *
15  * Dominik Dannheim - CERN, Switzerland *
16  * S. Jadach - Institute of Nuclear Physics, Cracow, Poland *
17  * Alexander Voigt - TU Dresden, Germany *
18  * Peter Speckmayer - CERN, Switzerland *
19  * *
20  * Copyright (c) 2010: *
21  * CERN, Switzerland *
22  * MPI-K Heidelberg, Germany *
23  * *
24  * Redistribution and use in source and binary forms, with or without *
25  * modification, are permitted according to the terms listed in LICENSE *
26  * (http://tmva.sourceforge.net/LICENSE) *
27  **********************************************************************************/
28 
29 //_____________________________________________________________________
30 //
31 // PDEFoamDecisionTree
32 //
33 // This PDEFoam variant acts like a decision tree and stores in every
34 // cell the discriminant
35 //
36 // D = #events with given class / total number of events
37 //
38 // as well as the statistical error on the discriminant. It therefore
39 // acts as a discriminant estimator. The decision tree-like behaviour
40 // is achieved by overriding PDEFoamDiscriminant::Explore() to use a
41 // decision tree-like cell splitting algorithm (given a separation
42 // type).
43 //
44 // This PDEFoam variant should be booked together with the
45 // PDEFoamDecisionTreeDensity density estimator, which returns the
46 // events in a cell without sampling.
47 //
48 //_____________________________________________________________________
49 
50 #ifndef ROOT_TMVA_PDEFoamDecisionTree
52 #endif
53 #ifndef ROOT_TMVA_PDEFoamDecisionTreeDensity
55 #endif
56 
58 
59 ////////////////////////////////////////////////////////////////////////////////
60 /// Default constructor for streamer, user should not use it.
61 
64  , fSepType(NULL)
65 {
66 }
67 
68 ////////////////////////////////////////////////////////////////////////////////
69 /// Parameters:
70 ///
71 /// - name - name of the foam
72 ///
73 /// - sepType - separation type used for the cell splitting (will be
74 /// deleted in the destructor)
75 ///
76 /// - cls - class to consider as signal when calcualting the purity
77 
79  : PDEFoamDiscriminant(name, cls)
80  , fSepType(sepType)
81 {
82 }
83 
84 ////////////////////////////////////////////////////////////////////////////////
85 /// Copy Constructor NOT IMPLEMENTED (NEVER USED)
86 
88  : PDEFoamDiscriminant(from)
89  , fSepType(from.fSepType)
90 {
91  Log() << kFATAL << "COPY CONSTRUCTOR NOT IMPLEMENTED" << Endl;
92 }
93 
94 ////////////////////////////////////////////////////////////////////////////////
95 /// Destructor
96 /// deletes fSepType
97 
99 {
100  if (fSepType)
101  delete fSepType;
102 }
103 
104 ////////////////////////////////////////////////////////////////////////////////
105 /// Internal subprogram used by Create. It explores newly defined
106 /// cell with according to the decision tree logic. The separation
107 /// set via the 'sepType' option in the constructor.
108 ///
109 /// The optimal division point for eventual future cell division is
110 /// determined/recorded. Note that links to parents and initial
111 /// volume = 1/2 parent has to be already defined prior to calling
112 /// this routine.
113 ///
114 /// Note, that according to the decision tree logic, a cell is only
115 /// split, if the number of (unweighted) events in each dautghter
116 /// cell is greater than fNmin.
117 
119 {
120  if (!cell)
121  Log() << kFATAL << "<DTExplore> Null pointer given!" << Endl;
122 
123  // create edge histograms
124  std::vector<TH1D*> hsig, hbkg, hsig_unw, hbkg_unw;
125  hsig.reserve(fDim);
126  hbkg.reserve(fDim);
127  hsig_unw.reserve(fDim);
128  hbkg_unw.reserve(fDim);
129  for (Int_t idim = 0; idim < fDim; idim++) {
130  hsig.push_back(new TH1D(Form("hsig_%i", idim),
131  Form("signal[%i]", idim), fNBin, fXmin[idim], fXmax[idim]));
132  hbkg.push_back(new TH1D(Form("hbkg_%i", idim),
133  Form("background[%i]", idim), fNBin, fXmin[idim], fXmax[idim]));
134  hsig_unw.push_back(new TH1D(Form("hsig_unw_%i", idim),
135  Form("signal_unw[%i]", idim), fNBin, fXmin[idim], fXmax[idim]));
136  hbkg_unw.push_back(new TH1D(Form("hbkg_unw_%i", idim),
137  Form("background_unw[%i]", idim), fNBin, fXmin[idim], fXmax[idim]));
138  }
139 
140  // get cell position and size
141  PDEFoamVect cellSize(GetTotDim()), cellPosi(GetTotDim());
142  cell->GetHcub(cellPosi, cellSize);
143 
144  // determine lower and upper cell bound
145  std::vector<Double_t> lb(GetTotDim()); // lower bound
146  std::vector<Double_t> ub(GetTotDim()); // upper bound
147  for (Int_t idim = 0; idim < GetTotDim(); idim++) {
148  lb[idim] = VarTransformInvers(idim, cellPosi[idim] - std::numeric_limits<float>::epsilon());
149  ub[idim] = VarTransformInvers(idim, cellPosi[idim] + cellSize[idim] + std::numeric_limits<float>::epsilon());
150  }
151 
152  // fDistr must be of type PDEFoamDecisionTreeDensity*
154  if (distr == NULL)
155  Log() << kFATAL << "<PDEFoamDecisionTree::Explore>: cast failed: "
156  << "PDEFoamDensityBase* --> PDEFoamDecisionTreeDensity*" << Endl;
157 
158  // create TMVA::Volume object needed for searching within the BST
159  TMVA::Volume volume(&lb, &ub);
160 
161  // fill the signal and background histograms for the given volume
162  distr->FillHistograms(volume, hsig, hbkg, hsig_unw, hbkg_unw);
163 
164  // ------ determine the best division edge
165  Double_t xBest = 0.5; // best division point
166  Int_t kBest = -1; // best split dimension
167  Double_t maxGain = -1.0; // maximum gain
168  Double_t nTotS = hsig.at(0)->Integral(0, hsig.at(0)->GetNbinsX() + 1);
169  Double_t nTotB = hbkg.at(0)->Integral(0, hbkg.at(0)->GetNbinsX() + 1);
170  Double_t nTotS_unw = hsig_unw.at(0)->Integral(0, hsig_unw.at(0)->GetNbinsX() + 1);
171  Double_t nTotB_unw = hbkg_unw.at(0)->Integral(0, hbkg_unw.at(0)->GetNbinsX() + 1);
172 
173  for (Int_t idim = 0; idim < fDim; ++idim) {
174  Double_t nSelS = hsig.at(idim)->GetBinContent(0);
175  Double_t nSelB = hbkg.at(idim)->GetBinContent(0);
176  Double_t nSelS_unw = hsig_unw.at(idim)->GetBinContent(0);
177  Double_t nSelB_unw = hbkg_unw.at(idim)->GetBinContent(0);
178  for (Int_t jLo = 1; jLo < fNBin; jLo++) {
179  nSelS += hsig.at(idim)->GetBinContent(jLo);
180  nSelB += hbkg.at(idim)->GetBinContent(jLo);
181  nSelS_unw += hsig_unw.at(idim)->GetBinContent(jLo);
182  nSelB_unw += hbkg_unw.at(idim)->GetBinContent(jLo);
183 
184  // proceed if total number of events in left and right cell
185  // is greater than fNmin
186  if (!((nSelS_unw + nSelB_unw) >= GetNmin() &&
187  (nTotS_unw - nSelS_unw + nTotB_unw - nSelB_unw) >= GetNmin()))
188  continue;
189 
190  Double_t xLo = 1.0 * jLo / fNBin;
191 
192  // calculate separation gain
193  Double_t gain = fSepType->GetSeparationGain(nSelS, nSelB, nTotS, nTotB);
194 
195  if (gain >= maxGain) {
196  maxGain = gain;
197  xBest = xLo;
198  kBest = idim;
199  }
200  } // jLo
201  } // idim
202 
203  if (kBest >= fDim || kBest < 0) {
204  // No best division edge found! One must ensure, that this cell
205  // is not chosen for splitting in PeekMax(). But since in
206  // PeekMax() it is ensured that cell->GetDriv() > epsilon, one
207  // should set maxGain to -1.0 (or even 0.0?) here.
208  maxGain = -1.0;
209  }
210 
211  // set cell properties
212  cell->SetBest(kBest);
213  cell->SetXdiv(xBest);
214  if (nTotB + nTotS > 0)
215  cell->SetIntg(nTotS / (nTotB + nTotS));
216  else
217  cell->SetIntg(0.0);
218  cell->SetDriv(maxGain);
219  cell->CalcVolume();
220 
221  // set cell element 0 (total number of events in cell) during
222  // build-up
223  if (GetNmin() > 0)
224  SetCellElement(cell, 0, nTotS + nTotB);
225 
226  // clean up
227  for (UInt_t ih = 0; ih < hsig.size(); ih++) delete hsig.at(ih);
228  for (UInt_t ih = 0; ih < hbkg.size(); ih++) delete hbkg.at(ih);
229  for (UInt_t ih = 0; ih < hsig_unw.size(); ih++) delete hsig_unw.at(ih);
230  for (UInt_t ih = 0; ih < hbkg_unw.size(); ih++) delete hbkg_unw.at(ih);
231 }
MsgLogger & Endl(MsgLogger &ml)
Definition: MsgLogger.h:162
Int_t fDim
Definition: PDEFoam.h:109
Double_t * fXmax
Definition: PDEFoam.h:132
Basic string class.
Definition: TString.h:137
int Int_t
Definition: RtypesCore.h:41
void SetCellElement(PDEFoamCell *cell, UInt_t i, Double_t value)
Set cell element i of cell to value.
Definition: PDEFoam.cxx:1442
void SetBest(Int_t Best)
Definition: PDEFoamCell.h:85
THist< 1, double > TH1D
Definition: THist.h:314
Int_t GetTotDim() const
Definition: PDEFoam.h:222
UInt_t GetNmin()
Definition: PDEFoam.h:231
PDEFoamDensityBase * fDistr
Definition: PDEFoam.h:140
Int_t fNBin
Definition: PDEFoam.h:112
virtual void FillHistograms(TMVA::Volume &, std::vector< TH1D *> &, std::vector< TH1D *> &, std::vector< TH1D *> &, std::vector< TH1D *> &)
Fill the given histograms with signal and background events, which are found in the volume...
Float_t VarTransformInvers(Int_t idim, Float_t x) const
Definition: PDEFoam.h:323
void SetXdiv(Double_t Xdiv)
Definition: PDEFoamCell.h:86
virtual void Explore(PDEFoamCell *Cell)
Internal subprogram used by Create.
PDEFoamDecisionTree()
Default constructor for streamer, user should not use it.
void CalcVolume()
Calculates volume of the cell using size params which are calculated.
unsigned int UInt_t
Definition: RtypesCore.h:42
char * Form(const char *fmt,...)
void SetDriv(Double_t Driv)
Definition: PDEFoamCell.h:95
MsgLogger & Log() const
Definition: PDEFoam.h:265
Double_t * fXmin
Definition: PDEFoam.h:131
REAL epsilon
Definition: triangle.c:617
virtual Double_t GetSeparationGain(const Double_t &nSelS, const Double_t &nSelB, const Double_t &nTotS, const Double_t &nTotB)
Separation Gain: the measure of how the quality of separation of the sample increases by splitting th...
#define ClassImp(name)
Definition: Rtypes.h:279
double Double_t
Definition: RtypesCore.h:55
#define name(a, b)
Definition: linkTestLib0.cpp:5
void GetHcub(PDEFoamVect &, PDEFoamVect &) const
Provides size and position of the cell These parameter are calculated by analyzing information in all...
void SetIntg(Double_t Intg)
Definition: PDEFoamCell.h:94
Abstract ClassifierFactory template that handles arbitrary types.
#define NULL
Definition: Rtypes.h:82
virtual ~PDEFoamDecisionTree()
Destructor deletes fSepType.