ROOT  6.06/08
Reference Guide
KDEKernel.cxx
Go to the documentation of this file.
1 // @(#)root/tmva $Id$
2 // Author: Asen Christov
3 
4 /**********************************************************************************
5  * Project: TMVA - a Root-integrated toolkit for multivariate Data analysis *
6  * Package: TMVA *
7  * Class : TMVA::KDEKernel *
8  * Web : http://tmva.sourceforge.net *
9  * *
10  * Description: *
11  * Implementation (see header for description) *
12  * *
13  * Authors (alphabetical): *
14  * Asen Christov <christov@physik.uni-freiburg.de> - Freiburg U., Germany *
15  * *
16  * Copyright (c) 2005: *
17  * CERN, Switzerland *
18  * MPI-K Heidelberg, Germany *
19  * Freiburg U., Germany *
20  * *
21  * Redistribution and use in source and binary forms, with or without *
22  * modification, are permitted according to the terms listed in LICENSE *
23  * (http://tmva.sourceforge.net/LICENSE) *
24  **********************************************************************************/
25 
26 #include "TH1.h"
27 #include "TH1F.h"
28 #include "TF1.h"
29 
30 // #if ROOT_VERSION_CODE >= 364802
31 // #ifndef ROOT_TMathBase
32 // #include "TMathBase.h"
33 // #endif
34 // #else
35 // #ifndef ROOT_TMath
36 #include "TMath.h"
37 // #endif
38 // #endif
39 
40 #include "TMVA/KDEKernel.h"
41 #include "TMVA/MsgLogger.h"
42 
44 
45 ////////////////////////////////////////////////////////////////////////////////
46 /// constructor
47 /// sanity check
48 
49 TMVA::KDEKernel::KDEKernel( EKernelIter kiter, const TH1 *hist, Float_t lower_edge, Float_t upper_edge,
50  EKernelBorder kborder, Float_t FineFactor )
51  : fSigma( 1. ),
52  fIter ( kiter ),
53  fLowerEdge (lower_edge ),
54  fUpperEdge (upper_edge),
55  fFineFactor ( FineFactor ),
56  fKernel_integ ( 0 ),
57  fKDEborder ( kborder ),
58  fLogger( new MsgLogger("KDEKernel") )
59 {
60  if (hist == NULL) {
61  Log() << kFATAL << "Called without valid histogram pointer (hist)!" << Endl;
62  }
63 
64  fHist = (TH1F*)hist->Clone();
65  fFirstIterHist = (TH1F*)hist->Clone();
66  fFirstIterHist->Reset(); // now it is empty but with the proper binning
67  fSigmaHist = (TH1F*)hist->Clone();
68  fSigmaHist->Reset(); // now fSigmaHist is empty but with the proper binning
69 
70  fHiddenIteration=false;
71 }
72 
73 ////////////////////////////////////////////////////////////////////////////////
74 /// destructor
75 
77 {
78  if (fHist != NULL) delete fHist;
79  if (fFirstIterHist != NULL) delete fFirstIterHist;
80  if (fSigmaHist != NULL) delete fSigmaHist;
81  if (fKernel_integ != NULL) delete fKernel_integ;
82  delete fLogger;
83 }
84 
85 ////////////////////////////////////////////////////////////////////////////////
86 /// when using Gaussian as Kernel function this is faster way to calculate the integrals
87 
89 {
90  if ( (par[1]<=0) || (x[0]>x[1])) return -1.;
91 
92  Float_t xs1=(x[0]-par[0])/par[1];
93  Float_t xs2=(x[1]-par[0])/par[1];
94 
95  if (xs1==0) {
96  if (xs2==0) return 0.;
97  if (xs2>0 ) return 0.5*TMath::Erf(xs2);
98  }
99  if (xs2==0) return 0.5*TMath::Erf(TMath::Abs(xs1));
100  if (xs1>0) return 0.5*(TMath::Erf(xs2)-TMath::Erf(xs1));
101  if (xs1<0) {
102  if (xs2>0 ) return 0.5*(TMath::Erf(xs2)+TMath::Erf(TMath::Abs(xs1)));
103  else return 0.5*(TMath::Erf(TMath::Abs(xs1))-TMath::Erf(TMath::Abs(xs2)));
104  }
105  return -1.;
106 }
107 
108 ////////////////////////////////////////////////////////////////////////////////
109 /// fIter == 1 ---> nonadaptive KDE
110 /// fIter == 2 ---> adaptive KDE
111 
113 {
114  if (ktype == kGauss) {
115 
116  // i.e. gauss kernel
117  //
118  // this is going to be done for both (nonadaptive KDE and adaptive KDE)
119  // for nonadaptive KDE this is the only = final thing to do
120  // for adaptive KDE this is going to be used in the first (hidden) iteration
121  fKernel_integ = new TF1("GaussIntegral",GaussIntegral,fLowerEdge,fUpperEdge,4);
122  fSigma = ( TMath::Sqrt(2.0)
123  *TMath::Power(4./3., 0.2)
124  *fHist->GetRMS()
125  *TMath::Power(fHist->Integral(), -0.2) );
126  // this formula for sigma is valid for Gaussian Kernel function (nonadaptive KDE).
127  // formula found in:
128  // Multivariate Density Estimation, Theory, Practice and Visualization D. W. SCOTT, 1992 New York, Wiley
129  if (fSigma <= 0 ) {
130  Log() << kFATAL << "<SetKernelType> KDE sigma has invalid value ( <=0 ) !" << Endl;
131  }
132  }
133 
134  if (fIter == kAdaptiveKDE) {
135 
136  // this is done only for adaptive KDE
137 
138  // fill a temporary histo using nonadaptive KDE
139  // this histo is identical with the final output when using only nonadaptive KDE
140  fHiddenIteration=true;
141 
142  Float_t histoLowEdge=fHist->GetBinLowEdge(1);
143  Float_t histoUpperEdge=fHist->GetBinLowEdge(fHist->GetNbinsX()+1);
144 
145  for (Int_t i=1;i<fHist->GetNbinsX();i++) {
146  // loop over the bins of the original histo
147  for (Int_t j=1;j<fFirstIterHist->GetNbinsX();j++) {
148  // loop over the bins of the PDF histo and fill it
152  fHist->GetBinCenter(i),
153  i)
154  );
155  }
156  if (fKDEborder == 3) { // mirror the saples and fill them again
157  // in order to save time do the mirroring only for the first (the lowwer) 1/5 of the histo to the left;
158  // and the last (the higher) 1/5 of the histo to the right.
159  // the middle part of the histo, which is not mirrored, has no influence on the border effects anyway ...
160  if (i < fHist->GetNbinsX()/5 ) { // the first (the lowwer) 1/5 of the histo
161  for (Int_t j=1;j<fFirstIterHist->GetNbinsX();j++) {
162  // loop over the bins of the PDF histo and fill it
166  2*histoLowEdge-fHist->GetBinCenter(i), // mirroring to the left
167  i)
168  );
169  }
170  }
171  if (i > 4*fHist->GetNbinsX()/5) { // the last (the higher) 1/5 of the histo
172  for (Int_t j=1;j<fFirstIterHist->GetNbinsX();j++) {
173  // loop over the bins of the PDF histo and fill it
177  2*histoUpperEdge-fHist->GetBinCenter(i), // mirroring to the right
178  i)
179  );
180  }
181  }
182  }
183  }
184 
185  fFirstIterHist->SetEntries(fHist->GetEntries()); //set the number of entries to be the same as the original histo
186 
187  // do "function like" integration = sum of (bin_width*bin_content):
188  Float_t integ=0;
189  for (Int_t j=1;j<fFirstIterHist->GetNbinsX();j++)
191  fFirstIterHist->Scale(1./integ);
192 
193  fHiddenIteration=false;
194 
195  // OK, now we have the first iteration,
196  // next: calculate the Sigmas (Widths) for the second (adaptive) iteration
197  // based on the output of the first iteration
198  // these Sigmas will be stored in histo called fSigmaHist
199  for (Int_t j=1;j<fFirstIterHist->GetNbinsX();j++) {
200  // loop over the bins of the PDF histo and fill fSigmaHist
201  if (fSigma*TMath::Sqrt(1.0/fFirstIterHist->GetBinContent(j)) <= 0 ) {
202  Log() << kFATAL << "<SetKernelType> KDE sigma has invalid value ( <=0 ) !" << Endl;
203  }
204 
206  }
207  }
208 
209  if (fKernel_integ ==0 ) {
210  Log() << kFATAL << "KDE kernel not correctly initialized!" << Endl;
211  }
212 }
213 
214 ////////////////////////////////////////////////////////////////////////////////
215 /// calculates the integral of the Kernel
216 
218 {
219  if ((fIter == kNonadaptiveKDE) || fHiddenIteration )
220  fKernel_integ->SetParameters(mean,fSigma); // non adaptive KDE
221  else if ((fIter == kAdaptiveKDE) && !fHiddenIteration )
222  fKernel_integ->SetParameters(mean,fSigmaHist->GetBinContent(binnum)); // adaptive KDE
223 
224  if ( fKDEborder == 2 ) { // renormalization of the kernel function
225  Float_t renormFactor=1.0/fKernel_integ->Eval(fLowerEdge,fUpperEdge);
226  return (renormFactor*fKernel_integ->Eval(lowr,highr));
227  }
228 
229  // the RenormFactor takes care aboud the "border" effects, i.e. sets the
230  // integral to one inside the histogram borders
231  return (fKernel_integ->Eval(lowr,highr));
232 }
233 
double par[1]
Definition: unuranDistr.cxx:38
virtual void Scale(Double_t c1=1, Option_t *option="")
Multiply this histogram by a constant c1.
Definition: TH1.cxx:6180
Double_t GaussIntegral(Double_t *x, Double_t *par)
when using Gaussian as Kernel function this is faster way to calculate the integrals ...
Definition: KDEKernel.cxx:88
virtual void SetParameters(const Double_t *params)
Definition: TF1.h:439
virtual Double_t GetBinCenter(Int_t bin) const
return bin center for 1D historam Better to use h1.GetXaxis().GetBinCenter(bin)
Definition: TH1.cxx:8476
MsgLogger & Endl(MsgLogger &ml)
Definition: MsgLogger.h:162
Float_t fLowerEdge
Definition: KDEKernel.h:79
float Float_t
Definition: RtypesCore.h:53
Float_t fUpperEdge
Definition: KDEKernel.h:80
virtual Double_t GetBinContent(Int_t bin) const
Return content of bin number bin.
Definition: TH1.cxx:4635
virtual Double_t Integral(Option_t *option="") const
Return integral of bin contents.
Definition: TH1.cxx:7384
virtual void AddBinContent(Int_t bin)
Increment bin content by 1.
Definition: TH1.h:582
1-D histogram with a float per channel (see TH1 documentation)}
Definition: TH1.h:570
int Int_t
Definition: RtypesCore.h:41
MsgLogger & Log() const
Definition: KDEKernel.h:91
Double_t GetRMS(Int_t axis=1) const
Definition: TH1.h:315
TH1F * fHist
Definition: KDEKernel.h:84
Float_t fFineFactor
Definition: KDEKernel.h:81
virtual Double_t GetBinLowEdge(Int_t bin) const
return bin lower edge for 1D historam Better to use h1.GetXaxis().GetBinLowEdge(bin) ...
Definition: TH1.cxx:8487
Short_t Abs(Short_t d)
Definition: TMathBase.h:110
LongDouble_t Power(LongDouble_t x, LongDouble_t y)
Definition: TMath.h:501
Bool_t fHiddenIteration
Definition: KDEKernel.h:87
Double_t x[n]
Definition: legend1.C:17
EKernelIter fIter
Definition: KDEKernel.h:78
EKernelBorder fKDEborder
Definition: KDEKernel.h:83
TH1F * fFirstIterHist
Definition: KDEKernel.h:85
Double_t Erf(Double_t x)
Computation of the error function erf(x).
Definition: TMath.cxx:187
virtual void SetBinContent(Int_t bin, Double_t content)
Set bin content see convention for numbering bins in TH1::GetBin In case the bin number is greater th...
Definition: TH1.cxx:8549
TH1F * fSigmaHist
Definition: KDEKernel.h:86
MsgLogger * fLogger
Definition: KDEKernel.h:90
Float_t fSigma
Definition: KDEKernel.h:77
virtual Double_t Eval(Double_t x, Double_t y=0, Double_t z=0, Double_t t=0) const
Evaluate this function.
Definition: TF1.cxx:1185
virtual Double_t GetBinWidth(Int_t bin) const
return bin width for 1D historam Better to use h1.GetXaxis().GetBinWidth(bin)
Definition: TH1.cxx:8498
#define ClassImp(name)
Definition: Rtypes.h:279
Float_t GetBinKernelIntegral(Float_t lowr, Float_t highr, Float_t mean, Int_t binnum)
calculates the integral of the Kernel
Definition: KDEKernel.cxx:217
double Double_t
Definition: RtypesCore.h:55
virtual ~KDEKernel(void)
destructor
Definition: KDEKernel.cxx:76
The TH1 histogram class.
Definition: TH1.h:80
virtual Double_t GetEntries() const
return the current number of entries
Definition: TH1.cxx:4057
virtual TObject * Clone(const char *newname="") const
Make a clone of an object using the Streamer facility.
Definition: TObject.cxx:203
Abstract ClassifierFactory template that handles arbitrary types.
1-Dim function class
Definition: TF1.h:149
#define NULL
Definition: Rtypes.h:82
virtual void SetEntries(Double_t n)
Definition: TH1.h:382
void SetKernelType(EKernelType ktype=kGauss)
fIter == 1 —> nonadaptive KDE fIter == 2 —> adaptive KDE
Definition: KDEKernel.cxx:112
virtual Int_t GetNbinsX() const
Definition: TH1.h:296
Double_t Sqrt(Double_t x)
Definition: TMath.h:464
Definition: math.cpp:60
TF1 * fKernel_integ
Definition: KDEKernel.h:82