73 , fGDPathStep ( 0.01 )
74 , fGDNPathSteps ( 1000 )
111 Log() <<
kFATAL <<
"RuleFitParams::Init() - MethodRuleFit ptr is null" <<
Endl;
159 Log() <<
kVERBOSE <<
"Path constr. - event index range = [ " <<
fPathIdx1 <<
", " << fPathIdx2 <<
" ]" 161 Log() <<
kVERBOSE <<
"Error estim. - event index range = [ " <<
fPerfIdx1 <<
", " << fPerfIdx2 <<
" ]" 180 fGDNtuple=
new TTree(
"MonitorNtuple_RuleFitParams",
"RuleFit path search");
202 std::vector<Double_t> &avsel,
203 std::vector<Double_t> &avrul )
205 UInt_t neve = ind2-ind1+1;
207 Log() <<
kFATAL <<
"<EvaluateAverage> - no events selected for path search -> BUG!" <<
Endl;
215 const std::vector<UInt_t> *eventRuleMap=0;
222 for (
UInt_t i=ind1; i<ind2+1; i++) {
232 nrules = (*eventRuleMap).size();
235 avrul[(*eventRuleMap)[
r]] += ew;
241 for (
UInt_t i=ind1; i<ind2+1; i++) {
259 avsel[sel] = avsel[sel] / sumew;
263 avrul[
r] = avrul[
r] / sumew;
309 UInt_t neve = ind2-ind1+1;
311 Log() <<
kFATAL <<
"<Risk> Invalid start/end indices! BUG!!!" <<
Endl;
316 for (
UInt_t i=ind1; i<ind2+1; i++) {
329 UInt_t neve = ind2-ind1+1;
331 Log() <<
kFATAL <<
"<Risk> Invalid start/end indices! BUG!!!" <<
Endl;
336 for (
UInt_t i=ind1; i<ind2+1; i++) {
351 Log() <<
kWARNING <<
"<Penalty> Using unverified code! Check!" <<
Endl;
448 Log() <<
kFATAL <<
"BUG! FindGDTau() has been called BEFORE InitGD()." <<
Endl;
450 Log() <<
kINFO <<
"Estimating the cutoff parameter tau. The estimated time is a pessimistic maximum." <<
Endl;
479 if ( (ip==0) || ((ip+1)%netst==0) ) {
486 doloop = ((ip<nscan) && (fGDNTauTstOK>3));
496 Log() <<
kERROR <<
"<FindGDTau> number of scanned loops is zero! Should NOT see this message." <<
Endl;
532 Log() <<
kINFO <<
"GD path scan - the scan stops when the max num. of steps is reached or a min is found" 569 std::vector<Double_t> coefsMin;
570 std::vector<Double_t> lincoefsMin;
585 std::vector<Double_t> valx;
586 std::vector<Double_t> valy;
587 std::vector<Double_t> valxy;
599 if (imod>100) imod=100;
610 Log() <<
kVERBOSE <<
"Obtained initial offset = " << offsetMin <<
Endl;
628 Int_t stopCondition=0;
635 if (isVerbose) t0 = clock();
638 tgradvec =
Double_t(clock()-t0)/CLOCKS_PER_SEC;
639 stgradvec += tgradvec;
643 if (isVerbose) t0 = clock();
646 tupgrade =
Double_t(clock()-t0)/CLOCKS_PER_SEC;
647 stupgrade += tupgrade;
651 docheck = ((iloop==0) ||((iloop+1)%imod==0));
668 trisk =
Double_t(clock()-t0)/CLOCKS_PER_SEC;
679 riskFlat=(nbadrisk>3);
683 Log() <<
kWARNING <<
"This may be OK if minimum is already found" <<
Endl;
693 if (isVerbose) t0 = clock();
706 tperf =
Double_t(clock()-t0)/CLOCKS_PER_SEC;
725 if (valx.size()==npreg) {
726 valx.erase(valx.begin());
727 valy.erase(valy.begin());
728 valxy.erase(valxy.begin());
741 <<
Form(
"%8d",iloop+1) <<
" " 743 <<
Form(
"%4.4f",riskPerf) <<
" " 761 if ( ((riskFlat) || (endOfLoop)) && (!found) ) {
765 else if (endOfLoop) {
769 Log() <<
kWARNING <<
"BUG TRAP: should not be here - still, this bug is harmless;)" <<
Endl;
782 Log() <<
kINFO <<
"----------------------------------------------------------------" <<
Endl;
783 Log() <<
kINFO <<
"Found minimum at step " << indMin+1 <<
" with error = " << errmin <<
Endl;
784 Log() <<
kINFO <<
"Reason for ending loop: ";
785 switch (stopCondition) {
787 Log() <<
kINFO <<
"clear minima found";
790 Log() <<
kINFO <<
"chaotic behaviour of risk";
793 Log() <<
kINFO <<
"end of loop reached";
800 Log() <<
kINFO <<
"----------------------------------------------------------------" <<
Endl;
805 Log() <<
kWARNING <<
"Check results and maybe decrease GDStep size" <<
Endl;
815 Log() <<
kINFO <<
"The error rate was still decreasing at the end of the path" <<
Endl;
816 Log() <<
kINFO <<
"Increase number of steps (GDNSteps)." <<
Endl;
827 Log() <<
kFATAL <<
"BUG TRAP: minimum not found in MakeGDPath()" <<
Endl;
834 Double_t stloop = strisk +stupgrade + stgradvec + stperf;
878 Log() <<
kWARNING <<
"<CalcFStar> Using unverified code! Check!" <<
Endl;
881 Log() <<
kFATAL <<
"<CalcFStar> Invalid start/end indices!" <<
Endl;
888 std::vector<Double_t> fstarSorted;
892 const Event& e = *(*events)[i];
894 fFstar.push_back(fstarVal);
895 fstarSorted.push_back(fstarVal);
899 std::sort( fstarSorted.begin(), fstarSorted.end() );
902 fFstarMedian = 0.5*(fstarSorted[ind]+fstarSorted[ind-1]);
919 Log() <<
kWARNING <<
"<Optimism> Using unverified code! Check!" <<
Endl;
922 Log() <<
kFATAL <<
"<Optimism> Invalid start/end indices!" <<
Endl;
937 const Event& e = *(*events)[i];
943 sumyhaty += w*yhat*
y;
948 Double_t cov = sumyhaty - sumyhat*sumy;
961 Log() <<
kWARNING <<
"<ErrorRateReg> Using unverified code! Check!" <<
Endl;
964 Log() <<
kFATAL <<
"<ErrorRateReg> Invalid start/end indices!" <<
Endl;
966 if (
fFstar.size()!=neve) {
967 Log() <<
kFATAL <<
"--- RuleFitParams::ErrorRateReg() - F* not initialized! BUG!!!" 968 <<
" Fstar.size() = " <<
fFstar.size() <<
" , N(events) = " << neve <<
Endl;
983 const Event& e = *(*events)[i];
992 return sumdf/sumdfmed;
1006 Log() <<
kWARNING <<
"<ErrorRateBin> Using unverified code! Check!" <<
Endl;
1009 Log() <<
kFATAL <<
"<ErrorRateBin> Invalid start/end indices!" <<
Endl;
1020 const Event& e = *(*events)[i];
1023 signF = (sF>0 ? +1:-1);
1036 std::vector<Double_t> & sFbkg )
1044 std::sort(sFsig.begin(), sFsig.end());
1045 std::sort(sFbkg.begin(), sFbkg.end());
1046 const Double_t minsig = sFsig.front();
1047 const Double_t minbkg = sFbkg.front();
1048 const Double_t maxsig = sFsig.back();
1049 const Double_t maxbkg = sFbkg.back();
1055 const Double_t df = (maxf-minf)/(np-1);
1060 std::vector<Double_t>::const_iterator indit;
1075 for (
Int_t i=0; i<np; i++) {
1077 indit = std::find_if( sFsig.begin(), sFsig.end(), std::bind2nd(std::greater_equal<Double_t>(), fcut));
1078 nesig = sFsig.end()-indit;
1081 indit = std::find_if( sFbkg.begin(), sFbkg.end(), std::bind2nd(std::greater_equal<Double_t>(), fcut));
1082 nrbkg = indit-sFbkg.begin();
1094 area += 0.5*(1+rejb)*effs;
1109 Log() <<
kWARNING <<
"<ErrorRateRoc> Should not be used in the current version! Check!" <<
Endl;
1112 Log() <<
kFATAL <<
"<ErrorRateRoc> Invalid start/end indices!" <<
Endl;
1119 std::vector<Double_t> sFsig;
1120 std::vector<Double_t> sFbkg;
1127 const Event& e = *(*events)[i];
1130 sFsig.push_back(sF);
1135 sFbkg.push_back(sF);
1140 fsigave = sumfsig/sFsig.size();
1141 fbkgave = sumfbkg/sFbkg.size();
1159 Log() <<
kWARNING <<
"<ErrorRateRocTst> Should not be used in the current version! Check!" <<
Endl;
1162 Log() <<
kFATAL <<
"<ErrorRateRocTst> Invalid start/end indices!" <<
Endl;
1170 std::vector< std::vector<Double_t> > sFsig;
1171 std::vector< std::vector<Double_t> > sFbkg;
1183 sFsig[itau].push_back(sF);
1186 sFbkg[itau].push_back(sF);
1209 Log() <<
kFATAL <<
"<ErrorRateRocTst> Invalid start/end indices!" <<
Endl;
1225 if (fGDErrTst[itau]>maxx) maxx=fGDErrTst[itau];
1226 if (fGDErrTst[itau]<minx) {
1227 minx=fGDErrTst[itau];
1267 Log() <<
kFATAL <<
"<MakeTstGradientVector> Invalid start/end indices!" <<
Endl;
1291 const std::vector<UInt_t> *eventRuleMap=0;
1298 const Event *e = (*events)[i];
1302 nrules = (*eventRuleMap).size();
1315 for (
UInt_t ir=0; ir<nrules; ir++) {
1316 rind = (*eventRuleMap)[ir];
1347 Double_t maxv = (maxr>maxl ? maxr:maxl);
1361 if (TMath::Abs(val)>=cthresh) {
1368 if (TMath::Abs(val)>=cthresh) {
1391 Log() <<
kFATAL <<
"<MakeGradientVector> Invalid start/end indices!" <<
Endl;
1411 const std::vector<UInt_t> *eventRuleMap=0;
1417 const Event *e = (*events)[i];
1426 nrules = (*eventRuleMap).size();
1431 for (
UInt_t ir=0; ir<nrules; ir++) {
1432 rind = (*eventRuleMap)[ir];
1458 Double_t maxv = (maxr>maxl ? maxr:maxl);
1466 useRThresh = cthresh;
1467 useLThresh = cthresh;
1540 Log() <<
kFATAL <<
"<CalcAverageTruth> Invalid start/end indices!" <<
Endl;
1553 Log() <<
kVERBOSE <<
"Effective number of signal / background = " << ensig <<
" / " << enbkg <<
Endl;
static Vc_ALWAYS_INLINE int_v min(const int_v &x, const int_v &y)
MsgLogger & Endl(MsgLogger &ml)
std::vector< std::vector< Double_t > > fGDCoefLinTst
std::vector< Double_t > fAverageSelectorPath
const std::vector< Double_t > & GetLinNorm() const
const std::vector< const TMVA::Event *> & GetTrainingEvents() const
Double_t EvalLinEventRaw(UInt_t vind, const Event &e, Bool_t norm) const
void MakeGradientVector()
make gradient vector
UInt_t GetNLinear() const
void EvaluateAveragePerf()
virtual Int_t Fill()
Fill all branches.
void FillCoefficients()
helper function to store the rule coefficients in local arrays
EMsgType GetMinType() const
const std::vector< UInt_t > & GetEventRuleMap(UInt_t evtidx) const
Double_t RiskPath() const
const std::vector< TMVA::Rule * > & GetRulesConst() const
std::vector< std::vector< Double_t > > fGradVecTst
void EvaluateAveragePath()
std::vector< Double_t > fAverageRulePath
Double_t GetGDValidEveFrac() const
Short_t Min(Short_t a, Short_t b)
std::vector< Double_t > fGDTauVec
std::vector< Double_t > fFstar
void ErrorRateRocTst()
Estimates the error rate with the current set of parameters.
const std::vector< Double_t > & GetLinCoefficients() const
Double_t GetEventLinearValNorm(UInt_t i) const
void SetLinCoefficients(const std::vector< Double_t > &v)
void MakeTstGradientVector()
make test gradient vector for all tau same algorithm as MakeGradientVector()
#define rprev(otri1, otri2)
std::vector< std::vector< Double_t > > fGDCoefTst
Int_t FindGDTau()
This finds the cutoff parameter tau by scanning several different paths.
Double_t GetEventRuleVal(UInt_t i) const
MsgLogger & Log() const
message logger
Double_t Risk(UInt_t ind1, UInt_t ind2, Double_t neff) const
risk assessment
Double_t FStar() const
We want to estimate F* = argmin Eyx( L(y,F(x) ), min wrt F(x) F(x) = FL(x) + FR(x) ...
Double_t CalcAverageTruth()
calculate the average truth
DataSetInfo & DataInfo() const
Double_t ErrorRateBin()
Estimates the error rate with the current set of parameters It uses a binary estimate of (y-F*(x)) (y...
void SetMinType(EMsgType minType)
RuleEnsemble * GetRuleEnsemblePtr()
Double_t GetWeight() const
return the event weight - depending on whether the flag IgnoreNegWeightsInTraining is or not...
void SetMsgType(EMsgType t)
TString GetElapsedTime(Bool_t Scientific=kTRUE)
Double_t LossFunction(const Event &e) const
Implementation of squared-error ramp loss function (eq 39,40 in ref 1) This is used for binary Classi...
virtual Int_t Write(const char *name=0, Int_t option=0, Int_t bufsize=0)
Write this object to the current directory.
Double_t GetGDPathEveFrac() const
std::vector< Char_t > fGDErrTstOK
Double_t ErrorRateReg()
Estimates the error rate with the current set of parameters This code is pretty messy at the moment...
Double_t ErrorRateRoc()
Estimates the error rate with the current set of parameters.
Double_t CoefficientRadius()
Calculates sqrt(Sum(a_i^2)), i=1..N (NOTE do not include a0)
std::vector< Double_t > fGDOfsTst
void SetOffset(Double_t v=0.0)
void ClearCoefficients(Double_t val=0)
const TMVA::Event * GetRuleMapEvent(UInt_t evtidx) const
Double_t Optimism()
implementation of eq.
char * Form(const char *fmt,...)
Double_t Penalty() const
This is the "lasso" penalty To be used for regression.
void ClearLinCoefficients(Double_t val=0)
Bool_t IsRuleMapOK() const
void SetLinCoefficient(UInt_t i, Double_t v)
void SetCoefficients(const std::vector< Double_t > &v)
set all rule coefficients
void InitNtuple()
initializes the ntuple
Double_t GetTrainingEventWeight(UInt_t i) const
void CalcFStar()
Estimates F* (optimum scoring function) for all events for the given sets.
virtual ~RuleFitParams()
destructor
std::vector< std::vector< Double_t > > fGradVecLinTst
static Vc_ALWAYS_INLINE int_v max(const int_v &x, const int_v &y)
void UpdateCoefficients()
Establish maximum gradient for rules, linear terms and the offset.
Double_t ErrorRateRocRaw(std::vector< Double_t > &sFsig, std::vector< Double_t > &sFbkg)
RuleFitParams()
constructor
virtual Int_t Branch(TCollection *list, Int_t bufsize=32000, Int_t splitlevel=99, const char *name="")
Create one branch for each element in the collection.
std::vector< Double_t > fGradVecLin
Double_t GetOffset() const
std::vector< Double_t > fGradVec
Double_t EvalLinEvent() const
Short_t Max(Short_t a, Short_t b)
void Init()
Initializes all parameters using the RuleEnsemble and the training tree.
Double_t CalcAverageResponse()
calculate the average response - TODO : rewrite bad dependency on EvaluateAverage() ! ...
RuleEnsemble * fRuleEnsemble
std::vector< TMVA::Rule * > & GetRules()
Bool_t IsSignal(const Event *ev) const
void DrawProgressBar(Int_t, const TString &comment="")
draws progress bar in color or B&W caution:
void GetCoefficients(std::vector< Double_t > &v)
Retrieve all rule coefficients.
void InitGD()
Initialize GD path search.
A TTree object has a header with a name and a title.
Double_t EvalEvent() const
void UpdateTstCoefficients()
Establish maximum gradient for rules, linear terms and the offset for all taus TODO: do not need inde...
const MethodRuleFit * GetMethodRuleFit() const
UInt_t RiskPerfTst()
Estimates the error rate with the current set of parameters.
std::vector< Double_t > fGDErrTst
Double_t Sqrt(Double_t x)
void EvaluateAverage(UInt_t ind1, UInt_t ind2, std::vector< Double_t > &avsel, std::vector< Double_t > &avrul)
evaluate the average of each variable and f(x) in the given range
void MakeGDPath()
The following finds the gradient directed path in parameter space.
double norm(double *x, double *p)
Double_t RiskPerf() const
Int_t Type(const Event *e) const
void CalcTstAverageResponse()
calc average response for all test paths - TODO: see comment under CalcAverageResponse() note that 0 ...