64 using std::stringstream;
78 :
MethodBase( jobName,
Types::kFDA, methodTitle, theData, theOption, theTargetDir ),
83 fConvergerFitter( 0 ),
84 fSumOfWeightsSig( 0 ),
85 fSumOfWeightsBkg( 0 ),
87 fOutputDimensions( 0 )
102 fConvergerFitter( 0 ),
103 fSumOfWeightsSig( 0 ),
104 fSumOfWeightsBkg( 0 ),
106 fOutputDimensions( 0 )
182 for (
Int_t ipar=fNPars; ipar<1000; ipar++) {
185 <<
"<CreateFormula> Formula contains expression: \"" <<
Form(
"(%i)",ipar) <<
"\", " 186 <<
"which cannot be attributed to a parameter; " 187 <<
"it may be that the number of variable ranges given via \"ParRanges\" " 188 <<
"does not match the number of parameters in the formula expression, please verify!" 201 <<
"<CreateFormula> Formula contains expression: \"" <<
Form(
"x%i",ivar) <<
"\", " 202 <<
"which cannot be attributed to an input variable" <<
Endl;
207 Log() <<
"Creating and compiling formula" <<
Endl;
215 Log() <<
kFATAL <<
"<ProcessOptions> Formula expression could not be properly compiled" <<
Endl;
219 Log() <<
kFATAL <<
"<ProcessOptions> Dubious number of parameters in formula expression: " 237 Log() <<
kFATAL <<
"<ProcessOptions> Mismatch in parameter string: " 238 <<
"the number of parameters: " <<
fNPars <<
" != ranges defined: " 239 << parList->
GetSize() <<
"; the format of the \"ParRanges\" string " 240 <<
"must be: \"(-1.2,3.4);(-2.3,4.55);...\", " 241 <<
"where the numbers in \"(a,b)\" correspond to the a=min, b=max parameter ranges; " 242 <<
"each parameter defined in the function string must have a corresponding rang." 256 stringstream stmin;
Float_t pmin=0; stmin << pminS.
Data(); stmin >> pmin;
257 stringstream stmax;
Float_t pmax=0; stmax << pmaxS.
Data(); stmax >> pmax;
260 if (
TMath::Abs(pmax-pmin) < 1.e-30) pmax = pmin;
261 if (pmin > pmax)
Log() <<
kFATAL <<
"<ProcessOptions> max > min in interval for parameter: [" 262 << ipar <<
"] : [" << pmin <<
", " << pmax <<
"] " <<
Endl;
264 Log() <<
kINFO <<
"Create parameter interval for parameter " << ipar <<
" : [" << pmin <<
"," << pmax <<
"]" <<
Endl;
374 Log() <<
kFATAL <<
"<Train> Troubles in sum of weights: " 379 Log() <<
kFATAL <<
"<Train> Troubles in sum of weights: " 385 for (std::vector<Interval*>::const_iterator parIt =
fParRange.begin(); parIt !=
fParRange.end(); parIt++) {
386 fBestPars.push_back( (*parIt)->GetMean() );
409 Log() <<
"Results for parameter fit using \"" << fitter <<
"\" fitter:" <<
Endl;
410 std::vector<TString> parNames;
411 for (
UInt_t ipar=0; ipar<pars.size(); ipar++) parNames.push_back(
Form(
"Par(%i)",ipar ) );
414 Log() <<
"Value of estimator at minimum: " << estimator <<
Endl;
440 estimator[2] += deviation * ev->
GetWeight();
443 estimator[2] /= sumOfWeights[2];
458 crossEntropy += t*
log(y);
460 estimator[2] += ev->
GetWeight()*crossEntropy;
462 estimator[2] /= sumOfWeights[2];
476 estimator[0] /= sumOfWeights[0];
477 estimator[1] /= sumOfWeights[1];
479 return estimator[0] + estimator[1];
490 for( std::vector<Double_t>::iterator it = parBegin; it != parEnd; ++it ){
545 std::vector<Float_t> temp;
553 for(
UInt_t iClass=0; iClass<nClasses; iClass++){
555 for(
UInt_t j=0;j<nClasses;j++){
557 norm+=
exp(temp[j]-temp[iClass]);
559 (*fMulticlassReturnVal).push_back(1.0/(1.0+norm));
584 values.push_back( value );
634 if(
gTools().HasAttr( wghtnode,
"NDim")) {
652 if (ipar >=
fNPars*fOutputDimensions)
Log() <<
kFATAL <<
"<ReadWeightsFromXML> index out of range: " 671 fout <<
" double fParameter[" <<
fNPars <<
"];" << std::endl;
672 fout <<
"};" << std::endl;
673 fout <<
"" << std::endl;
674 fout <<
"inline void " << className <<
"::Initialize() " << std::endl;
675 fout <<
"{" << std::endl;
677 fout <<
" fParameter[" << ipar <<
"] = " <<
fBestPars[ipar] <<
";" << std::endl;
679 fout <<
"}" << std::endl;
681 fout <<
"inline double " << className <<
"::GetMvaValue__( const std::vector<double>& inputValues ) const" << std::endl;
682 fout <<
"{" << std::endl;
683 fout <<
" // interpret the formula" << std::endl;
696 fout <<
" double retval = " << str <<
";" << std::endl;
698 fout <<
" return retval; " << std::endl;
699 fout <<
"}" << std::endl;
701 fout <<
"// Clean up" << std::endl;
702 fout <<
"inline void " << className <<
"::Clear() " << std::endl;
703 fout <<
"{" << std::endl;
704 fout <<
" // nothing to clear" << std::endl;
705 fout <<
"}" << std::endl;
719 Log() <<
"The function discriminant analysis (FDA) is a classifier suitable " <<
Endl;
720 Log() <<
"to solve linear or simple nonlinear discrimination problems." <<
Endl;
722 Log() <<
"The user provides the desired function with adjustable parameters" <<
Endl;
723 Log() <<
"via the configuration option string, and FDA fits the parameters to" <<
Endl;
724 Log() <<
"it, requiring the signal (background) function value to be as close" <<
Endl;
725 Log() <<
"as possible to 1 (0). Its advantage over the more involved and" <<
Endl;
726 Log() <<
"automatic nonlinear discriminators is the simplicity and transparency " <<
Endl;
727 Log() <<
"of the discrimination expression. A shortcoming is that FDA will" <<
Endl;
728 Log() <<
"underperform for involved problems with complicated, phase space" <<
Endl;
729 Log() <<
"dependent nonlinear correlations." <<
Endl;
731 Log() <<
"Please consult the Users Guide for the format of the formula string" <<
Endl;
732 Log() <<
"and the allowed parameter ranges:" <<
Endl;
733 if (
gConfig().WriteOptionsReference()) {
734 Log() <<
"<a href=\"http://tmva.sourceforge.net/docu/TMVAUsersGuide.pdf\">" 735 <<
"http://tmva.sourceforge.net/docu/TMVAUsersGuide.pdf</a>" <<
Endl;
737 else Log() <<
"http://tmva.sourceforge.net/docu/TMVAUsersGuide.pdf" <<
Endl;
741 Log() <<
"The FDA performance depends on the complexity and fidelity of the" <<
Endl;
742 Log() <<
"user-defined discriminator function. As a general rule, it should" <<
Endl;
743 Log() <<
"be able to reproduce the discrimination power of any linear" <<
Endl;
744 Log() <<
"discriminant analysis. To reach into the nonlinear domain, it is" <<
Endl;
745 Log() <<
"useful to inspect the correlation profiles of the input variables," <<
Endl;
746 Log() <<
"and add quadratic and higher polynomial terms between variables as" <<
Endl;
747 Log() <<
"necessary. Comparison with more involved nonlinear classifiers can" <<
Endl;
748 Log() <<
"be used as a guide." <<
Endl;
752 Log() <<
"Depending on the function used, the choice of \"FitMethod\" is" <<
Endl;
753 Log() <<
"crucial for getting valuable solutions with FDA. As a guideline it" <<
Endl;
754 Log() <<
"is recommended to start with \"FitMethod=MINUIT\". When more complex" <<
Endl;
755 Log() <<
"functions are used where MINUIT does not converge to reasonable" <<
Endl;
756 Log() <<
"results, the user should switch to non-gradient FitMethods such" <<
Endl;
757 Log() <<
"as GeneticAlgorithm (GA) or Monte Carlo (MC). It might prove to be" <<
Endl;
758 Log() <<
"useful to combine GA (or MC) with MINUIT by setting the option" <<
Endl;
759 Log() <<
"\"Converger=MINUIT\". GA (MC) will then set the starting parameters" <<
Endl;
760 Log() <<
"for MINUIT such that the basic quality of GA (MC) of finding global" <<
Endl;
761 Log() <<
"minima is combined with the efficacy of MINUIT of finding local" <<
Endl;
void Init(void)
default initialisation
Double_t fSumOfWeightsBkg
void DeclareOptions()
define the options (their key words) that can be set in the option string
MsgLogger & Endl(MsgLogger &ml)
void ClearAll()
delete and clear all class members
Collectable string class.
TString & ReplaceAll(const TString &s1, const TString &s2)
OptionBase * DeclareOptionRef(T &ref, const TString &name, const TString &desc="")
Double_t InterpretFormula(const Event *, std::vector< Double_t >::iterator begin, std::vector< Double_t >::iterator end)
formula interpretation
void CreateFormula()
translate formula string into TFormula, and parameter string into par ranges
TransformationHandler & GetTransformationHandler(Bool_t takeReroutedIfAvailable=true)
UInt_t GetNClasses() const
LongDouble_t Power(LongDouble_t x, LongDouble_t y)
std::vector< Double_t > fBestPars
Double_t GetMvaValue(Double_t *err=0, Double_t *errUpper=0)
returns MVA value for given event
Double_t Run()
estimator function interface for fitting
std::vector< Interval * > fParRange
const Event * GetEvent() const
DataSetInfo & DataInfo() const
void SetOptions(const TString &s)
Bool_t DoRegression() const
Ssiz_t First(char c) const
Find first occurrence of a character c.
Double_t GetWeight() const
return the event weight - depending on whether the flag IgnoreNegWeightsInTraining is set or not...
void PrintResults(const TString &, std::vector< Double_t > &, const Double_t) const
display fit parameters; check maximum length of variable name
Bool_t HasAnalysisType(Types::EAnalysisType type, UInt_t numberClasses, UInt_t numberTargets)
FDA can handle classification with 2 classes and regression with one regression-target.
UInt_t GetNEvents() const
temporary event when testing on a different DataSet than the own one
Bool_t DoMulticlass() const
void GetHelpMessage() const
get help message text
Float_t GetTarget(UInt_t itgt) const
UInt_t GetNTargets() const
const char * GetName() const
Returns name of object.
void MakeClassSpecific(std::ostream &, const TString &) const
write FDA-specific classifier response
void CalculateMulticlassValues(const TMVA::Event *&evt, std::vector< Double_t > &parameters, std::vector< Float_t > &values)
calculate the values for multiclass
char * Form(const char *fmt,...)
IFitterTarget * fConvergerFitter
void SetTarget(UInt_t itgt, Float_t value)
set the target value (dimension itgt) to value
virtual TObject * At(Int_t idx) const
Returns the object at position idx. Returns 0 if idx is out of range.
void ReadWeightsFromXML(void *wghtnode)
read coefficients from xml weight file
void Train(void)
FDA training.
void ProcessOptions()
the option string is decoded, for available options see "DeclareOptions"
void AddWeightsXMLTo(void *parent) const
create XML description for LD classification and regression (for arbitrary number of output classes/t...
Float_t GetValue(UInt_t ivar) const
return value of i'th variable
Describe directory structure in memory.
std::vector< Float_t > * fMulticlassReturnVal
Double_t EstimatorFunction(std::vector< Double_t > &)
compute estimator for given parameter set (to be minimised) const Double_t sumOfWeights[] = { fSumOfW...
void ReadWeightsFromStream(std::istream &i)
read back the training results from a file (stream)
Bool_t Contains(const char *pat, ECaseCompare cmp=kExact) const
void AddPreDefVal(const T &)
MethodFDA(const TString &jobName, const TString &methodTitle, DataSetInfo &theData, const TString &theOption="", TDirectory *theTargetDir=0)
standard constructor
const TString & GetOptions() const
Double_t fSumOfWeightsSig
#define REGISTER_METHOD(CLASS)
for example
Abstract ClassifierFactory template that handles arbitrary types.
virtual const std::vector< Float_t > & GetRegressionValues()
Int_t CountChar(Int_t c) const
Return number of times character c occurs in the string.
Bool_t IsSignal(const Event *ev) const
virtual const std::vector< Float_t > & GetMulticlassValues()
std::vector< Float_t > * fRegressionReturnVal
virtual Int_t GetSize() const
double norm(double *x, double *p)
void CheckForUnusedOptions() const
checks for unused options in option string
virtual ~MethodFDA(void)
destructor
void NoErrorCalc(Double_t *const err, Double_t *const errUpper)
const char * Data() const