52 EMsgType minType = kINFO ) :
53 fMethodRuleFit(rfbase),
60 SetRFWorkDir(rfbase->GetRFWorkDir());
62 SetRFWorkDir(
"./rulefit");
82 <<
"---------------------------------------------------------------------------\n" 83 <<
"- You are running the interface to Jerome Friedmans RuleFit(tm) code. -\n" 84 <<
"- For a full manual see the following web page: -\n" 86 <<
"- http://www-stat.stanford.edu/~jhf/R-RuleFit.html -\n" 88 <<
"---------------------------------------------------------------------------" 98 <<
"------------------------ RULEFIT-JF INTERFACE SETUP -----------------------\n" 100 <<
"1. Create a rulefit directory in your current work directory:\n" 102 <<
" the directory may be set using the option RuleFitDir\n" 104 <<
"2. Copy (or make a link) the file rf_go.exe into this directory\n" 106 <<
"The file can be obtained from Jerome Friedmans homepage (linux):\n" 107 <<
" wget http://www-stat.stanford.edu/~jhf/r-rulefit/linux/rf_go.exe\n" 109 <<
"Don't forget to do:\n" 110 <<
" chmod +x rf_go.exe\n" 112 <<
"For Windows download:\n" 113 <<
" http://www-stat.stanford.edu/~jhf/r-rulefit/windows/rf_go.exe\n" 115 <<
"NOTE: other platforms are not supported (see Friedmans homepage)\n" 117 <<
"---------------------------------------------------------------------------\n" 174 fLogger << kFATAL <<
"Setup failed - aborting!" <<
Endl;
177 FILE *
f = fopen(
"rf_go.exe",
"r");
181 fLogger << kFATAL <<
"Setup failed - aborting!" <<
Endl;
317 program =
"rulefit_pred";
351 fLogger << kWARNING <<
"WriteRfOut is not yet implemented" <<
Endl;
360 fLogger << kWARNING <<
"WriteRfStatus is not yet implemented" <<
Endl;
369 fLogger << kWARNING <<
"WriteRuleFitMod is not yet implemented" <<
Endl;
378 fLogger << kWARNING <<
"WriteRuleFitSum is not yet implemented" <<
Endl;
468 fLogger << kWARNING <<
"WriteVarImp is not yet implemented" <<
Endl;
477 fLogger << kWARNING <<
"WriteYhat is not yet implemented" <<
Endl;
493 neve =
static_cast<Int_t>(xval);
495 fLogger << kWARNING <<
"Inconsistent size of yhat file and test tree!" <<
Endl;
522 for (
UInt_t ivar=0; ivar<nvars; ivar++) {
527 if (xval>xmax) xmax=xval;
535 for (
UInt_t ivar=0; ivar<nvars; ivar++) {
550 fLogger << kVERBOSE <<
"Reading RuleFit summary file" <<
Endl;
592 norules = (nrules==1);
594 norules = norules && (dumI==1);
596 norules = norules && (dumI==1);
598 norules = norules && (dumI==0);
599 if (nrules==0) norules=
kTRUE;
600 if (norules) nrules = 0;
606 fLogger << kDEBUG <<
"N(rules) = " << nrules <<
Endl;
607 fLogger << kDEBUG <<
"N(vars) = " << nvars <<
Endl;
608 fLogger << kDEBUG <<
"N(varsO) = " << nvarsOpt <<
Endl;
610 fLogger << kDEBUG <<
"offset = " << offset <<
Endl;
611 if (nvars!=nvarsOpt) {
612 fLogger << kWARNING <<
"Format of rulefit.sum is ... weird?? Continuing but who knows how it will end...?" <<
Endl;
614 std::vector<Double_t> rfSupp;
615 std::vector<Double_t> rfCoef;
616 std::vector<Int_t> rfNcut;
617 std::vector<Rule *> rfRules;
621 for (
Int_t t=0; t<8; t++) {
639 rfSupp.push_back(dumF);
641 rfCoef.push_back(dumF);
643 rfNcut.push_back(static_cast<int>(dumF+0.5));
662 rfRules.push_back( rule );
680 if (imp>impref) impref = imp;
682 fLogger << kDEBUG <<
"Rule #" << r <<
" : " << nvars <<
Endl;
683 fLogger << kDEBUG <<
" support = " << rfSupp[
r] <<
Endl;
688 for (
Int_t c=0; c<rfNcut[
r]; c++) {
690 varind =
static_cast<Int_t>(dumF+0.5)-1;
720 std::vector<Int_t> varind;
721 std::vector<Double_t>
xmin;
722 std::vector<Double_t>
xmax;
723 std::vector<Double_t> average;
724 std::vector<Double_t> stdev;
725 std::vector<Double_t>
norm;
726 std::vector<Double_t> coeff;
728 for (
Int_t c=0; c<nvars; c++) {
730 varind.push_back(static_cast<Int_t>(dumF+0.5)-1);
732 xmin.push_back(static_cast<Double_t>(dumF));
734 xmax.push_back(static_cast<Double_t>(dumF));
736 average.push_back(static_cast<Double_t>(dumF));
738 stdev.push_back(static_cast<Double_t>(dumF));
742 coeff.push_back(dumF/nv);
745 fLogger << kDEBUG <<
" varind = " << varind.back() <<
Endl;
746 fLogger << kDEBUG <<
" xmin = " << xmin.back() <<
Endl;
747 fLogger << kDEBUG <<
" xmax = " << xmax.back() <<
Endl;
748 fLogger << kDEBUG <<
" average = " << average.back() <<
Endl;
749 fLogger << kDEBUG <<
" stdev = " << stdev.back() <<
Endl;
750 fLogger << kDEBUG <<
" coeff = " << coeff.back() <<
Endl;
760 if (imp>impref) impref=imp;
Bool_t WriteLx()
Save input variable mask.
Bool_t DoOnlyRules() const
Bool_t ReadVarImp()
read variable importance
void WelcomeMessage()
welcome message
void SetCoefficient(Double_t v)
J Friedman's RuleFit method.
Long64_t GetNTestEvents() const
void HowtoSetupRF()
howto message
void SetLinDP(const std::vector< Double_t > &xmax)
MsgLogger & Endl(MsgLogger &ml)
void SetSSBNeve(Double_t v)
void SetRuleCut(RuleCut *rc)
void SetCutMax(Int_t i, Double_t v)
J Friedman's RuleFit method.
void SetLinDM(const std::vector< Double_t > &xmin)
A class implementing various fits of rule ensembles.
virtual ~RuleFitAPI()
destructor
const TString & GetExpression() const
Bool_t WriteRfStatus()
written by rf_go.exe; write rulefit status
void SetRFWorkDir(const char *wdir)
set the directory containing rf_go.exe.
Double_t CalcLinNorm(Double_t stdev)
Bool_t DoOnlyLinear() const
Bool_t WriteAll()
write all files read by rf_go.exe
void FillIntParmsDef()
set default int params
Bool_t cd(const char *path)
Bool_t WriteTrain()
write training data, column wise
const MethodRuleFit * fMethodRuleFit
Bool_t WriteRfOut()
written by rf_go.exe; write rulefit output (rfout)
Bool_t WriteIntParms()
write int params file
Bool_t WriteRealVarImp()
write the minimum importance to be considered
void CleanupLinear()
cleanup linear model
Int_t GetRFNendnodes() const
void ImportSetup()
import setup from MethodRuleFit
Double_t GetWeight() const
return the event weight - depending on whether the flag IgnoreNegWeightsInTraining is set or not...
UInt_t GetNVariables() const
void CheckRFWorkDir()
check if the rulefit work dir is properly set up.
Int_t GetGDNPathSteps() const
Float_t GetValue(UInt_t ivar) const
return value of i'th variable
Implementation of a rule.
void SetAverageRuleSigma(Double_t v)
void SetLinCoefficients(const std::vector< Double_t > &v)
const char * Data() const
void SetImportanceRef(Double_t impref)
set reference importance
void SetCutMin(Int_t i, Double_t v)
std::vector< Float_t > fRFYhat
Bool_t WriteProgram()
write command to rf_go.exe
void FillRealParmsDef()
set default real params
Bool_t IsSignal(const Event *ev) const
Double_t GetGDPathStep() const
void SetCutDoMin(Int_t i, Bool_t v)
Bool_t ReadModelSum()
read model from rulefit.sum
void SetSelector(Int_t i, UInt_t s)
RuleEnsemble * GetRuleEnsemblePtr()
Bool_t WriteRuleFitMod()
written by rf_go.exe (NOTE:Format unknown!)
void SetTrainParms()
set the training parameters
const Event * GetTrainingEvent(Long64_t ievt) const
void CalcVarImportance()
Calculates variable importance using eq (35) in the RuleFit paper by Friedman et al.
Double_t GetGDErrScale() const
R__EXTERN TSystem * gSystem
A class describing a 'rule cut'.
Double_t GetImportance() const
Bool_t WriteInt(std::ofstream &f, const Int_t *v, Int_t n=1)
void SetOffset(Double_t v=0.0)
Double_t GetSigma() const
const Event * GetEvent() const
virtual Int_t Exec(const char *shellcmd)
Execute a command.
void SetTestParms()
set the test params
Bool_t WriteFloat(std::ofstream &f, const Float_t *v, Int_t n=1)
void SetImportanceRef(Double_t v)
void SetSupport(Double_t v)
void SetCurrentType(Types::ETreeType type) const
void SetNorm(Double_t norm)
Int_t ReadFloat(std::ifstream &f, Float_t *v, Int_t n=1) const
std::vector< Float_t > fRFVarImp
void SetCutDoMax(Int_t i, Bool_t v)
Long64_t GetNEvents(Types::ETreeType type=Types::kMaxTreeType) const
Bool_t WriteVarNames()
write variable names, ascii
Bool_t WriteTest()
Write test data.
Double_t GetLinQuantile() const
DataSetInfo & DataInfo() const
VariableInfo & GetVariableInfo(Int_t i)
Int_t ReadInt(std::ifstream &f, Int_t *v, Int_t n=1) const
Bool_t OpenRFile(TString name, std::ofstream &f)
void SetRules(const std::vector< TMVA::Rule * > &rules)
set rules
Abstract ClassifierFactory template that handles arbitrary types.
Double_t GetTreeEveFrac() const
Bool_t WriteYhat()
written by rf_go.exe
Bool_t WriteRuleFitSum()
written by rf_go.exe (NOTE: format unknown!)
Bool_t ReadYhat()
read the score
Int_t RunRuleFit()
execute rf_go.exe
void SetLinNorm(const std::vector< Double_t > &norm)
Bool_t WriteRealParms()
write real params file
Long64_t GetNTrainingEvents() const
Int_t GetRFNrules() const
Double_t CalcLinImportance()
calculate the linear importance for each rule
std::vector< Int_t > fRFVarImpInd
double norm(double *x, double *p)
void InitRuleFit()
default initialisation SetRFWorkDir("./rulefit");