AliRoot Core  ee782a0 (ee782a0)
AliNDFunctionInterface.cxx
Go to the documentation of this file.
1 /**************************************************************************
2  * Copyright(c) 1998-1999, ALICE Experiment at CERN, All rights reserved. *
3  * *
4  * Author: The ALICE Off-line Project. *
5  * Contributors are mentioned in the code where appropriate. *
6  * *
7  * Permission to use, copy, modify and distribute this software and its *
8  * documentation strictly for non-commercial purposes is hereby granted *
9  * without fee, provided that the above copyright notice appears in all *
10  * copies and that both the copyright notice and this permission notice *
11  * appear in the supporting documentation. The authors make no claims *
12  * about the suitability of this software for any purpose. It is *
13  * provided "as is" without express or implied warranty. *
14  **************************************************************************/
16 
19 /*
20  NDimensional function interpolation using THn and using TMVA reader interface
21  .L $AliRoot_SRC/STAT/Macros/AliNDFunctionInterface.cxx+
22 
23 */
24 
25 #include "TObjArray.h"
26 #include "TH1.h"
27 
28 #include "TGraph.h"
29 #include "TTreeStream.h"
30 #include "TVirtualFFT.h"
31 #include "TMath.h"
32 #include "TVector.h"
33 #include "TStatToolkit.h"
34 #include <stdio.h>
35 #include "TPRegexp.h"
36 #include "THn.h"
37 #include "TKey.h"
38 #include "TMVA/Factory.h"
39 #include "TMVA/Tools.h"
40 #include "TMVA/Reader.h"
41 #include "TMVA/DataLoader.h"
42 #include "TMVA/MethodBase.h"
43 #include "AliNDFunctionInterface.h"
44 
45 //Int_t AliNDFunctionInterface::fVerbose=0;
52 Double_t AliNDFunctionInterface::GetInterpolationLinear(THn * his, Double_t *xyz, Int_t verbose){
53  const Int_t kMaxDim=100;
54  Int_t nDim=his->GetNdimensions();
55  Double_t deltaXYZ[kMaxDim];
56  Double_t ddXYZ[kMaxDim];
57  Int_t bin0, index0[kMaxDim];
58  Double_t val0,value;
59  bin0=his->GetBin(xyz);
60  val0=his->GetBinContent(bin0,index0);
61  value=val0;
62  if (verbose==0) return val0;
63  for (Int_t i=0;i<nDim; i++) {
64  deltaXYZ[i] = (his->GetAxis(i)->GetBinCenter(index0[i]) - xyz[i])/his->GetAxis(i)->GetBinWidth(index0[i]);
65  Int_t dBin = (deltaXYZ[i] > 0) ? 1 : -1;
66  if (verbose>2) dBin*=-1;
67  index0[i] += dBin;
68  if (index0[i]<1) {index0[i]=1; dBin=0;};
69  if (index0[i]>=his->GetAxis(i)->GetNbins()) {index0[i]=his->GetAxis(i)->GetNbins(); dBin=0;};
70  ddXYZ[i] = -(his->GetBinContent(index0)-val0)*dBin;
71  index0[i] -= dBin;
72  value += ddXYZ[i]*deltaXYZ[i];
73  if (verbose>1){ printf("%d\t%f\t%f\t%f\n", i, val0, deltaXYZ[i], ddXYZ[i]); }
74  }
75  return value;
76 }
77 
85 Double_t AliNDFunctionInterface::GetDeltaInterpolationLinear(THn * his, Double_t *xyz, Int_t dIndex, Int_t verbose){
86  const Int_t kMaxDim=100;
87  Int_t nDim=his->GetNdimensions();
88  Double_t deltaXYZ[kMaxDim];
89  Double_t ddXYZ[kMaxDim];
90  Int_t bin0, index0[kMaxDim],index1[kMaxDim];
91  Double_t val0,value;
92  bin0=his->GetBin(xyz); // Get nearest bin
93  his->GetBinContent(bin0,index0); // get Nd index of closest bin
94  memcpy(index1,index0,nDim*sizeof(Int_t));
95  if (index1[dIndex]<=1) index1[dIndex]=2;
96  if (index1[dIndex]>=his->GetAxis(dIndex)->GetNbins()-1) index1[dIndex]=his->GetAxis(dIndex)->GetNbins();
97  index1[dIndex]+=1;
98  Double_t derivative=his->GetBinContent(index1);
99  Double_t dx=his->GetAxis(dIndex)->GetBinWidth(index1[dIndex]);
100  index1[dIndex]-=2;
101  derivative-=his->GetBinContent(index1);
102  dx+=his->GetAxis(dIndex)->GetBinWidth(index1[dIndex]);
103  derivative/=dx;
104  value=derivative;
105  for (Int_t i=0;i<nDim; i++) {
106  deltaXYZ[i] = (his->GetAxis(i)->GetBinCenter(index0[i]) - xyz[i])/his->GetAxis(i)->GetBinWidth(index0[i]);
107  Int_t dBin = (deltaXYZ[i] > 0) ? 1 : -1;
108  if (verbose>2) dBin*=-1;
109  index0[i] += dBin;
110  if (index0[i]<1) {index0[i]=1; dBin=0;};
111  if (index0[i]>=his->GetAxis(i)->GetNbins()) {index0[i]=his->GetAxis(i)->GetNbins(); dBin=0;};
112  //
113  memcpy(index1,index0,nDim*sizeof(Int_t));
114  if (index1[dIndex]<=1) index1[dIndex]=2;
115  if (index1[dIndex]>=his->GetAxis(dIndex)->GetNbins()-1) index1[dIndex]=his->GetAxis(dIndex)->GetNbins();
116  //
117  index1[dIndex]+=1;
118  Double_t derivativeL=his->GetBinContent(index1);
119  index1[dIndex]-=2;
120  derivativeL-=his->GetBinContent(index1);
121  derivativeL/=dx;
122  ddXYZ[i] = (derivativeL-derivative)*dBin;
123  index0[i] -= dBin;
124  value -= ddXYZ[i]*deltaXYZ[i];
125  if (verbose>1){ printf("%d\t%f\t%f\t%f\n", i, derivative, deltaXYZ[i], ddXYZ[i]); }
126  }
127  return value;
128 }
129 
157 Int_t AliNDFunctionInterface::FitMVA(TTree *tree, const char *varFit, TCut cut, const char * variables, const char *methods, const char *weights, Int_t index){
159  auto outputFile = TFile::Open("TMVA_RegressionOutput.root", "UPDATE");
160  TMVA::Factory factory("TMVARegression", outputFile, "!V:!Silent:Color:DrawProgressBar:AnalysisType=Regression" );
161  TString varNameFit=varFit;
162  if (index>=0) varNameFit=TString::Format("%s[%d]",varFit,index);
164  TMVA::DataLoader loader(TString::Format("dir_%s",varNameFit.Data()).Data());
165  // 2.) Add the feature variables from the variables list
166  TObjArray *variableList = TString(variables).Tokenize(":");
167  if (variableList->GetEntries()<=0){
168  ::Error("AliNDFunctionInterface::FitMVA","Empty variable list");
169  return -1;
170  }
171  for (Int_t i=0; i<variableList->GetEntries(); i++){
172  loader.AddVariable(variableList->At(i)->GetName());
174  }
175  delete variableList;
176  loader.AddTarget(varFit);
178  loader.AddRegressionTree(tree, 1.0); // link the TTree to the loader, weight for each event = 1
179  Int_t entries = tree->Draw("1",cut,"goff");
180  loader.PrepareTrainingAndTestTree(cut, TString::Format("nTrain_Regression=%d:nTest_Regression=%d:SplitMode=Random:NormMode=NumEvents:!V",entries/2,entries/2));
181  if (weights!=NULL) loader.SetWeightExpression(weights);
183  TObjArray *methodList = TString(methods).Tokenize(":");
184  if (methodList->GetEntries()<=0){
185  ::Error("AliNDFunctionInterface::FitMVA","Empty method list");
186  return -1;
187  }
188  for (Int_t i=0; i<methodList->GetEntries(); i++){
189  std::string methodName=methodList->At(i)->GetName();
190  printf("Booking method %s\t%d\t%s\n",methodName.data(), regressionMethodID[methodName], regressionMethodSetting[methodName].data());
191  factory.BookMethod(&loader,regressionMethodID[methodName], methodName.data(), regressionMethodSetting[methodName].data());
192  }
194  factory.TrainAllMethods();
196  for (Int_t i=0; i<methodList->GetEntries(); i++) {
197  std::string methodName = methodList->At(i)->GetName();
198  TString weightFile = TString::Format("dir_%s/weights/TMVARegression_%s.weights.xml", varNameFit.Data(),methodName.data());
199  printf("Weight file\t%s\n", weightFile.Data());
200  TObjString weight(gSystem->GetFromPipe(TString::Format("cat %s",weightFile.Data())));
201  outputFile->cd(TString::Format("dir_%s", varNameFit.Data()).Data());
202  weight.Write(methodName.data());
203  }
204  delete methodList;
205  // 7.) Evaluate all MVAs using the set of test events /// TODO - make an option
206  factory.TestAllMethods();
207  // 8.) Evaluate and compare performance of all configured MVAs /// TODO - make an option
208  factory.EvaluateAllMethods();
209  // Save the output
210  outputFile->Close();
211  return 0;
212 }
213 
214 // Load MVA regression object and register it in the map of available readers fr evaluation in TFormula
240 TMVA::MethodBase * AliNDFunctionInterface::LoadMVAReader(Int_t id/*=0*/, const char * inputFile/*="TMVA_RegressionOutput.root"*/, const char *method/*="MLP"*/, const char *dir/*="dir_resolutionMIP"*/){
241  TMVA::Reader *reader;
242  reader = new TMVA::Reader( "!Color:!Silent" );
243  auto fin = TFile::Open(inputFile);
244  if (fin==NULL){
245  ::Error("LoadMVAReader","Invalid input file\t%s", inputFile);
246  return NULL;
247  }
248  TDirectoryFile *dir0 = 0,*dirVar=0;
249  fin->GetObject(dir,dir0);
250  if (dir0==NULL) {
251  ::Error("LoadMVAReader","Invalid input directory\t%s\t%s", dir, inputFile);
252  return NULL;
253  }
254  dir0->GetObject("InputVariables_Id",dirVar);
255  for (Int_t i=0; i<dirVar->GetNkeys()-2; i++){
256  TString varHis = dirVar->GetListOfKeys()->At(i)->GetName();
257  TString var(varHis(0,varHis.First("__")));
258  printf("%s\n",var.Data());
259  Float_t value;
260  reader->AddVariable(var.Data(),&value);
261  }
263  FILE * pFile;
264  pFile = fopen ("weights.xml","w");
265  TObjString *methodWeights= NULL;
266 
267  if (dir0==NULL) {
268  ::Error("LoadMVAReader","Object %s not present in input directory\t%s/%s", method, inputFile,dir);
269  delete pFile;
270  return NULL;
271  }
272  dir0->GetObject(method,methodWeights);
273  if (methodWeights==NULL){
274  ::Error("LoadMVAReader","Object %s not present in input directory\t%s/%s", method, inputFile,dir);
275  delete pFile;
276  return NULL;
277  }
278  fprintf (pFile, "%s",methodWeights->GetName());
279  fclose (pFile);
280  reader->BookMVA( method, "weights.xml");
281  if (id>=0) AliNDFunctionInterface::readerMethodBase[id]=dynamic_cast<TMVA::MethodBase *>(reader->FindMVA(method));
282  return dynamic_cast<TMVA::MethodBase *>(reader->FindMVA(method));
283 }
284 
302 Int_t AliNDFunctionInterface::LoadMVAReaderArray(Int_t id, const char * inputFile, const char *methodMask, const char *dirMask){
303  auto fin = TFile::Open(inputFile);
304  if (fin==NULL){
305  ::Error("LoadMVAReader","Invalid input file\t%s", inputFile);
306  return 1;
307  }
308  TList * keyArrayDir = fin->GetListOfKeys();
309  TPRegexp regDir(dirMask);
310  TPRegexp regMask(methodMask);
311  for (Int_t iDir=0; iDir<keyArrayDir->GetEntries(); iDir++){
312  if (regDir.Match(keyArrayDir->At(iDir)->GetName())==0) continue;
313  printf("%s\n",keyArrayDir->At(iDir)->GetName());
314  if (fin->cd(keyArrayDir->At(iDir)->GetName()) != 0) {
315  TList *keyArrayMethod = fin->GetDirectory(keyArrayDir->At(iDir)->GetName())->GetListOfKeys();
316  for (Int_t iMethod = 0; iMethod < keyArrayDir->GetEntries(); iMethod++) {
317  TKey *key = (TKey *) keyArrayMethod->At(iMethod);
318  if (key == 0) continue;
319  if (regMask.Match(keyArrayMethod->At(iMethod)->GetName()) == 0) continue;
320  TString className = key->GetClassName();
321  if (!className.Contains("TObjString")) continue;
322  printf("%s/%s\n", keyArrayDir->At(iDir)->GetName(), keyArrayMethod->At(iMethod)->GetName());
323  TMVA::MethodBase *method = AliNDFunctionInterface::LoadMVAReader(-1, inputFile, keyArrayMethod->At(iMethod)->GetName(), keyArrayDir->At(iDir)->GetName());
325  }
326  }
327  }
328  return 0;
329 }
330 
331 
337 Int_t AliNDFunctionInterface::AppendMethodToArray(Int_t index, TMVA::MethodBase * method){
339  if (array==NULL){
340  array=new TObjArray(100);
342  }
343  array->AddLast(method);
344  return 0;
345 };
346 
357 Double_t AliNDFunctionInterface::EvalMVAStatArray(int id, int statType, vector<float> point){
359  if (array==NULL) return 0;
360  Int_t size = array->GetEntries();
361  if (size<=0) return 0;
362  TMVA::Event event = TMVA::Event(point,point.size());
363  vector<float> value(size);
364  for (Int_t i=0;i<size; i++){
365  TMVA::MethodBase * method= (TMVA::MethodBase *)array->UncheckedAt(i);
366  if (method==NULL) continue;
367  value[i]=method->GetRegressionValues(&event)[0];
368  }
369  if (statType==0) return TMath::Mean(value.size(),value.data());
370  if (statType==1) return TMath::Median(value.size(),value.data());
371  if (statType==2) return TMath::RMS(value.size(),value.data());
372  return 0;
373 }
374 
377  AliNDFunctionInterface::registerMethod("BDTRF25_8","!H:!V:NTrees=25:Shrinkage=0.1:UseRandomisedTrees:nCuts=20:MaxDepth=8",TMVA::Types::kBDT);
378  AliNDFunctionInterface::registerMethod( "BDTRF12_16", "!H:!V:NTrees=12:Shrinkage=0.1:UseRandomisedTrees:nCuts=20:MaxDepth=16", TMVA::Types::kBDT);
379  AliNDFunctionInterface::registerMethod("KNN","nkNN=20:ScaleFrac=0.8:SigmaFact=1.0:Kernel=Gaus:UseKernel=F:UseWeight=T:!Trim", TMVA::Types::kKNN);
380  AliNDFunctionInterface::registerMethod("MLP", "!H:!V:VarTransform=Norm:NeuronType=tanh:NCycles=20000:HiddenLayers=N+20:TestRate=6:TrainingMethod=BFGS:Sampling=0.3:SamplingEpoch=0.8:ConvergenceImprove=1e-6:ConvergenceTests=15:!UseRegulator",TMVA::Types::kMLP);
381 }
map< int, TMVA::MethodBase * > readerMethodBase
TMVA interface.
printf("Chi2/npoints = %f\n", TMath::Sqrt(chi2/npoints))
map< std::string, TMVA::Types::EMVA > regressionMethodID
map of registered TMVA regression methods
TFile * Open(const char *filename, Long64_t &nevents)
Int_t AppendMethodToArray(Int_t index, TMVA::MethodBase *method)
Register TMVA method to the array at index Not assumed to be used by users.
#define TObjArray
Double_t GetDeltaInterpolationLinear(THn *his, Double_t *xyz, Int_t dIndex, Int_t verbose)
Linear interpolation of the numerical derivative dV/dx (V(i+1)-V(i-1))/(2(delta) ...
Double_t GetInterpolationLinear(THn *his, Double_t *xyz, Int_t verbose)
Linear interpolation of the bin content.
TObjArray * array
Definition: AnalyzeLaser.C:12
TTree * tree
map< std::string, std::string > regressionMethodSetting
map of registered array of TMVA::MethodBase - used to define TMVA statistics (Mean, Median, RMS, quantiles)
Int_t LoadMVAReaderArray(Int_t id, const char *inputFile, const char *methodMask, const char *dirMask)
TMVA::MethodBase * LoadMVAReader(Int_t id, const char *inputFile, const char *method, const char *dir)
MVA regression.
void registerMethod(std::string method, std::string content, TMVA::Types::EMVA id)
example registering default methods ()
map< int, TObjArray * > readerMethodBaseArray
map of registered TMVA::MethodBase
TCut cut
Definition: MakeGlobalFit.C:75
void registerDefaultMVAMethods()
map of registered TMVA regression methods
Double_t EvalMVAStatArray(int id, int statType, vector< float > point)
Append method into array of methods - used e.g for bootstrap statistics.
Int_t FitMVA(TTree *tree, const char *varFit, TCut cut, const char *variableList, const char *methodList, const char *weights=NULL, Int_t index=-1)