AliRoot Core  3dc7879 (3dc7879)
TStatToolkit.h
Go to the documentation of this file.
1 #ifndef TSTATTOOLKIT_H
2 #define TSTATTOOLKIT_H
3 /* Copyright(c) 1998-1999, ALICE Experiment at CERN, All rights reserved. *
4  * See cxx source for full Copyright notice */
5 /* $Id$ */
6 
11 
12 #include "TMath.h"
13 #include "Riostream.h"
14 #include "TH1F.h"
15 #include "TH2F.h"
16 #include "TH3.h"
17 #include "THnBase.h"
18 #include "TF1.h"
19 #include "TTree.h"
20 #include "TChain.h"
21 #include "TObjString.h"
22 #include "TLinearFitter.h"
23 #include "TGraph2D.h"
24 #include "TGraph.h"
25 #include "TGraphErrors.h"
26 #include "TMultiGraph.h"
27 #include "TCanvas.h"
28 #include "TLatex.h"
29 #include "TCut.h"
30 #include "THashList.h"
31 #include "TFitResultPtr.h"
32 #include "TFitResult.h"
33 #include "TPaletteAxis.h"
34 //
35 // includes necessary for test functions
36 //
37 #include "TSystem.h"
38 #include "TRandom.h"
39 #include "TStopwatch.h"
40 #include "TTreeStream.h"
41 
42 #include "TObject.h"
43 #include "TVectorD.h"
44 #include "TVectorF.h"
45 #include "TMatrixD.h"
46 #include "TMatrixF.h"
47 #include "TList.h"
48 #include <float.h>
49 //#include "TGraph2D.h"
50 //#include "TGraph.h"
51 class THashList;
52 
53 
54 namespace TStatToolkit {
56  enum ENormType {kL1, kL2, kLp, kMax, kHamming, kNNormType }; // http://en.wikipedia.org/w/index.php?title=Norm_(mathematics)&oldid=655824636
57  //
58  //
59  void EvaluateUni(Int_t nvectors, Double_t *data, Double_t &mean, Double_t &sigma, Int_t hh);
60  void EvaluateUniExternal(Int_t nvectors, Double_t *data, Double_t &mean, Double_t &sigma, Int_t hh, Float_t externalfactor=1);
61  Int_t Freq(Int_t n, const Int_t *inlist, Int_t *outlist, Bool_t down);
62  //
63  // HISTOGRAMS TOOLS
64  //
65  template <typename T>
66  void TruncatedMean(const TH1 * his, TVectorT<T> *param, Float_t down=0, Float_t up=1.0, Bool_t verbose=kFALSE);
67  void MedianFilter(TH1 * his1D, Int_t nmedian);
68 
69  template <typename T> Bool_t LTMHisto(TH1 * his, TVectorT<T> &param , Float_t fraction=1);
70  template <typename T> Int_t* LTMUnbinned(int np, const T *arr, TVectorT<T> &params , Float_t keep=1.0);
71 
72  template <typename T> void Reorder(int np, T *arr, const int *idx);
73  //
74  template <typename T>
75  void LTM(TH1 * his, TVectorT<T> *param=0 , Float_t fraction=1, Bool_t verbose=kFALSE);
76 
77  template <typename T>
78  Double_t FitGaus(TH1* his, TVectorT<T> *param=0, TMatrixT<T> *matrix=0, Float_t xmin=0, Float_t xmax=0, Bool_t verbose=kFALSE);
79 
80  template <typename T>
81  Double_t FitGaus(Float_t *arr, Int_t nBins, Float_t xMin, Float_t xMax, TVectorT<T> *param=0, TMatrixT<T> *matrix=0, Bool_t verbose=kFALSE);
82  Float_t GetCOG(const Short_t *arr, Int_t nBins, Float_t xMin, Float_t xMax, Float_t *rms=0, Float_t *sum=0);
83 
84  TGraph2D * MakeStat2D(TH3 * his, Int_t delta0, Int_t delta1, Int_t type);
85  TGraphErrors * MakeStat1D(TH2 * his, Int_t deltaBin, Double_t fraction, Int_t returnType, Int_t markerStyle, Int_t markerColor);
86 
87  Double_t RobustBinMedian(TH1* hist, Double_t fractionCut=1.);
88  //
89  // Graph tools
90  //
91  THashList *AddMetadata(TTree*, const char *vartagName,const char *varTagValue);
92  TNamed *GetMetadata(TTree* tree, const char *vartagName, TString *prefix=0, Bool_t fullMatch=kFALSE);
93  THashList * GetMetadata(TTree *tree);
94  TGraph * MakeGraphSparse(TTree * tree, const char * expr="Entry", const char * cut="1", Int_t mstyle=25, Int_t mcolor=1, Float_t msize=-1, Float_t offset=0.0);
95  TGraphErrors * MakeGraphErrors(TTree * tree, const char * expr="Entry", const char * cut="1", Int_t mstyle=25, Int_t mcolor=1, Float_t msize=-1, Float_t offset=0.0, Int_t entries=10000000, Int_t firstEntry=0);
96  TMultiGraph * MakeMultGraph(TTree * tree, const char *groupName, const char* expr, const char * cut, const char * markers, const char *colors, Bool_t drawSparse, Float_t msize, Float_t sigmaRange, TLegend * legend, Bool_t comp=kTRUE );
97  void RebinSparseGraph(TGraph * graph0, TGraph *graph1, Option_t * option="");
98  void RebinSparseMultiGraph(TMultiGraph *multiGraph, TGraph *graphRef);
99  void MakeMultiGraphSparse(TMultiGraph *multiGraph);
100  //
101  // Fitting function
102  //
103  TString* FitPlane(TTree * tree, const char* drawCommand, const char* formula, const char* cuts, Double_t & chi2, Int_t &npoints, TVectorD &fitParam, TMatrixD &covMatrix, Float_t frac=-1, Int_t start=0, Int_t stop=10000000, Bool_t fix0=kFALSE);
104  TString* FitPlaneFixed(TTree * tree, const char* drawCommand, const char* formula, const char* cuts, Double_t & chi2, Int_t &npoints, TVectorD &fitParam, TMatrixD &covMatrix, Float_t frac=-1, Int_t start=0, Int_t stop=10000000);
105  //
106  //Linear fitter helper function
107  //
108  TString* FitPlaneConstrain(TTree * tree, const char* drawCommand, const char* formula, const char* cuts, Double_t & chi2, Int_t &npoints, TVectorD &fitParam, TMatrixD &covMatrix, Float_t frac=-1, Int_t start=0, Int_t stop=10000000, Double_t constrain=-1);
109  Int_t GetFitIndex(const TString fString, const TString subString);
110  TString FilterFit(const TString &input, const TString filter, TVectorD &vec, TMatrixD &covar);
111  void Update1D(Double_t delta, Double_t sigma, Int_t s1, TMatrixD &param, TMatrixD &covar);
112  void Constrain1D(const TString &input, const TString filter, TVectorD &param, TMatrixD & covar, Double_t mean, Double_t sigma);
113  TString MakeFitString(const TString &input, const TVectorD &param, const TMatrixD & covar, Bool_t verbose=kFALSE);
114  //
115  // TTree function for the trending
116  //
117  Int_t MakeStatAlias(TTree * tree, const char * expr, const char * cut, const char * alias);
118  void MakeAnchorAlias(TTree * tree, TString& sTrendVars, Int_t doCheck, Int_t verbose);
119  void MakeCombinedAlias(TTree * tree, TString& sCombinedStatus, Bool_t doCheck, Int_t verbose );
120  Int_t SetStatusAlias(TTree * tree, const char * expr, const char * cut, const char * alias);
121 
122  TMultiGraph* MakeStatusMultGr(TTree * tree, const char * expr, const char * cut, const char * alias, Int_t igr=0);
123  void AddStatusPad(TCanvas* c1, Float_t padratio, Float_t bottommargin);
124  void DrawMultiGraph(TMultiGraph *graph, Option_t * option); //
125  void DrawStatusGraphs(TObjArray* oaMultGr);
126  TTree* WriteStatusToTree(TObject* oStatusGr);
127  TMultiGraph* MakeStatusLines(TTree * tree, const char * expr, const char * cut, const char * alias);
128  void MakeSummaryTree(TTree* treeIn, TTreeSRedirector *pcstream, TObjString& sumID, TCut &selection);
129  Double_t GetDefaultStat(TTree * tree, const char * var, const char * selection, TStatType statType);
130  //
131  //
132  void MakeDistortionMap(Int_t iter, THnBase * histo, TTreeSRedirector *pcstream, TMatrixD &projectionInfo, Int_t dumpHisto=100,Int_t verbose=kFALSE);
133  void MakeDistortionMapFast(THnBase * histo, TTreeSRedirector *pcstream, TMatrixD &projectionInfo, Int_t verbose=0, Double_t fractionCut=0.1, const char * estimators=0);
134 
135  //
136  // norm (distance) functions
137  //
138  void CombineArray(TTree *tree, TVectorD &values);
139  Double_t GetDistance(const TVectorD &values, const ENormType normType,
140  const Bool_t normaliseToEntries=kFALSE, const Double_t pvalue=1.);
141  Double_t GetDistance(const Int_t size, const Double_t *values, const ENormType normType,
142  const Bool_t normaliseToEntries=kFALSE, const Double_t pvalue=1.);
143  Double_t GetDistance(TTree * tree, const char * var, const char * selection,
144  const ENormType normType, const Bool_t normaliseToEntries=kFALSE, const Double_t pvalue=1.);
145  //
146  // TTree function for robust draw
147  //
148  TH1* DrawHistogram(TTree * tree, const char* drawCommand, const char* cuts = "1", const char* hname = "histo", const char* htitle = "histo", Int_t nsigma = 4, Float_t fraction = 0.75, TObjArray *description=0 );
149  Int_t AdaptHistoMetadata(TTree* tree, TH1 *histogram, TString option);
150  //
151  // TestFunctions:
152  //
153  void TestGausFit(Int_t nhistos=5000);
154  void CheckTreeAliases(TTree * tree, Int_t ncheck);
155  //
156  // min, max, mean ...
157  template <typename T>
158  void GetMinMax(const T* arr, Long64_t n, double &minVal, double &maxVal);
159  template <typename T>
160  void GetMinMaxMean(const T* arr, Long64_t n, double &minVal, double &maxVal, double &meanVal);
161 
162 };
163 
164 //___________________________________________________________
165 template <typename T>
166 void TStatToolkit::GetMinMax(const T* arr, Long64_t n, double &minVal, double &maxVal)
167 {
168  // find min, max entries in the array in a single loop
169  minVal = DBL_MAX;
170  maxVal = -DBL_MAX;
171  for (int i=n;i--;) {
172  double val = arr[i];
173  if (val<minVal) minVal = val;
174  if (val>maxVal) maxVal = val;
175  }
176 }
177 
178 //___________________________________________________________
179 template <typename T>
180 void TStatToolkit::GetMinMaxMean(const T* arr, Long64_t n, double &minVal, double &maxVal, double &meanVal)
181 {
182  // find min, max entries in the array in a single loop
183  minVal = DBL_MAX;
184  maxVal = -DBL_MAX;
185  meanVal = 0;
186  for (int i=n;i--;) {
187  double val = arr[i];
188  if (val<minVal) minVal = val;
189  if (val>maxVal) maxVal = val;
190  meanVal += val;
191  }
192  if (n) meanVal /= n;
193 }
194 
195 //___TStatToolkit__________________________________________________________________________
196 template <typename T>
197 void TStatToolkit::TruncatedMean(const TH1 * his, TVectorT<T> *param, Float_t down, Float_t up, Bool_t verbose){
198  //
199  //
200  //
201  Int_t nbins = his->GetNbinsX();
202  Float_t nentries = his->GetEntries();
203  Float_t sum =0;
204  Float_t mean = 0;
205  Float_t sigma2 = 0;
206  Float_t ncumul=0;
207  for (Int_t ibin=1;ibin<nbins; ibin++){
208  ncumul+= his->GetBinContent(ibin);
209  Float_t fraction = Float_t(ncumul)/Float_t(nentries);
210  if (fraction>down && fraction<up){
211  sum+=his->GetBinContent(ibin);
212  mean+=his->GetBinCenter(ibin)*his->GetBinContent(ibin);
213  sigma2+=his->GetBinCenter(ibin)*his->GetBinCenter(ibin)*his->GetBinContent(ibin);
214  }
215  }
216  mean/=sum;
217  sigma2= TMath::Sqrt(TMath::Abs(sigma2/sum-mean*mean));
218  if (param){
219  (*param)[0] = his->GetMaximum();
220  (*param)[1] = mean;
221  (*param)[2] = sigma2;
222 
223  }
224  if (verbose) printf("Mean\t%f\t Sigma2\t%f\n", mean,sigma2);
225 }
226 
227 template <typename T>
228 Bool_t TStatToolkit::LTMHisto(TH1 *his1D, TVectorT<T> &params , Float_t fraction){
229  //
230  // LTM : Trimmed mean on histogram - Modified version for binned data
231  //
232  // Robust statistic to estimate properties of the distribution
233  // To handle binning error special treatment
234  // for definition of unbinned data see:
235  // http://en.wikipedia.org/w/index.php?title=Trimmed_estimator&oldid=582847999
236  //
237  // Function parameters:
238  // his1D - input histogram
239  // params - vector with parameters
240  // - 0 - area
241  // - 1 - mean
242  // - 2 - rms
243  // - 3 - error estimate of mean
244  // - 4 - error estimate of RMS
245  // - 5 - first accepted bin position
246  // - 6 - last accepted bin position
247  //
248  Int_t nbins = his1D->GetNbinsX();
249  Int_t nentries = (Int_t)his1D->GetEntries();
250  const Double_t kEpsilon=0.0000000001;
251 
252  if (nentries<=0) return 0;
253  if (fraction>1) fraction=0;
254  if (fraction<0) return 0;
255  TVectorD vectorX(nbins);
256  TVectorD vectorMean(nbins);
257  TVectorD vectorRMS(nbins);
258  Double_t sumCont=0;
259  for (Int_t ibin0=1; ibin0<=nbins; ibin0++) sumCont+=his1D->GetBinContent(ibin0);
260  //
261  Double_t minRMS=his1D->GetRMS()*10000;
262  //Int_t maxBin=0;
263  //
264  for (Int_t ibin0=1; ibin0<nbins; ibin0++){
265  Double_t sum0=0, sum1=0, sum2=0;
266  Int_t ibin1=ibin0;
267  for ( ibin1=ibin0; ibin1<=nbins; ibin1++){
268  Double_t cont=his1D->GetBinContent(ibin1);
269  Double_t x= his1D->GetBinCenter(ibin1);
270  sum0+=cont;
271  sum1+=cont*x;
272  sum2+=cont*x*x;
273  if ( (ibin0!=ibin1) && sum0>=fraction*sumCont) break;
274  }
275  vectorX[ibin0]=his1D->GetBinCenter(ibin0);
276  if (sum0<fraction*sumCont) continue;
277  //
278  // substract fractions of bin0 and bin1 to keep sum0=fration*sumCont
279  //
280  Double_t diff = sum0-fraction*sumCont;
281  Double_t mean = (sum0>0) ? sum1/sum0:0;
282  //
283  Double_t x0=his1D->GetBinCenter(ibin0);
284  Double_t x1=his1D->GetBinCenter(ibin1);
285  Double_t y0=his1D->GetBinContent(ibin0);
286  Double_t y1=his1D->GetBinContent(ibin1);
287  //
288  Double_t d = y0+y1-diff; //enties to keep
289  Double_t w0=0,w1=0;
290  if (y0<=kEpsilon&&y1>kEpsilon){
291  w1=d/y1;
292  }
293  if (y1<=kEpsilon&&y0>kEpsilon){
294  w0=d/y0;
295  }
296  if (y0>kEpsilon && y1>kEpsilon && x1>x0 ){
297  w0 = (d*(x1-mean))/((x1-x0)*y0);
298  w1 = (d-y0*w0)/y1;
299  //
300  if (w0>1) {w1+=(w0-1)*y0/y1; w0=1;}
301  if (w1>1) {w0+=(w1-1)*y1/y0; w1=1;}
302  }
303  if ( (x1>x0) &&TMath::Abs(y0*w0+y1*w1-d)>kEpsilon*sum0){
304  printf(" TStatToolkit::LTMHisto error\n");
305  }
306  sum0-=y0+y1;
307  sum1-=y0*x0;
308  sum1-=y1*x1;
309  sum2-=y0*x0*x0;
310  sum2-=y1*x1*x1;
311  //
312  Double_t xx0=his1D->GetXaxis()->GetBinUpEdge(ibin0)-0.5*w0*his1D->GetBinWidth(ibin0);
313  Double_t xx1=his1D->GetXaxis()->GetBinLowEdge(ibin1)+0.5*w1*his1D->GetBinWidth(ibin1);
314  sum0+=y0*w0+y1*w1;
315  sum1+=y0*w0*xx0;
316  sum1+=y1*w1*xx1;
317  sum2+=y0*w0*xx0*xx0;
318  sum2+=y1*w1*xx1*xx1;
319 
320  //
321  // choose the bin with smallest rms
322  //
323  if (sum0>0){
324  vectorMean[ibin0]=sum1/sum0;
325  vectorRMS[ibin0]=TMath::Sqrt(TMath::Abs(sum2/sum0-vectorMean[ibin0]*vectorMean[ibin0]));
326  if (vectorRMS[ibin0]<minRMS){
327  minRMS=vectorRMS[ibin0];
328  params[0]=sum0;
329  params[1]=vectorMean[ibin0];
330  params[2]=vectorRMS[ibin0];
331  params[3]=vectorRMS[ibin0]/TMath::Sqrt(sumCont*fraction);
332  params[4]=0; // what is the formula for error of RMS???
333  params[5]=ibin0;
334  params[6]=ibin1;
335  params[7]=his1D->GetBinCenter(ibin0);
336  params[8]=his1D->GetBinCenter(ibin1);
337  //maxBin=ibin0;
338  }
339  }else{
340  break;
341  }
342  }
343  return kTRUE;
344 }
345 
346 template <typename T>
347 Double_t TStatToolkit::FitGaus(TH1* his, TVectorT<T> *param, TMatrixT<T> */*matrix*/, Float_t xmin, Float_t xmax, Bool_t verbose){
348  //
349  // Fit histogram with gaussian function
350  //
351  // Prameters:
352  // return value- chi2 - if negative ( not enough points)
353  // his - input histogram
354  // param - vector with parameters
355  // xmin, xmax - range to fit - if xmin=xmax=0 - the full histogram range used
356  // Fitting:
357  // 1. Step - make logarithm
358  // 2. Linear fit (parabola) - more robust - always converge
359  // 3. In case of small statistic bins are averaged
360  //
361  static TLinearFitter fitter(3,"pol2");
362  TVectorD par(3);
363  TVectorD sigma(3);
364  TMatrixD mat(3,3);
365  if (his->GetMaximum()<4) return -1;
366  if (his->GetEntries()<12) return -1;
367  if (his->GetRMS()<mat.GetTol()) return -1;
368  Float_t maxEstimate = his->GetEntries()*his->GetBinWidth(1)/TMath::Sqrt((TMath::TwoPi()*his->GetRMS()));
369  Int_t dsmooth = TMath::Nint(6./TMath::Sqrt(maxEstimate));
370 
371  if (maxEstimate<1) return -1;
372  Int_t nbins = his->GetNbinsX();
373  Int_t npoints=0;
374  //
375 
376 
377  if (xmin>=xmax){
378  xmin = his->GetXaxis()->GetXmin();
379  xmax = his->GetXaxis()->GetXmax();
380  }
381  for (Int_t iter=0; iter<2; iter++){
382  fitter.ClearPoints();
383  npoints=0;
384  for (Int_t ibin=1;ibin<nbins+1; ibin++){
385  Int_t countB=1;
386  Float_t entriesI = his->GetBinContent(ibin);
387  for (Int_t delta = -dsmooth; delta<=dsmooth; delta++){
388  if (ibin+delta>1 &&ibin+delta<nbins-1){
389  entriesI += his->GetBinContent(ibin+delta);
390  countB++;
391  }
392  }
393  entriesI/=countB;
394  Double_t xcenter= his->GetBinCenter(ibin);
395  if (xcenter<xmin || xcenter>xmax) continue;
396  Double_t error=1./TMath::Sqrt(countB);
397  Float_t cont=2;
398  if (iter>0){
399  if (par[0]+par[1]*xcenter+par[2]*xcenter*xcenter>20) return 0;
400  cont = TMath::Exp(par[0]+par[1]*xcenter+par[2]*xcenter*xcenter);
401  if (cont>1.) error = 1./TMath::Sqrt(cont*Float_t(countB));
402  }
403  if (entriesI>1&&cont>1){
404  fitter.AddPoint(&xcenter,TMath::Log(Float_t(entriesI)),error);
405  npoints++;
406  }
407  }
408  if (npoints>3){
409  fitter.Eval();
410  fitter.GetParameters(par);
411  }else{
412  break;
413  }
414  }
415  if (npoints<=3){
416  return -1;
417  }
418  fitter.GetParameters(par);
419  fitter.GetCovarianceMatrix(mat);
420  if (TMath::Abs(par[1])<mat.GetTol()) return -1;
421  if (TMath::Abs(par[2])<mat.GetTol()) return -1;
422  Double_t chi2 = fitter.GetChisquare()/Float_t(npoints);
423  //fitter.GetParameters();
424  if (!param) param = new TVectorT<T>(3);
425  // if (!matrix) matrix = new TMatrixD(3,3); // Covariance matrix to be implemented
426  (*param)[1] = par[1]/(-2.*par[2]);
427  (*param)[2] = 1./TMath::Sqrt(TMath::Abs(-2.*par[2]));
428  (*param)[0] = TMath::Exp(par[0]+ par[1]* (*param)[1] + par[2]*(*param)[1]*(*param)[1]);
429  if (verbose){
430  par.Print();
431  mat.Print();
432  param->Print();
433  printf("Chi2=%f\n",chi2);
434  TF1 * f1= new TF1("f1","[0]*exp(-(x-[1])^2/(2*[2]*[2]))",his->GetXaxis()->GetXmin(),his->GetXaxis()->GetXmax());
435  f1->SetParameter(0, (*param)[0]);
436  f1->SetParameter(1, (*param)[1]);
437  f1->SetParameter(2, (*param)[2]);
438  f1->Draw("same");
439  }
440  return chi2;
441 }
442 
443 template <typename T>
444 void TStatToolkit::LTM(TH1 * his, TVectorT<T> *param , Float_t fraction, Bool_t verbose){
445  //
446  // LTM : Trimmed mean on histogram - Modified version for binned data
447  //
448  // Robust statistic to estimate properties of the distribution
449  // See http://en.wikipedia.org/w/index.php?title=Trimmed_estimator&oldid=582847999
450  //
451  // New faster version is under preparation
452  //
453  if (!param) return;
454  (*param)[0]=0;
455  (*param)[1]=0;
456  (*param)[2]=0;
457  Int_t nbins = his->GetNbinsX();
458  Int_t nentries = (Int_t)his->GetEntries();
459  if (nentries<=0) return;
460  Double_t *data = new Double_t[nentries];
461  Int_t npoints=0;
462  for (Int_t ibin=1;ibin<nbins; ibin++){
463  Double_t entriesI = his->GetBinContent(ibin);
464  //Double_t xcenter= his->GetBinCenter(ibin);
465  Double_t x0 = his->GetXaxis()->GetBinLowEdge(ibin);
466  Double_t w = his->GetXaxis()->GetBinWidth(ibin);
467  for (Int_t ic=0; ic<entriesI; ic++){
468  if (npoints<nentries){
469  data[npoints]= x0+w*Double_t((ic+0.5)/entriesI);
470  npoints++;
471  }
472  }
473  }
474  Double_t mean, sigma;
475  Int_t npoints2=TMath::Min(Int_t(fraction*Float_t(npoints)),npoints-1);
476  npoints2=TMath::Max(Int_t(0.5*Float_t(npoints)),npoints2);
477  TStatToolkit::EvaluateUni(npoints, data, mean,sigma,npoints2);
478  delete [] data;
479  if (verbose) printf("Mean\t%f\t Sigma2\t%f\n", mean,sigma);
480  if (param){
481  (*param)[0] = his->GetMaximum();
482  (*param)[1] = mean;
483  (*param)[2] = sigma;
484  }
485 }
486 
487 
488 template <typename T>
489 Double_t TStatToolkit::FitGaus(Float_t *arr, Int_t nBins, Float_t xMin, Float_t xMax, TVectorT<T> *param, TMatrixT<T> */*matrix*/, Bool_t verbose){
490  //
491  // Fit histogram with gaussian function
492  //
493  // Prameters:
494  // nbins: size of the array and number of histogram bins
495  // xMin, xMax: histogram range
496  // param: paramters of the fit (0-Constant, 1-Mean, 2-Sigma)
497  // matrix: covariance matrix -- not implemented yet, pass dummy matrix!!!
498  //
499  // Return values:
500  // >0: the chi2 returned by TLinearFitter
501  // -3: only three points have been used for the calculation - no fitter was used
502  // -2: only two points have been used for the calculation - center of gravity was uesed for calculation
503  // -1: only one point has been used for the calculation - center of gravity was uesed for calculation
504  // -4: invalid result!!
505  //
506  // Fitting:
507  // 1. Step - make logarithm
508  // 2. Linear fit (parabola) - more robust - always converge
509  //
510  static TLinearFitter fitter(3,"pol2");
511  static TMatrixD mat(3,3);
512  static Double_t kTol = mat.GetTol();
513  fitter.StoreData(kFALSE);
514  fitter.ClearPoints();
515  TVectorD par(3);
516  TVectorD sigma(3);
517  TMatrixD matA(3,3);
518  TMatrixD b(3,1);
519  Float_t rms = TMath::RMS(nBins,arr);
520  Float_t max = TMath::MaxElement(nBins,arr);
521  Float_t binWidth = (xMax-xMin)/(Float_t)nBins;
522 
523  Float_t meanCOG = 0;
524  Float_t rms2COG = 0;
525  Float_t sumCOG = 0;
526 
527  Float_t entries = 0;
528  Int_t nfilled=0;
529 
530  for (Int_t i=0; i<nBins; i++){
531  entries+=arr[i];
532  if (arr[i]>0) nfilled++;
533  }
534 
535  if (max<4) return -4;
536  if (entries<12) return -4;
537  if (rms<kTol) return -4;
538 
539  Int_t npoints=0;
540  //
541 
542  //
543  for (Int_t ibin=0;ibin<nBins; ibin++){
544  Float_t entriesI = arr[ibin];
545  if (entriesI>1){
546  Double_t xcenter = xMin+(ibin+0.5)*binWidth;
547 
548  Float_t error = 1./TMath::Sqrt(entriesI);
549  Float_t val = TMath::Log(Float_t(entriesI));
550  fitter.AddPoint(&xcenter,val,error);
551  if (npoints<3){
552  matA(npoints,0)=1;
553  matA(npoints,1)=xcenter;
554  matA(npoints,2)=xcenter*xcenter;
555  b(npoints,0)=val;
556  meanCOG+=xcenter*entriesI;
557  rms2COG +=xcenter*entriesI*xcenter;
558  sumCOG +=entriesI;
559  }
560  npoints++;
561  }
562  }
563 
564 
565  Double_t chi2 = 0;
566  if (npoints>=3){
567  if ( npoints == 3 ){
568  //analytic calculation of the parameters for three points
569  matA.Invert();
570  TMatrixD res(1,3);
571  res.Mult(matA,b);
572  par[0]=res(0,0);
573  par[1]=res(0,1);
574  par[2]=res(0,2);
575  chi2 = -3.;
576  } else {
577  // use fitter for more than three points
578  fitter.Eval();
579  fitter.GetParameters(par);
580  fitter.GetCovarianceMatrix(mat);
581  chi2 = fitter.GetChisquare()/Float_t(npoints);
582  }
583  if (TMath::Abs(par[1])<kTol) return -4;
584  if (TMath::Abs(par[2])<kTol) return -4;
585 
586  if (!param) param = new TVectorT<T>(3);
587  //if (!matrix) matrix = new TMatrixT<T>(3,3); // !!!!might be a memory leek. use dummy matrix pointer to call this function! // Covariance matrix to be implemented
588 
589  (*param)[1] = par[1]/(-2.*par[2]);
590  (*param)[2] = 1./TMath::Sqrt(TMath::Abs(-2.*par[2]));
591  Double_t lnparam0 = par[0]+ par[1]* (*param)[1] + par[2]*(*param)[1]*(*param)[1];
592  if ( lnparam0>307 ) return -4;
593  (*param)[0] = TMath::Exp(lnparam0);
594  if (verbose){
595  par.Print();
596  mat.Print();
597  param->Print();
598  printf("Chi2=%f\n",chi2);
599  TF1 * f1= new TF1("f1","[0]*exp(-(x-[1])^2/(2*[2]*[2]))",xMin,xMax);
600  f1->SetParameter(0, (*param)[0]);
601  f1->SetParameter(1, (*param)[1]);
602  f1->SetParameter(2, (*param)[2]);
603  f1->Draw("same");
604  }
605  return chi2;
606  }
607 
608  if (npoints == 2){
609  //use center of gravity for 2 points
610  meanCOG/=sumCOG;
611  rms2COG /=sumCOG;
612  (*param)[0] = max;
613  (*param)[1] = meanCOG;
614  (*param)[2] = TMath::Sqrt(TMath::Abs(meanCOG*meanCOG-rms2COG));
615  chi2=-2.;
616  }
617  if ( npoints == 1 ){
618  meanCOG/=sumCOG;
619  (*param)[0] = max;
620  (*param)[1] = meanCOG;
621  (*param)[2] = binWidth/TMath::Sqrt(12);
622  chi2=-1.;
623  }
624  return chi2;
625 
626 }
627 
628 template <typename T>
629 Int_t* TStatToolkit::LTMUnbinned(int np, const T *arr, TVectorT<T> &params , Float_t keep)
630 {
631  //
632  // LTM : Trimmed mean of unbinned array
633  //
634  // Robust statistic to estimate properties of the distribution
635  // To handle binning error special treatment
636  // for definition of unbinned data see:
637  // http://en.wikipedia.org/w/index.php?title=Trimmed_estimator&oldid=582847999
638  //
639  // Function parameters:
640  // np - number of points in the array
641  // arr - data array (unsorted)
642  // params - vector with parameters
643  // - 0 - area
644  // - 1 - mean
645  // - 2 - rms
646  // - 3 - error estimate of mean
647  // - 4 - error estimate of RMS
648  // - 5 - first accepted element (of sorted array)
649  // - 6 - last accepted element (of sorted array)
650  //
651  // On success returns index of sorted events
652  //
653  static int book = 0;
654  static int *index = 0;
655  static float* w = 0;
656  int keepN = np*keep;
657  if (keepN>np) keepN = np;
658  if (keepN<2) return 0;
659  params[0] = 0.0f;
660  if (book<np) {
661  delete[] index;
662  book = np;
663  index = new int[book];
664  delete[] w;
665  w = new float[book+book];
666  }
667  //
668  float *wx1 = w, *wx2 = wx1+np;
669  TMath::Sort(np,arr,index,kFALSE); // sort in increasing order
670  // build cumulants
671  double sum1=0.0,sum2=0.0;
672  for (int i=0;i<np;i++) {
673  double x = arr[index[i]];
674  wx1[i] = (sum1+=x);
675  wx2[i] = (sum2+=x*x);
676  }
677  double minRMS = sum2+1e6;
678  params[0] = keepN;
679  int limI = np - keepN+1;
680  for (int i=0;i<limI;i++) {
681  int limJ = i+keepN-1;
682  Double_t sum1 = double(wx1[limJ]) - double(i ? wx1[i-1] : 0.0);
683  Double_t sum2 = double(wx2[limJ]) - double(i ? wx2[i-1] : 0.0);
684  double mean = sum1/keepN;
685  double rms2 = sum2/keepN - mean*mean;
686  // printf("%d : %d %e %e\n",i,limJ, mean, TMath::Sqrt(rms2));
687  if (rms2>minRMS) continue;
688  minRMS = rms2;
689  params[1] = mean;
690  params[2] = rms2;
691  params[5] = i;
692  params[6] = limJ;
693  }
694  //
695  if (!params[0]) return 0;
696  if (params[2]<0) {
697  ::Error("TStatToolkit::LTMUnbinned","rounding error in RMS<0");
698  // current fast algorithm N - Sometimes rounding errors can lead to negative RMS
699  // more precise algorithm N2
700  throw 1;
701  }
702  params[2] = TMath::Sqrt(params[2]);
703  params[3] = params[2]/TMath::Sqrt(params[0]); // error on mean
704  params[4] = params[3]/TMath::Sqrt(2.0); // error on RMS
705  return index;
706 }
707 
708 template <typename T>
709 void TStatToolkit::Reorder(int np, T *arr, const int *idx)
710 {
711  // rearange arr in order given by idx
712  const int kMaxOnStack = 10000;
713  // don't abuse stack
714  T *arrCHeap=0, arrCstack[np<kMaxOnStack ? np:1], *arrC=np<kMaxOnStack ? &arrCstack[0] : (arrCHeap=new T[np]);
715  memcpy(arrC,arr,np*sizeof(T));
716  for (int i=np;i--;) arr[i] = arrC[idx[i]];
717  delete[] arrCHeap;
718  //
719 }
720 
721 
722 #endif
TBrowser b
Definition: RunAnaESD.C:12
Int_t colors[3]
Definition: CalibCosmic.C:62
void GetMinMax(const T *arr, Long64_t n, double &minVal, double &maxVal)
Definition: TStatToolkit.h:166
TNamed * GetMetadata(TTree *tree, const char *vartagName, TString *prefix=0, Bool_t fullMatch=kFALSE)
printf("Chi2/npoints = %f\n", TMath::Sqrt(chi2/npoints))
TString FilterFit(const TString &input, const TString filter, TVectorD &vec, TMatrixD &covar)
const Double_t kEpsilon
void TestGausFit(Int_t nhistos=5000)
void DrawStatusGraphs(TObjArray *oaMultGr)
TMultiGraph * MakeStatusMultGr(TTree *tree, const char *expr, const char *cut, const char *alias, Int_t igr=0)
Summary of statistics functions.
void MakeDistortionMapFast(THnBase *histo, TTreeSRedirector *pcstream, TMatrixD &projectionInfo, Int_t verbose=0, Double_t fractionCut=0.1, const char *estimators=0)
Double_t FitGaus(TH1 *his, TVectorT< T > *param=0, TMatrixT< T > *matrix=0, Float_t xmin=0, Float_t xmax=0, Bool_t verbose=kFALSE)
Definition: TStatToolkit.h:347
#define TObjArray
TVectorD vec
Definition: AnalyzeLaser.C:8
void Constrain1D(const TString &input, const TString filter, TVectorD &param, TMatrixD &covar, Double_t mean, Double_t sigma)
Double_t RobustBinMedian(TH1 *hist, Double_t fractionCut=1.)
void MakeCombinedAlias(TTree *tree, TString &sCombinedStatus, Bool_t doCheck, Int_t verbose)
void MakeDistortionMap(Int_t iter, THnBase *histo, TTreeSRedirector *pcstream, TMatrixD &projectionInfo, Int_t dumpHisto=100, Int_t verbose=kFALSE)
Int_t SetStatusAlias(TTree *tree, const char *expr, const char *cut, const char *alias)
TGraph2D * MakeStat2D(TH3 *his, Int_t delta0, Int_t delta1, Int_t type)
TH1 * DrawHistogram(TTree *tree, const char *drawCommand, const char *cuts="1", const char *hname="histo", const char *htitle="histo", Int_t nsigma=4, Float_t fraction=0.75, TObjArray *description=0)
void MakeMultiGraphSparse(TMultiGraph *multiGraph)
TMatrixD mat
Definition: AnalyzeLaser.C:9
THashList * AddMetadata(TTree *, const char *vartagName, const char *varTagValue)
TMultiGraph * MakeStatusLines(TTree *tree, const char *expr, const char *cut, const char *alias)
TTreeSRedirector * pcstream
void MakeAnchorAlias(TTree *tree, TString &sTrendVars, Int_t doCheck, Int_t verbose)
TGraphErrors * MakeGraphErrors(TTree *tree, const char *expr="Entry", const char *cut="1", Int_t mstyle=25, Int_t mcolor=1, Float_t msize=-1, Float_t offset=0.0, Int_t entries=10000000, Int_t firstEntry=0)
npoints
Definition: driftITSTPC.C:85
void Update1D(Double_t delta, Double_t sigma, Int_t s1, TMatrixD &param, TMatrixD &covar)
Bool_t LTMHisto(TH1 *his, TVectorT< T > &param, Float_t fraction=1)
Definition: TStatToolkit.h:228
void LTM(TH1 *his, TVectorT< T > *param=0, Float_t fraction=1, Bool_t verbose=kFALSE)
Definition: TStatToolkit.h:444
void sum()
Int_t AdaptHistoMetadata(TTree *tree, TH1 *histogram, TString option)
Int_t Freq(Int_t n, const Int_t *inlist, Int_t *outlist, Bool_t down)
TTree * tree
Double_t chi2
Definition: AnalyzeLaser.C:7
void CheckTreeAliases(TTree *tree, Int_t ncheck)
char * prefix
TString * FitPlane(TTree *tree, const char *drawCommand, const char *formula, const char *cuts, Double_t &chi2, Int_t &npoints, TVectorD &fitParam, TMatrixD &covMatrix, Float_t frac=-1, Int_t start=0, Int_t stop=10000000, Bool_t fix0=kFALSE)
TGraphErrors * MakeStat1D(TH2 *his, Int_t deltaBin, Double_t fraction, Int_t returnType, Int_t markerStyle, Int_t markerColor)
void TruncatedMean(const TH1 *his, TVectorT< T > *param, Float_t down=0, Float_t up=1.0, Bool_t verbose=kFALSE)
Definition: TStatToolkit.h:197
AliComparisonDraw comp
Definition: TestAnalisys.C:74
TString MakeFitString(const TString &input, const TVectorD &param, const TMatrixD &covar, Bool_t verbose=kFALSE)
void Reorder(int np, T *arr, const int *idx)
Definition: TStatToolkit.h:709
void EvaluateUni(Int_t nvectors, Double_t *data, Double_t &mean, Double_t &sigma, Int_t hh)
TTree * treeIn
TString * FitPlaneConstrain(TTree *tree, const char *drawCommand, const char *formula, const char *cuts, Double_t &chi2, Int_t &npoints, TVectorD &fitParam, TMatrixD &covMatrix, Float_t frac=-1, Int_t start=0, Int_t stop=10000000, Double_t constrain=-1)
void RebinSparseMultiGraph(TMultiGraph *multiGraph, TGraph *graphRef)
TTree * WriteStatusToTree(TObject *oStatusGr)
Int_t nBins
void CombineArray(TTree *tree, TVectorD &values)
void RebinSparseGraph(TGraph *graph0, TGraph *graph1, Option_t *option="")
void MakeSummaryTree(TTree *treeIn, TTreeSRedirector *pcstream, TObjString &sumID, TCut &selection)
TGraph * MakeGraphSparse(TTree *tree, const char *expr="Entry", const char *cut="1", Int_t mstyle=25, Int_t mcolor=1, Float_t msize=-1, Float_t offset=0.0)
Double_t GetDefaultStat(TTree *tree, const char *var, const char *selection, TStatType statType)
void GetMinMaxMean(const T *arr, Long64_t n, double &minVal, double &maxVal, double &meanVal)
Definition: TStatToolkit.h:180
void EvaluateUniExternal(Int_t nvectors, Double_t *data, Double_t &mean, Double_t &sigma, Int_t hh, Float_t externalfactor=1)
void MedianFilter(TH1 *his1D, Int_t nmedian)
Int_t * LTMUnbinned(int np, const T *arr, TVectorT< T > &params, Float_t keep=1.0)
Definition: TStatToolkit.h:629
TCut cut
Definition: MakeGlobalFit.C:75
void res(Char_t i)
Definition: Resolution.C:2
Float_t GetCOG(const Short_t *arr, Int_t nBins, Float_t xMin, Float_t xMax, Float_t *rms=0, Float_t *sum=0)
const Int_t markers[5]
Definition: makeBBFit.C:58
Double_t GetDistance(const TVectorD &values, const ENormType normType, const Bool_t normaliseToEntries=kFALSE, const Double_t pvalue=1.)
Int_t MakeStatAlias(TTree *tree, const char *expr, const char *cut, const char *alias)
void DrawMultiGraph(TMultiGraph *graph, Option_t *option)
TMultiGraph * MakeMultGraph(TTree *tree, const char *groupName, const char *expr, const char *cut, const char *markers, const char *colors, Bool_t drawSparse, Float_t msize, Float_t sigmaRange, TLegend *legend, Bool_t comp=kTRUE)
TString * FitPlaneFixed(TTree *tree, const char *drawCommand, const char *formula, const char *cuts, Double_t &chi2, Int_t &npoints, TVectorD &fitParam, TMatrixD &covMatrix, Float_t frac=-1, Int_t start=0, Int_t stop=10000000)
Int_t GetFitIndex(const TString fString, const TString subString)
void AddStatusPad(TCanvas *c1, Float_t padratio, Float_t bottommargin)