PHP Classes

File: LinearFit.php

Recommend this page to a friend!
  Classes of Jose Gomez  >  Linear Fit  >  LinearFit.php  >  Download  
File: LinearFit.php
Role: Class source
Content type: text/plain
Description: Multivariate linear fitting
Class: Linear Fit
Perform linear regression on a set of data values
Author: By
Last change:
Date: 9 years ago
Size: 21,096 bytes
 

Contents

Class file image Download
<?php /** * \mainpage Linear least squares fitting package * * \section intro Introduction * This package will be use to perform linear least squares fitting (<a href="http://en.wikipedia.org/wiki/Linear_regression">See linear regression</a>). * Other types of linear fitting are polynomial fitting and functional linear fitting. * * \section usage Usage * * The way this package can be use is described in several steps setting data, set degree in case of polynomial fitting or add the user defined functions, * add more data (optional), perform fitting, get confidence intervals, goodness of fitting and interpolate value sets. * After setting new data, degree or functions new fittings can be performed using new loaded data * * \subsection setdata Setting data * * Using the method SetData you can set new data to perform the Least squares fitting. Setting new data implies previous fitting is removed and everything is initialized * * \subsection setdegree Setting polynome degree * * In case of PolyFit class is used (polynome fitting) the degree of the polynome will be fit has to be set. For this purpose Setdegree method will be used * * \subsection setfunction Setting user defined functions * * In case of FunctionFit class is used (linear functional fitting) the array of functions will be fit has to be set. For this purpose SetFunctions method will be used * * \subsection adddata Adding more data * * In case a new fitting is going to be pewrform with extra data without losing previously set data the method AddData will be used * * \subsection fit Fitting * * Once all the necesary information has been provided the fitting will be performed using Fit method. This method will return an array with the coefficients for each variable in case * of LinearFit class is used, for each power in case of PolyFit, for each function in case of FunctionFit * * \subsection conf Confidence interval for each coefficient * * Least squares regression has a <a href="http://stattrek.com/regression/slope-confidence-interval.aspx">related error for coefficient estimation</a>. You can get this error using ConfInterval method * * \subsection goodness Goodness of fitting * * The <a href="http://www.graphpad.com/guides/prism/6/curve-fitting/index.htm?r2_ameasureofgoodness_of_fitoflinearregression.htm">goodness of fitting</a> can be got with the r2 method * * \subsection getval get estimated values * * Once the fitting ids performed set of values can be calculated using the previously calculated coefficients using the GetValue method. * \section license License * Copyright (C) 2013 José Gómez López * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program. If not, see <http://www.gnu.org/licenses/>. * **/ /** * LinearFit * * @author José Gómez López <jose.gomez.lopez@gmail.com> * * Multivariate linear least squares fitting class * * This class perform multivariate linear squares fitting * in the form y=a0+a1*x1+ ... +an*xn * each ai are coefficients to calculte * each xi are the independent variables **/ class LinearFit { private $X; private $Y; private $Coeffs; private $Conf; private $Size; private $Vars; private $nY; /** * Initialize data * * Initialize internal data and prepare for next calculation **/ protected function init() { $this->X=array(); $this->Y=array(); $this->Coeffs=array(); $this->Conf=array(); $this->Size=0; $this->Vars=0; $this->nY=0; } // End of init() /** * Constructor * * Create a new object **/ public function __construct () { $this->init(); } // End of function __construct /** * Add New data * * Add new data to the fitting process * @param array double $adX vector with values for each variable * @param (array) double $dY value corresponding to $adX **/ public function AddData ($adX, $dY) { if ($this->Size>0 && $this->Vars>0 && $this->Vars != count($adX)) { throw new Exception('# of variables of stored data and input vector is different'); } else { $this->Size++; $this->Vars=count($adX); if (is_array($dY)) { /* * Several fittings will be performed at the same time */ if ($this->nY==0) { $this->nY=count($dY); } if (count($dY) != $this->nY ) { throw new Exception('Number of Y elements is wrong'); } } else { /* * Only a fitting will be performed */ if ($this->nY==0) { $this->nY=1; } if ($this->nY != 1) { throw new Exception('Y must be an array'); } } /* * Provided set is added */ $this->Y[]=$dY; $this->X[]=$adX; } // End of if } // End of AddData /** * Set New data * * Set new data destroying previous data * @param array double $adX matrix with a set of X values * @param array double $adY vector of values corresponding to each row of $adX **/ public function SetData($adX, $adY) { if (count($adX) != count($adY)) { throw new Exception('The size of both arrays should be the same'); } else { if (is_array($adX[0])) { $this->Size=count($adX); $this->Vars=count($adX[0]); $this->Y=$adY; $this->X=$adX; $this->Coeffs=array(); $this->Conf=array(); if ( is_array($adY[0]) ) { $this->nY=count($adY[0]); } else { $this->nY=1; } } } // End of if } // End of setData /** * Two-tailed standard normal probality * * Returns the two-tailed standard normal probability * corresponding to dZ * @param double $dZ Value of normal distribution * * @return double two-tailed standard normal probability **/ private function Nnorm_p($dZ) { $dA1 = 0.0000053830; $dA2 = 0.0000488906; $dA3 = 0.0000380036; $dA4 = 0.0032776263; $dA5 = 0.0211410061; $dA6 = 0.0498673470; $dZ = abs($dZ); $dP = ((((($dA1*$dZ+$dA2)*$dZ+$dA3)*$dZ+$dA4)*$dZ+$dA5)*$dZ+$dA6)*$dZ+1; $dP = pow($dP, -16); return $dP; } //End of Nnorm_p /** * Normal distribution value given a half-middle type probability * * Normal distribution value given a half-middle type probability * @param double $dP Probability * * @return double Normal distribution value **/ private function Nnorm_z($dP) { $dA0= 2.5066282; $dA1=-18.6150006; $dA2= 41.3911977; $dA3=-25.4410605; $dB1= -8.4735109; $dB2= 23.0833674; $dB3=-21.0622410; $dB4= 3.1308291; $dC0= -2.7871893; $dC1= -2.2979648; $dC2= 4.8501413; $dC3= 2.3212128; $dD1= 3.5438892; $dD2= 1.6370678; if ($dP>0.42) { $dR=sqrt(-log(0.5-$dP)); $dZ=((($dC3*$dR+$dC2)*$dR+$dC1)*$dR+$dC0)/(($dD2*$dR+$dD1)*$dR+1); } else { $dR=$dP*$dP; $dZ=$dP*((($dA3*$dR+$dA2)*$dR+$dA1)*$dR+$dA0)/(((($dB4*$dR+$dB3)*$dR+$dB2)*$dR+$dB1)*$dR+1); } /* End of if */ return $dZ; } // End of Nnorm_z /** * t-Student's value * * Hill's aprox. inverse t-Student's distribution: * Comm. of A.C.M. Vol. 13 No.10 1970 pg 620. * Calculates t-Student's distribution for the given * degrees of freedom and two-tail probability. * @param $dP Probability * @param $iFd Degrees of freedom * * @return double aproximate t-Stuxdent's distribution value **/ private function Hills_inv_t ($dP, $iFd) { if ($iFd == 1) { $dT = cos($dP*M_PI/2)/sin($dP*M_PI/2); } else if ($iFd == 2) { $dT = sqrt(2/($dP*(2 - $dP)) - 2); } else { $dA = 1/($iFd - 0.5); $dB = 48/($dA*$dA); $dC = ((20700*$dA/$dB - 98)*$dA - 16)*$dA + 96.36; $dD = ((94.5/($dB + $dC) - 3)/$dB + 1)*sqrt($dA*M_PI*0.5)*$iFd; $dX = $dD*$dP; $dY = pow($dX, 2/$iFd); if ($dY > 0.05 + $dA) { $dX = $this->Norm_z(0.5*(1 - $dP)); $dY = $dX*$dX; if ($iFd < 5) $dC = $dC + 0.3*($iFd - 4.5)*($dX + 0.6); $dC = (((0.05*$dD*$dX - 5)*$dX - 7)*$dX - 2)*$dX + $dB + $dC; $dY = (((((0.4*$dY + 6.3)*$dY + 36)*$dY + 94.5)/$dC - $dY - 3)/$dB + 1)*$dX; $dY = $dA*$dY*$dY; if ($dY > 0.002) { $dY = exp($dY) - 1; } else $dY = 0.5*$dY*$dY + $dY; $dT = sqrt($iFd*$dY); } /* End of if */ else { $dY = ((1/((($iFd + 6)/($iFd*$dY) - 0.089*$dD - 0.822)*($iFd + 2)*3) + 0.5/($iFd + 4))*$dY - 1)*($iFd + 1)/($iFd + 2) + 1/$dY; $dT = sqrt($iFd*$dY); } /* End of if */ } /* End of if */ return $dT; } /** * t-Student's distribution * * Returns an accurate t-Student's distribution to tolerance * 0.0001 for the given degrees of freedom and two-tail * probability. * @param $dP Probability * @param $iFd Degrees of freedom * * @return double acurrate t-Stuxdent's distribution value **/ private function T ($dProb, $iDeg) { $dDiff = 1; $dP0 = 1-$dProb; $dP1 = $dP0; while (abs($dDiff) > .0001) { $dT = $this->Hills_inv_t($dP1, $iDeg); /* initial rough value */ $dDiff = $this->T_p($dT,$iDeg) - $dP0; /* compare result with forward fn */ $dP1 -= $dDiff; /* small adjustment to p1 */ } /* End of while */ return $dT; } /** * Two-tail probability * * Calculates two-tail probability given a t-Student's * distribution value and degrees of freedom * @param $dT t-Student's distribution value * @param $iFd Degrees of freedom * * @return double two tailed probality **/ private function T_p ($dT, $iFd) { $dAbst = abs($dT); $dTsq = $dT*$dT; if ($iFd== 1) { $dP = 1 - 2*atan($dAbst)/M_PI; } else if ($iFd== 2) { $dP = 1 - $dAbst/sqrt($dTsq + 2); } else if ($iFd== 3) { $dP = 1 - 2*(atan($dAbst/sqrt(3)) + $dAbst*sqrt(3)/($dTsq + 3))/M_PI; } else if ($iFd== 4) { $dP = 1 - $dAbst*(1 + 2/($dTsq + 4))/sqrt($dTsq + 4); } else { $dZ = T_z($dAbst, $iFd); if ($iFd>4) { $dP = $this->Norm_p($dZ); } else { $dP = $this->Norm_p($dZ); } /* End of if */ } /* End of if */ return $dP; } /** * Normal distribution value from t-Student's * * Converts a t-Student's distribution value to Normal distribution * value given the degrees of freedom at the two-tail probability level. * @param $dT t-Student's distribution value * @param $iFd Degrees of freedom * * @return double Normal distribution value from t-Student's value **/ private function T_z ($dT, $iFd) { $dA9 = $iFd - 0.5; $dB9 = 48*$dA9*$dA9; $dT9 = $dT*$dT/$iFd; if ($dT9 >= 0.04) { $dZ8 = $dA9*log(1+$dT9); } else { $dZ8 = $dA9*(((1 - $dT9*0.75)*$dT9/3 - 0.5)*$dT9 + 1)*$dT9; } /* End of if */ $dP7 = ((0.4*$dZ8 + 3.3)*$dZ8 + 24)*$dZ8 + 85.5; $dB7 = 0.8*pow($dZ8, 2) + 100 + $dB9; $dZ = (1 + (-$dP7/$dB7 + $dZ8 + 3)/$dB9)*sqrt($dZ8); return $dZ; } /** * Solve linear system * * Solve a linear system using Gauss-Jordan method * @param array double input/output $adX * @paran array double input/output $adY output the solution * **/ private function SolveLinear(&$adX, &$adY) { $iMax=0; $dMax=0; $dFactor=0; $adTemp=array(); $iDim=count($adX); if ( !is_array($adX) || !is_array($adY)) { throw new Exception('Both inputs should be matrix'); } // End of if if ( !is_array($adX[0]) || !is_array($adY[0])) { throw new Exception('Both inputs should be matrix'); } // End of if if ( $iDim != count($adX[0])) { throw new Exception('X matrix should be squared'); } // End of if if ( $iDim != count($adY)) { throw new Exception('Both input should have the same number of rows'); } // End of if $iSol=count($adY[0]); for ($i=0; $i<$iDim; $i++) { /************************************* * Search maximum value in the column *************************************/ for ($j=$i, $dMax=0; $j<$iDim; $j++) { if ($adX[$j][$i]*$adX[$j][$i]>$dMax*$dMax) { $dMax=$adX[$j][$i]; $iMax=$j; } /* End of if */ } /* End of for */ if ( $dMax*$dMax>0 ) { /************************************************ * Put the maximum value Row in current position ************************************************/ $adTemp=$adX[$i]; $adX[$i]=$adX[$iMax]; $adX[$iMax]=$adTemp; $adTemp=$adY[$i]; $adY[$i]=$adY[$iMax]; $adY[$iMax]=$adTemp; /************************************************* * Set the diagonal element to 1 *************************************************/ for ($j=0; $j<$iDim; $j++) { $adX[$i][$j]/=$dMax; } /* End of for */ for ($j=0; $j<$iSol; $j++) { $adY[$i][$j]/=$dMax; } /* End of for */ /**************************************************** * Set 0 to the non diagonal elements in the column ****************************************************/ for ($j=0; $j<$iDim; $j++) { if ($i!=$j) { $dFactor=$adX[$j][$i]; for ($k=$i; $k<$iDim; $k++) { $adX[$j][$k]-=$dFactor*$adX[$i][$k]; } /* End of for */ for ($k=0; $k<$iSol; $k++) { $adY[$j][$k]-=$dFactor*$adY[$i][$k]; } /* End of for */ } /* End of if */ } /* End of for */ } else { /* * Singular matrix */ throw new Exception('Input matrix is singular'); } } /* End of for */ } // End of SolveLinear /** * Build the lesat squares matrix * * Build the Least Squares matrix using the X vectors * * @return array double Least squares matrix **/ private function LSMatrix() { $adX=array(); /* * Matrix initialization */ for ($i=0; $i<=$this->Vars; $i++) { for ($j=0; $j<=$this->Vars; $j++) { $adX[$i][$j]=0; } } /* * Filling the first row and column * This matrix is symetric */ $adX[0][0]=$this->Size; for ($i=0; $i<$this->Vars; $i++) { for ($j=0; $j<$this->Size; $j++) { $adX[$i+1][0]+=$this->X[$j][$i]; } $adX[0][$i+1]=$adX[$i+1][0]; } /* * rest of the matrix filling */ for ($i=0; $i<$this->Vars; $i++) { for ($j=$i; $j<$this->Vars; $j++) { for ($k=0; $k<$this->Size; $k++) { $adX[$i+1][$j+1]+=$this->X[$k][$i]*$this->X[$k][$j]; } $adX[$j+1][$i+1]=$adX[$i+1][$j+1]; } } return $adX; } /** * Perform linear fitting * * perform linnear fitting process with the suplied data * @return array double Coefficient matrix **/ public function Fit() { if ($this->Size == 0) { throw new Exception('There is no loaded data'); } $adX=array(); $adY=array(); /* * Initialize result vector set */ for ($i=0; $i<=$this->Vars; $i++) { for ($j=0; $j<$this->nY; $j++) { $adY[$i][$j]=0; } } /* * Filling the results vector set * from the second elemet */ for ($i=0; $i<$this->Vars; $i++) { for ($j=0; $j<$this->Size; $j++) { for ($k=0; $k<$this->nY; $k++) { $dY=($this->nY>1)?$this->Y[$j][$k]: $this->Y[$j]; $adY[$i+1][$k]+=$this->X[$j][$i]*$dY; } } } /* * Filling the first element of * the results vector set */ for ($j=0; $j<$this->Size; $j++) { for ($k=0; $k<$this->nY; $k++) { $dY=($this->nY>1)?$this->Y[$j][$k]: $this->Y[$j]; $adY[0][$k]+=$dY; } } /* * Build van der Monde matrix * (least squares matrix) */ $adX=$this->LSMatrix(); /* * solves the iLeast squares system system */ $this->SolveLinear($adX, $adY); if ($this->nY == 1) { for ($i=0; $i<count($adY); $i++) { $adT[$i]=$adY[$i][0]; } $adY=$adT; } $this->Coeffs=$adY; return $adY; } // End of Fit /** * Get the confidence interval band for the parameters * * Get the values of an input using the calculated coefficients * @param array double $adX input to get the values * * @return array double Error estimation for coefficients **/ public function ConfInterval($dProb) { if (count($this->Coeffs)==0) { throw new Exception('Fitting is not performed yet'); } if (count($this->Conf)==0) { for ($i=0; $i<=$this->Vars; $i++) { for ($j=0; $j<=$this->Vars; $j++) { $adI[$i][$j]=0; } $adI[$i][$i]=1; } $adX=$this->LSMatrix(); $this->SolveLinear($adX, $adI); $adX=$this->X; $adY=self::GetValues($adX); $dT=$this->T($dProb, $this->Size-$this->Vars-1); if ($this->nY == 1) { for ($i=0, $dS=0; $i<$this->Size; $i++) { $dS+=($adY[$i]-$this->Y[$i])*($adY[$i]-$this->Y[$i]); } $dS/=$this->Size-$this->Vars-1; for ($i=0; $i<=$this->Vars; $i++) { $this->Conf[$i]=$dT*sqrt($adI[$i][$i]*$dS); } } else { for ($ii=0; $ii<$this->nY; $ii++) { for ($i=0, $adSi[$ii]=0; $i<$this->Size; $i++) { $adSi[$ii]+=($adY[$i][$ii]-$this->Y[$i][$ii])*($adY[$i][$ii]-$this->Y[$i][$ii]); } $adSi[$ii]/=$this->Size-$this->Vars-1; } for ($i=0; $i<=$this->Vars; $i++) { for ($j=0; $j<$this->nY; $j++) { $this->Conf[$i][$j]=$dT*sqrt($adI[$i][$i]*$adSi[$j]); } } } } return $this->Conf; } /** * Get the goodness of regression * * Get the goodness of regression value. * It will be between 0 and 1. As the closest to 1 * the best will be the fitting * @param array double $adX input to get the values * * @return double goodness of regression **/ public function r2() { if (count($this->Coeffs)==0) { throw new Exception('Fitting is not performed yet'); } $adX=$this->X; $adY=self::GetValues($adX); if ($this->nY == 1) { for ($i=0, $dYm=0; $i<$this->Size; $i++) { $dYm+=$adY[$i]; } $dYm/=$this->Size; for ($i=0, $dS=0, $dStot=0; $i<$this->Size; $i++) { $dS+=($adY[$i]-$this->Y[$i])*($adY[$i]-$this->Y[$i]); $dStot+=($dYm-$this->Y[$i])*($dYm-$this->Y[$i]); } return (1-$dS/$dStot); } else { for ($ii=0; $ii<$this->nY; $ii++) { for ($i=0, $adYm[$ii]=0; $i<$this->Size; $i++) { $adYm[$ii]+=$adY[$i][$ii]; } $adYm[$ii]/=$this->Size; for ($i=0, $adS[$ii]=0, $adStot[$ii]=0; $i<$this->Size; $i++) { $adS[$ii]+=($adY[$i][$ii]-$this->Y[$i][$ii])*($adY[$i][$ii]-$this->Y[$i][$ii]); $adStot[$ii]+=($adYm[$ii]-$this->Y[$i][$ii])*($adYm[$ii]-$this->Y[$i][$ii]); } } $adR2=array(); for ($j=0; $j<$this->nY; $j++) { $adR2[]=(1-$adS[$j]/$adStot[$j]); } return $adR2; } } /** * Get values from an input * * Get the values of an input using the calculated coefficients * @param array double $adX input to get the values * * @return (array) double value / value set for the provided * vector / vector set using the calculated coefficients **/ public function GetValues($adX) { if (count($this->Coeffs)==0) { throw new Exception('Fitting is not performed yet'); } if (is_array($adX[0])) { if (count($adX[0])!=$this->Vars) { throw new Exception('Number of columns should the same as variables'); } else { for ($i=0; $i<count($adX); $i++) { if ($this->nY==1) { $adTmp[$i]=$this->Coeffs[0]; for ($j=0; $j<$this->Vars; $j++) { $adTmp[$i]+=$this->Coeffs[$j+1]*$adX[$i][$j]; } } else { for ($k=0; $k<$this->nY; $k++) { $adTmp[$i][$k]=$this->Coeffs[0][$k]; for ($j=0; $j<$this->Vars; $j++) { $adTmp[$i][$k]+=$this->Coeffs[$j+1][$k]*$adX[$i][$j]; } } } } } } else { if ($this->nY==1) { $adTmp=$this->Coeffs[0]; for ($j=0; $j<$this->Vars; $j++) { $adTmp+=$this->Coeffs[$j+1]*$adX[$j]; } } else { for ($k=0; $k<$this->nY; $k++) { $adTmp[$k]=$this->Coeffs[0][$k]; for ($j=0; $j<$this->Vars; $j++) { $adTmp[$k]+=($this->Coeffs[$j+1][$k])*$adX[$j]; } } } } return $adTmp; } } // End of class LinearFit ?>