Created
December 6, 2020 16:05
-
-
Save fatihgune/20430240e6346891032c6b5394dd230b to your computer and use it in GitHub Desktop.
Confusion Equation (X Equation) with normal distribution (PHP - Laravel)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php
// Computes AUC, its standard error (se), a z score and the accuracy ratio (AR)
// for the normal distribution `value` of UK companies.
//
// For every percentage p from 1 to 100 and every configured year, the lowest
// p percent of that year's records (ordered by `value`) are taken and the
// distinct bankrupted / healthy companies among them are counted. The
// per-percentage increments of those counts feed the sums used below.
//
// Returns an array with the keys 'auc', 'se', 'z' and 'AR'.
// Throws \DivisionByZeroError when no bankrupted or no healthy company was
// counted, because AUC is undefined without both groups.

// From 2006 to 2019
$years = config('years');

// Number of distinct bankrupted UK companies that have at least one normal
// distribution record (any year).
// NOTE(review): this is a distinct count over all years, while the counts in
// the loop below are summed per year. If a bankrupted company has records in
// more than one year, the summed count can exceed this total and $row2 turns
// negative - confirm against the data.
$allBankruptedCompaniesCount = NormalDistribution::join('companies AS c', 'company_id', '=', 'c.id')
    ->where('c.country', '=', 'UK')
    ->where('c.bankrupted', '=', 'yes')
    ->get()->unique('company_id')
    ->count();

// Load and sort every year's records exactly once. The query does not depend
// on the percentage, so running it inside the percentage loop would repeat the
// same query, hydration and sort 100 times per year.
$normDistByYear = [];
foreach ($years as $year) {
    $normDistByYear[] = NormalDistribution::join('companies AS c', 'company_id', '=', 'c.id')
        ->where('c.country', '=', 'UK')
        ->where('year', '=', $year)->get()
        ->sortBy('value', SORT_NUMERIC);
}

// Running sums over all percentages.
$rowResult = 0; // sum of row1 * row2 + 0.5 * row1 * row3
$row1All = 0;   // sum of healthy increments
$row3All = 0;   // sum of bankrupted increments

// Cumulative counts of the previous percentage. Plain variables are used
// instead of the application cache: the values are only needed one iteration
// later, and flushing the shared cache to make room for them would discard
// unrelated cached data of the whole application.
$previousHealthyCount = 0;
$previousBankruptedCount = 0;

// Mimic percentages; from 1% to 100%
for ($i = 1; $i <= 100; $i++) {
    // Derive the fraction from the loop counter rather than adding 0.01 each
    // time; repeated addition accumulates floating point error, which can
    // change the rounding of the record count below.
    $fraction = $i / 100;

    // Cumulative counts for this percentage, summed over all years.
    $countOfBankruptedCompaniesForPercentage = 0;
    $countOfHealthyCompaniesForPercentage = 0;

    foreach ($normDistByYear as $normDist) {
        // i.e. 1 percent = take 1 percent of that year's records, lowest values first.
        $recordsToTake = (int) round($normDist->count() * $fraction, 0, PHP_ROUND_HALF_DOWN);
        $normDistForThatPercentage = $normDist->take($recordsToTake);

        // Separate bankrupted and healthy companies, counting each company once per year.
        $countOfBankruptedCompaniesForPercentage += $normDistForThatPercentage
            ->where('bankrupted', 'yes')
            ->unique('company_id')
            ->count();
        $countOfHealthyCompaniesForPercentage += $normDistForThatPercentage
            ->where('bankrupted', 'no')
            ->unique('company_id')
            ->count();
    }

    // Healthy companies added by this percentage step (the increment, not the
    // accumulated number). For 1% the previous count is 0, so this is the count itself.
    $row1 = $countOfHealthyCompaniesForPercentage - $previousHealthyCount;
    // Bankrupted companies not yet covered at this percentage.
    $row2 = $allBankruptedCompaniesCount - $countOfBankruptedCompaniesForPercentage;
    // Bankrupted companies added by this percentage step.
    $row3 = $countOfBankruptedCompaniesForPercentage - $previousBankruptedCount;

    $rowResult += $row1 * $row2 + 0.5 * $row1 * $row3;
    $row1All += $row1;
    $row3All += $row3;

    $previousHealthyCount = $countOfHealthyCompaniesForPercentage;
    $previousBankruptedCount = $countOfBankruptedCompaniesForPercentage;
}

// AUC divides by (bankrupted * healthy); without both groups it is undefined.
if ($row3All === 0 || $row1All === 0) {
    throw new \DivisionByZeroError(
        'Cannot calculate AUC: no bankrupted or no healthy companies were counted.'
    );
}

// calculate auc
$auc = $rowResult / ($row3All * $row1All);
// calculate q1
$q1 = $auc / (2 - $auc);
// calculate q2
$q2 = 2 * ($auc * $auc) / (1 + $auc);
// calculate se
// NOTE(review): the expression looks like the Hanley-McNeil standard error of
// the AUC - confirm that q1/q2 are paired with the intended group sizes.
$se = sqrt(
    (
        $auc * (1 - $auc)
        + ($row3All - 1) * ($q1 - ($auc * $auc))
        + ($row1All - 1) * ($q2 - ($auc * $auc))
    ) / ($row3All * $row1All)
);
// calculate z
// se is 0 when the expression under the root vanishes (e.g. auc of exactly 0
// or 1); report an infinite z score instead of dividing by zero.
if ($se === 0.0) {
    $z = $auc >= 0.5 ? INF : -INF;
} else {
    $z = ($auc - 0.5) / $se;
}
// calculate ar
$ar = 2 * ($auc - 0.5);

// store the results
$results = [
    'auc' => $auc, 'se' => $se, 'z' => $z, 'AR' => $ar,
];

return $results;
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment