2024-07-26 08:23:47 +00:00
|
|
|
#ifndef _STATISTICS_CPP_
|
|
|
|
#define _STATISTICS_CPP_
|
|
|
|
|
2022-11-11 13:56:59 +00:00
|
|
|
#include <vector>
|
|
|
|
#include <cstdio>
|
|
|
|
#include <cmath>
|
|
|
|
#include <array>
|
|
|
|
|
2024-07-26 08:23:47 +00:00
|
|
|
namespace stats {
|
2022-11-11 13:56:59 +00:00
|
|
|
|
|
|
|
using namespace std;
|
|
|
|
|
|
|
|
typedef double val;
|
|
|
|
|
|
|
|
struct inverse_error {};
|
|
|
|
|
|
|
|
template<size_t N> array<array<val, N>, N> invert(const array<array<val, N>, N>& T) {
|
|
|
|
int iN = N;
|
|
|
|
auto T1 = T, T2 = T;
|
|
|
|
for(int y=0; y<iN; y++)
|
|
|
|
for(int x=0; x<iN; x++)
|
|
|
|
T2[y][x] = (x==y);
|
|
|
|
|
|
|
|
for(int a=0; a<iN; a++) {
|
|
|
|
int best = a;
|
|
|
|
|
|
|
|
for(int b=a+1; b<iN; b++)
|
|
|
|
if(abs(T1[b][a]) > abs(T1[best][a]))
|
|
|
|
best = b;
|
|
|
|
|
|
|
|
int b = best;
|
|
|
|
|
|
|
|
if(b != a)
|
|
|
|
for(int c=0; c<iN; c++)
|
|
|
|
swap(T1[b][c], T1[a][c]), swap(T2[b][c], T2[a][c]);
|
|
|
|
|
|
|
|
if(abs(T1[a][a]) < 1e-6) throw inverse_error();
|
|
|
|
for(int b=a+1; b<iN; b++) {
|
|
|
|
val co = -T1[b][a] / T1[a][a];
|
|
|
|
for(int c=0; c<iN; c++) T1[b][c] += T1[a][c] * co, T2[b][c] += T2[a][c] * co;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
for(int a=N-1; a>=0; a--) {
|
|
|
|
for(int b=0; b<a; b++) {
|
|
|
|
val co = -T1[b][a] / T1[a][a];
|
|
|
|
for(int c=0; c<iN; c++) T1[b][c] += T1[a][c] * co, T2[b][c] += T2[a][c] * co;
|
|
|
|
}
|
|
|
|
val co = 1 / T1[a][a];
|
|
|
|
for(int c=0; c<iN; c++) T1[a][c] *= co, T2[a][c] *= co;
|
|
|
|
}
|
|
|
|
return T2;
|
|
|
|
}
|
|
|
|
|
|
|
|
template<size_t N> struct leastsquare_solution : public array<val, N> {
|
|
|
|
val operator() (const array<val, N> X) {
|
|
|
|
int iN = N;
|
|
|
|
val res = 0;
|
|
|
|
for(int j=0; j<iN; j++) res += X[j] * (*this)[j];
|
|
|
|
return res;
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
template<size_t N> struct leastsquare_solver {
|
|
|
|
array<array<val, N>, N> toinvert;
|
|
|
|
array<val, N> Xty;
|
2023-08-23 17:44:37 +00:00
|
|
|
static constexpr int iN = N;
|
2022-11-11 13:56:59 +00:00
|
|
|
|
|
|
|
leastsquare_solver() {
|
|
|
|
for(int y=0; y<iN; y++) Xty[y] = 0;
|
|
|
|
for(int y=0; y<iN; y++)
|
|
|
|
for(int x=0; x<iN; x++)
|
|
|
|
toinvert[y][x] = 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
void add_data(const array<val, N> X, val y) {
|
|
|
|
for(int j=0; j<iN; j++)
|
|
|
|
for(int k=0; k<iN; k++)
|
|
|
|
toinvert[j][k] += X[j] * X[k];
|
|
|
|
for(int j=0; j<iN; j++)
|
|
|
|
Xty[j] += X[j] * y;
|
|
|
|
}
|
|
|
|
|
|
|
|
void operator += (const leastsquare_solver<iN> other) {
|
|
|
|
for(int j=0; j<iN; j++)
|
|
|
|
for(int k=0; k<iN; k++)
|
|
|
|
toinvert[j][k] += other.toinvert[j][k];
|
|
|
|
for(int j=0; j<iN; j++)
|
|
|
|
Xty[j] += other.Xty[j];
|
|
|
|
}
|
|
|
|
|
|
|
|
leastsquare_solution<iN> solve() {
|
|
|
|
auto res = invert(toinvert);
|
|
|
|
leastsquare_solution<iN> s;
|
|
|
|
for(int i=0; i<iN; i++) {
|
|
|
|
s[i] = 0;
|
|
|
|
for(int j=0; j<iN; j++)
|
|
|
|
s[i] += res[i][j] * Xty[j];
|
|
|
|
}
|
|
|
|
return s;
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
2024-07-28 06:31:03 +00:00
|
|
|
template<int dim1, int dim2> double small_kendall(const vector<pair<int, int>>& allp) {
|
2024-07-26 08:23:47 +00:00
|
|
|
int maxo = 0, maxe = 0;
|
|
|
|
for(const auto& a: allp) maxo = max(maxo, a.first), maxe = max(maxe, a.second);
|
|
|
|
maxo++; maxe++;
|
|
|
|
|
|
|
|
if(maxo >= dim1 || maxe >= dim2)
|
|
|
|
throw hr::hr_exception("small_kendall limit exceeded");
|
|
|
|
int cnt[dim1][dim2];
|
|
|
|
for(int a=0; a<maxo; a++)
|
|
|
|
for(int b=0; b<maxe; b++)
|
|
|
|
cnt[a][b] = 0;
|
|
|
|
for(const auto& a: allp) cnt[a.first][a.second]++;
|
|
|
|
|
|
|
|
// int i1 = 0, i2 = 0;
|
|
|
|
int K = hr::isize(allp);
|
|
|
|
// allp.emplace_back(maxo, maxe);
|
|
|
|
vector<int> counts(maxe, 0);
|
|
|
|
vector<int> totals(maxe);
|
|
|
|
double tau = 0;
|
|
|
|
for(int i=0; i<maxo; i++) {
|
|
|
|
totals[0] = 0;
|
|
|
|
for(int ii=1; ii<maxe; ii++)
|
|
|
|
totals[0] -= counts[ii];
|
|
|
|
for(int ii=1; ii<maxe; ii++)
|
|
|
|
totals[ii] = totals[ii-1] + counts[ii] + counts[ii-1];
|
|
|
|
|
|
|
|
for(int b=0; b<maxe; b++) {
|
|
|
|
tau += totals[b] * 1. * cnt[i][b];
|
|
|
|
counts[b] += cnt[i][b];
|
|
|
|
}
|
|
|
|
}
|
|
|
|
double par = (K * (K-1.) / 2);
|
|
|
|
return tau / par;
|
|
|
|
}
|
|
|
|
|
2024-07-28 06:31:36 +00:00
|
|
|
template<class T> double kendall(vector<pair<int, T>> allp) {
|
|
|
|
int maxx = 0;
|
|
|
|
for(const auto& a: allp) maxx = max(maxx, a.first);
|
|
|
|
maxx++;
|
|
|
|
|
|
|
|
vector<int> counts(maxx, 0);
|
|
|
|
vector<int> totals(maxx, 0);
|
|
|
|
|
|
|
|
double tau = 0;
|
|
|
|
sort(allp.begin(), allp.end(), [] (auto a, auto b) { return a.second < b.second; });
|
|
|
|
|
|
|
|
auto last = allp[0].second;
|
|
|
|
vector<int> to_add;
|
|
|
|
for(const auto& a: allp) {
|
|
|
|
if(a.second != last) {
|
|
|
|
for(auto ad: to_add) {
|
|
|
|
totals[ad]++;
|
|
|
|
for(int x=ad+1; x<maxx; x++) totals[x] += 2;
|
|
|
|
}
|
|
|
|
to_add.clear();
|
|
|
|
last = a.second;
|
|
|
|
}
|
|
|
|
|
|
|
|
to_add.push_back(a.first);
|
|
|
|
tau += hr::isize(to_add);
|
|
|
|
tau += totals[a.first];
|
|
|
|
}
|
|
|
|
int K = hr::isize(allp);
|
|
|
|
tau -= K;
|
|
|
|
|
|
|
|
double par = K * (K-1.);
|
|
|
|
println(hr::hlog, tau, " / ", par);
|
|
|
|
return tau / par;
|
|
|
|
}
|
|
|
|
|
|
|
|
#if TEST
|
|
|
|
void test_kendall() {
|
|
|
|
if(1) {
|
|
|
|
vector<pair<int, int>> p = { {1,1}, {2,2}, {3,3}, {4,4} };
|
|
|
|
println(hlog, "p = ", stats::kendall(p));
|
|
|
|
|
|
|
|
vector<pair<int, int>> q = { {1,1}, {2,2}, {3,3}, {4,3} };
|
|
|
|
println(hlog, "q = ", stats::kendall(q));
|
|
|
|
|
|
|
|
vector<pair<int, int>> r = { {1,1}, {2,2}, {3,4}, {4,3} };
|
|
|
|
println(hlog, "r = ", stats::kendall(r));
|
|
|
|
|
|
|
|
vector<pair<int, int>> s = { {1,1}, {2,2}, {3,3}, {3,4} };
|
|
|
|
println(hlog, "s = ", stats::kendall(s));
|
|
|
|
}
|
|
|
|
}
|
|
|
|
#endif
|
2024-07-26 08:23:47 +00:00
|
|
|
|
2022-11-11 13:56:59 +00:00
|
|
|
}
|
2024-07-26 08:23:47 +00:00
|
|
|
#endif
|