2022-04-21 09:56:01 +00:00
|
|
|
// analyze the SOM test results
|
|
|
|
// Copyright (C) 2011-2022 Tehora and Zeno Rogue, see 'hyper.cpp' for details
|
|
|
|
|
|
|
|
#include "kohonen.h"
|
|
|
|
|
|
|
|
namespace rogueviz {
|
|
|
|
|
|
|
|
namespace kohonen_test {
|
|
|
|
extern string cg();
|
|
|
|
extern int data_scale, embed_scale;
|
|
|
|
extern int subdata_value;
|
|
|
|
}
|
|
|
|
|
|
|
|
double UNKNOWN_RESULT = -3;
|
|
|
|
|
|
|
|
using namespace kohonen_test;
|
|
|
|
|
|
|
|
namespace analyzer {
|
|
|
|
|
|
|
|
using measures::manidata;
|
|
|
|
using measures::MCOUNT;
|
|
|
|
|
|
|
|
struct maniset {
|
|
|
|
vector<string> names;
|
|
|
|
map<string, manidata> mdata;
|
|
|
|
};
|
|
|
|
|
|
|
|
maniset origs, embs;
|
|
|
|
|
|
|
|
void load_maniset(maniset& m, int scale) {
|
|
|
|
FILE *f = fopen((som_test_dir + "edgelists-" + its(scale) + ".txt").c_str(), "r");
|
|
|
|
while(true) {
|
|
|
|
char buf[200];
|
|
|
|
if(fscanf(f, "%s", buf) <= 0) break;
|
|
|
|
if(buf[0] != '#')
|
|
|
|
m.names.push_back(buf);
|
|
|
|
auto& md = m.mdata[buf];
|
|
|
|
int N, M;
|
2022-07-05 17:13:17 +00:00
|
|
|
hr::ignore(fscanf(f, "%d%d", &N, &M));
|
2022-04-21 09:56:01 +00:00
|
|
|
println(hlog, "reading ", buf, " of size ", N, " and ", M, " edges");
|
|
|
|
md.size = N;
|
|
|
|
auto& ed = md.edges;
|
|
|
|
for(int i=0; i<M; i++) {
|
|
|
|
int a, b;
|
2022-07-05 17:13:17 +00:00
|
|
|
hr::ignore(fscanf(f, "%d%d", &a, &b));
|
2022-04-21 09:56:01 +00:00
|
|
|
ed.emplace_back(a, b);
|
|
|
|
}
|
|
|
|
md.distances = measures::build_distance_matrix(N, md.edges);
|
|
|
|
}
|
|
|
|
fclose(f);
|
|
|
|
}
|
|
|
|
|
|
|
|
vector<pair<int, int> > vor_edges;
|
|
|
|
|
|
|
|
void analyze_test() {
|
|
|
|
|
|
|
|
println(hlog, "loading original manifolds");
|
|
|
|
load_maniset(origs, data_scale);
|
|
|
|
println(hlog, "loading embedding manifolds");
|
|
|
|
load_maniset(embs, embed_scale);
|
|
|
|
|
|
|
|
map<string, map<int, array<double, MCOUNT>>> results;
|
|
|
|
|
|
|
|
string tablefile = som_test_dir + "table" + cg() + ".csv";
|
|
|
|
|
|
|
|
if(1) {
|
|
|
|
|
|
|
|
fhstream res(tablefile, "rt");
|
|
|
|
if(!res.f) { println(hlog, "table ", tablefile, " missing"); }
|
|
|
|
else {
|
|
|
|
println(hlog, "reading the old table ", tablefile);
|
|
|
|
string s = scanline(res);
|
|
|
|
while(true) {
|
|
|
|
s = scanline(res);
|
|
|
|
if(s == "") break;
|
|
|
|
int i = 0;
|
|
|
|
while(s[i] != ';') i++;
|
|
|
|
i++;
|
|
|
|
while(s[i] != ';') i++;
|
|
|
|
i++;
|
|
|
|
int id;
|
|
|
|
sscanf(s.c_str()+i, "%d", &id);
|
|
|
|
auto& res = results[s.substr(0, i-1)][id];
|
|
|
|
for(auto& d: res) d = UNKNOWN_RESULT;
|
|
|
|
sscanf(s.c_str()+i, "%d;%lf;%lf;%lf;%lf;%lf;%lf;%lf;%lf;%lf;%lf;%lf;%lf", &id, &res[0], &res[1], &res[2], &res[3], &res[4], &res[5], &res[6], &res[7], &res[8], &res[9], &res[10], &res[11]);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
for(string orig: origs.names) {
|
|
|
|
|
|
|
|
string dir = som_test_dir + "pairs" + cg();
|
|
|
|
|
|
|
|
for(string emb: embs.names) {
|
|
|
|
string fname = dir + "/" + orig + "-" + emb + ".txt";
|
|
|
|
if(!file_exists(fname)) continue;
|
|
|
|
FILE *f = fopen(fname.c_str(), "rt");
|
|
|
|
|
|
|
|
string vorname = dir + "/" + orig + "-" + emb + ".vor";
|
|
|
|
if(!file_exists(vorname)) continue;
|
|
|
|
FILE *vor = fopen(vorname.c_str(), "rt");
|
|
|
|
|
|
|
|
int No = origs.mdata[orig].size;
|
|
|
|
auto& edo = origs.mdata[orig].edges;
|
|
|
|
vector<pair<int, int>> edo_recreated;
|
|
|
|
|
|
|
|
int new_results = 0;
|
|
|
|
|
|
|
|
int current_row = -1;
|
|
|
|
|
|
|
|
auto& res1 = results[orig + ";" + emb];
|
|
|
|
for(int it=0; it<100; it++) {
|
|
|
|
|
|
|
|
bool know_result = res1.count(it);
|
|
|
|
auto& res = res1[it];
|
|
|
|
if(!know_result)
|
|
|
|
for(int kk=0; kk<MCOUNT; kk++)
|
|
|
|
res[kk] = UNKNOWN_RESULT;
|
|
|
|
|
|
|
|
vector<int> mapp(No, -2);
|
|
|
|
for(int k=0; k<MCOUNT; k++) {
|
|
|
|
bool recompute = false;
|
|
|
|
ld energy = 0;
|
|
|
|
if(res[k] == UNKNOWN_RESULT || recompute) {
|
|
|
|
|
|
|
|
while(current_row < it) {
|
|
|
|
current_row++;
|
2022-07-05 17:13:17 +00:00
|
|
|
for(int i=0; i<No; i++) hr::ignore(fscanf(f, "%d", &mapp[i]));
|
|
|
|
int V = 0; hr::ignore(fscanf(vor, "%d", &V));
|
2022-04-21 09:56:01 +00:00
|
|
|
vor_edges.resize(V);
|
2022-07-05 17:13:17 +00:00
|
|
|
for(int i=0; i<V; i++) hr::ignore(fscanf(vor, "%d%d", &vor_edges[i].first, &vor_edges[i].second));
|
2022-04-21 09:56:01 +00:00
|
|
|
if(mapp.back() == -2) goto next_pair;
|
|
|
|
if(current_row == it)
|
|
|
|
edo_recreated = measures::recreate_topology(mapp, edo);
|
|
|
|
}
|
|
|
|
|
|
|
|
new_results++;
|
|
|
|
|
|
|
|
energy = evaluate_measure(embs.mdata[emb], origs.mdata[orig], mapp, vor_edges, edo_recreated, k);
|
|
|
|
|
|
|
|
if(recompute && res[k] != UNKNOWN_RESULT) {
|
|
|
|
if(abs(res[k] - energy) > 1e-5) {
|
|
|
|
println(hlog, "ERROR in ", orig, "->", emb, " in ", cg(), " index ", it, " : was ", res[k], " is ", energy);
|
|
|
|
if(subdata_value) res[k] = energy;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
else {
|
|
|
|
res[k] = energy;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
next_pair:
|
|
|
|
fclose(f);
|
|
|
|
|
|
|
|
if(new_results) println(hlog, "computed ", new_results, " new results in ", cg(), " for ", orig, " -> ", emb);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
fhstream o(som_test_dir + "summary" + cg() + ".html", "wt");
|
|
|
|
println(o, "<html><body>\n");
|
|
|
|
|
|
|
|
if(1) {
|
|
|
|
println(o, "<table>\n");
|
|
|
|
|
|
|
|
int manis = 0;
|
|
|
|
|
|
|
|
for(string orig: origs.names) {
|
|
|
|
|
|
|
|
print(o, "<tr><th>", orig, "</th>");
|
|
|
|
for(string emb: embs.names) print(o, "<th>", emb, "</th>");
|
|
|
|
print(o, "</tr>\n");
|
|
|
|
|
|
|
|
for(int k: {0, 2, 4, 7, 8, 10, 11}) {
|
|
|
|
print(o, "<tr><td>", measures::catnames[k], "</td>");
|
|
|
|
|
|
|
|
for(string emb: embs.names) {
|
|
|
|
ld tenergy = 0;
|
|
|
|
int att = 0;
|
|
|
|
auto& res1 = results[orig + ";" + emb];
|
|
|
|
|
|
|
|
for(int it=0; it<100; it++) {
|
|
|
|
if(!res1.count(it)) continue;
|
|
|
|
auto& res = res1[it];
|
|
|
|
if(res[k] == UNKNOWN_RESULT) continue;
|
|
|
|
att++; tenergy += res[k];
|
|
|
|
}
|
|
|
|
|
|
|
|
if(orig == emb)
|
|
|
|
print(o, "<td bgcolor=\"#C0FFFF\">");
|
|
|
|
else if(manis & 1)
|
|
|
|
print(o, "<td bgcolor=\"#FFFFC0\">");
|
|
|
|
else
|
|
|
|
print(o, "<td>");
|
|
|
|
if(att == 100)
|
|
|
|
print(o, tenergy * 1. / att);
|
|
|
|
else if(att)
|
|
|
|
print(o, tenergy * 1. / att, "?");
|
|
|
|
else
|
|
|
|
print(o, "???");
|
|
|
|
print(o, "</td>");
|
|
|
|
}
|
|
|
|
print(o, "</tr>\n");
|
|
|
|
}
|
|
|
|
manis++;
|
|
|
|
}
|
|
|
|
print(o, "</table>\n");
|
|
|
|
}
|
|
|
|
print(o, "</body></html>\n");
|
|
|
|
|
|
|
|
fhstream res(som_test_dir + "table" + cg() + ".csv", "wt");
|
|
|
|
print(res, "orig;emb;index");
|
|
|
|
for(int i=0; i<MCOUNT; i++) print(res, ";", measures::catnames[i]); println(res);
|
|
|
|
|
|
|
|
for(auto& p: results) {
|
|
|
|
for(auto& p2: p.second) {
|
|
|
|
auto &p3 = p2.second;
|
|
|
|
print(res, p.first, ";", p2.first);
|
|
|
|
for(int i=0; i<MCOUNT; i++) print(res, ";", p3[i]); println(res);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
auto khook = arg::add3("-analyze-test", analyze_test);
|
|
|
|
|
|
|
|
}}
|
|
|
|
|
|
|
|
|
2022-07-05 17:13:17 +00:00
|
|
|
|