From 91faa3faf3eea384c31406d73a094b8a3b9735cf Mon Sep 17 00:00:00 2001 From: Zeno Rogue Date: Thu, 21 Apr 2022 11:56:01 +0200 Subject: [PATCH] rv::som:: added our tests --- rogueviz/som/analyzer.cpp | 229 ++++++ rogueviz/som/kohonen.h | 52 ++ rogueviz/som/measures.cpp | 239 +++++++ rogueviz/som/tests.cpp | 1383 +++++++++++++++++++++++++++++++++++++ rogueviz/som/voronoi.cpp | 258 +++++++ 5 files changed, 2161 insertions(+) create mode 100644 rogueviz/som/analyzer.cpp create mode 100644 rogueviz/som/measures.cpp create mode 100644 rogueviz/som/tests.cpp create mode 100644 rogueviz/som/voronoi.cpp diff --git a/rogueviz/som/analyzer.cpp b/rogueviz/som/analyzer.cpp new file mode 100644 index 00000000..41dcf7f1 --- /dev/null +++ b/rogueviz/som/analyzer.cpp @@ -0,0 +1,229 @@ +// analyze the SOM test results +// Copyright (C) 2011-2022 Tehora and Zeno Rogue, see 'hyper.cpp' for details + +#include "kohonen.h" + +namespace rogueviz { + +namespace kohonen_test { +extern string cg(); +extern int data_scale, embed_scale; +extern int subdata_value; +} + +double UNKNOWN_RESULT = -3; + +using namespace kohonen_test; + +namespace analyzer { + +using measures::manidata; +using measures::MCOUNT; + +struct maniset { + vector names; + map mdata; + }; + +maniset origs, embs; + +void load_maniset(maniset& m, int scale) { + FILE *f = fopen((som_test_dir + "edgelists-" + its(scale) + ".txt").c_str(), "r"); + while(true) { + char buf[200]; + if(fscanf(f, "%s", buf) <= 0) break; + if(buf[0] != '#') + m.names.push_back(buf); + auto& md = m.mdata[buf]; + int N, M; + fscanf(f, "%d%d", &N, &M); + println(hlog, "reading ", buf, " of size ", N, " and ", M, " edges"); + md.size = N; + auto& ed = md.edges; + for(int i=0; i > vor_edges; + +void analyze_test() { + + println(hlog, "loading original manifolds"); + load_maniset(origs, data_scale); + println(hlog, "loading embedding manifolds"); + load_maniset(embs, embed_scale); + + map>> results; + + string tablefile = som_test_dir + "table" + cg() + ".csv"; + + if(1) { + + fhstream res(tablefile, "rt"); + if(!res.f) { println(hlog, "table ", tablefile, " missing"); } + else { + println(hlog, "reading the old table ", tablefile); + string s = scanline(res); + while(true) { + s = scanline(res); + if(s == "") break; + int i = 0; + while(s[i] != ';') i++; + i++; + while(s[i] != ';') i++; + i++; + int id; + sscanf(s.c_str()+i, "%d", &id); + auto& res = results[s.substr(0, i-1)][id]; + for(auto& d: res) d = UNKNOWN_RESULT; + sscanf(s.c_str()+i, "%d;%lf;%lf;%lf;%lf;%lf;%lf;%lf;%lf;%lf;%lf;%lf;%lf", &id, &res[0], &res[1], &res[2], &res[3], &res[4], &res[5], &res[6], &res[7], &res[8], &res[9], &res[10], &res[11]); + } + } + } + + for(string orig: origs.names) { + + string dir = som_test_dir + "pairs" + cg(); + + for(string emb: embs.names) { + string fname = dir + "/" + orig + "-" + emb + ".txt"; + if(!file_exists(fname)) continue; + FILE *f = fopen(fname.c_str(), "rt"); + + string vorname = dir + "/" + orig + "-" + emb + ".vor"; + if(!file_exists(vorname)) continue; + FILE *vor = fopen(vorname.c_str(), "rt"); + + int No = origs.mdata[orig].size; + auto& edo = origs.mdata[orig].edges; + vector> edo_recreated; + + int new_results = 0; + + int current_row = -1; + + auto& res1 = results[orig + ";" + emb]; + for(int it=0; it<100; it++) { + + bool know_result = res1.count(it); + auto& res = res1[it]; + if(!know_result) + for(int kk=0; kk mapp(No, -2); + for(int k=0; k 1e-5) { + println(hlog, "ERROR in ", orig, "->", emb, " in ", cg(), " index ", it, " : was ", res[k], " is ", energy); + if(subdata_value) res[k] = energy; + } + } + else { + res[k] = energy; + } + } + } + } + next_pair: + fclose(f); + + if(new_results) println(hlog, "computed ", new_results, " new results in ", cg(), " for ", orig, " -> ", emb); + } + } + + fhstream o(som_test_dir + "summary" + cg() + ".html", "wt"); + println(o, "\n"); + + if(1) { + println(o, "\n"); + + int manis = 0; + + for(string orig: origs.names) { + + print(o, ""); + for(string emb: embs.names) print(o, ""); + print(o, "\n"); + + for(int k: {0, 2, 4, 7, 8, 10, 11}) { + print(o, ""); + + for(string emb: embs.names) { + ld tenergy = 0; + int att = 0; + auto& res1 = results[orig + ";" + emb]; + + for(int it=0; it<100; it++) { + if(!res1.count(it)) continue; + auto& res = res1[it]; + if(res[k] == UNKNOWN_RESULT) continue; + att++; tenergy += res[k]; + } + + if(orig == emb) + print(o, ""); + } + print(o, "\n"); + } + manis++; + } + print(o, "
", orig, "", emb, "
", measures::catnames[k], ""); + else if(manis & 1) + print(o, ""); + else + print(o, ""); + if(att == 100) + print(o, tenergy * 1. / att); + else if(att) + print(o, tenergy * 1. / att, "?"); + else + print(o, "???"); + print(o, "
\n"); + } + print(o, "\n"); + + fhstream res(som_test_dir + "table" + cg() + ".csv", "wt"); + print(res, "orig;emb;index"); + for(int i=0; i sample_vdata_id; + extern int tmax; extern int samples; extern int t, tmax, lpct, cells; @@ -33,6 +35,7 @@ extern int dispersion_count; extern double learning_factor, distmul; extern double ttpower; extern int min_group, max_group, columns; +extern bool dispersion_long; struct neuron { kohvec net; @@ -58,6 +61,8 @@ extern vector samples_to_show; extern vector net; extern vector colnames; +extern vector sample_sequence; + void initialize_neurons(); void initialize_neurons_initial(); void initialize_dispersion(); @@ -80,6 +85,8 @@ double vnorm(kohvec& a, kohvec& b); namespace embeddings { +ld gaussian_random(); + using kohonen::columns; using kohonen::kohvec; using kohonen::alloc; @@ -90,14 +97,59 @@ void mark_signposts(bool full, const vector& ac); void mark_signposts_subg(int a, int b, const vector& ac); void generate_rug(int i, bool close); void init_landscape(int dimensions); +void init_landscape_det(const vector& ac); extern map rug_coordinates; extern void get_coordinates(kohvec& v, cell *c, cell *c0); extern vector signposts; +extern map delta_at; + } +namespace voronoi { + +struct manifold { + + int N; /* the number of tiles */ + int T; /* the number of triangles */ + + /* triangles between three adjacent tiles. triangles[i] are the tiles of triangle i */ + vector> triangles; + + /* triangles_of_tile[t] is ids of triangles which contain t */ + vector> triangles_of_tile; + + map, vector> triangles_of_edge; + + void generate_data(); + }; + +inline string debug_str; +manifold build_manifold(const vector& cells); +vector > compute_voronoi_edges(manifold& m); +} + +namespace measures { + +struct manidata { + int size; + vector > distances; + vector > edges; + }; + +static const int MCOUNT = 12; + +extern vector catnames; + +vector> recreate_topology(const vector& mapp, const vector >& edges); +vector> build_distance_matrix(int N, const vector>& vedges); +ld evaluate_measure(manidata& emb, manidata& orig, vector& mapp, vector >& vor_edges, vector>& edo_recreated, int k); + +} + +static const string som_test_dir = "results/"; } diff --git a/rogueviz/som/measures.cpp b/rogueviz/som/measures.cpp new file mode 100644 index 00000000..76c651f8 --- /dev/null +++ b/rogueviz/som/measures.cpp @@ -0,0 +1,239 @@ +// test measures +// Copyright (C) 2011-2022 Tehora and Zeno Rogue, see 'hyper.cpp' for details + +#include "kohonen.h" + +namespace rogueviz { +namespace measures { + +double kendall(const vector>& allp) { + int maxo = 0, maxe = 0; + for(const auto& a: allp) maxo = max(maxo, a.first), maxe = max(maxe, a.second); + maxo++; maxe++; + + if(maxo >= 256 || maxe >= 256) { + println(hlog, "more than 256!\n"); + exit(1); + } + int cnt[256][256]; + for(int a=0; a counts(maxe, 0); + vector totals(maxe); + double tau = 0; + for(int i=0; i> recreate_topology(const vector& mapp, const vector >& edges) { + + auto cmapp = mapp; + for(int i=0; i= 0) cmapp[i] = i; + + while(true) { + vector > changes; + for(auto e: edges) { + if(cmapp[e.first] == -1 && cmapp[e.second] >= 0) changes.emplace_back(e.first, cmapp[e.second]); + if(cmapp[e.second] == -1 && cmapp[e.first] >= 0) changes.emplace_back(e.second, cmapp[e.first]); + } + if(changes.empty()) break; + for(auto ch: changes) cmapp[ch.first] = ch.second; + } + + set> subedges; + for(auto e: edges) { + auto a = cmapp[e.first], b = cmapp[e.second]; + if(a==b) continue; + if(a> result; + for(auto sube: subedges) + result.emplace_back(sube.first, sube.second); + + return result; + } + +vector> build_distance_matrix(int N, const vector>& vedges) { + vector> res; + res.resize(N); + for(auto& r: res) r.resize(N, 1000); + vector> edges_of(N); + for(auto [a, b]: vedges) edges_of[a].push_back(b), edges_of[b].push_back(a); + + for(int i=0; i bfs; + auto visit = [&] (int id, int dist) { + if(d[id] <= dist) return; + d[id] = dist; + bfs.push(id); + }; + visit(i, 0); + while(!bfs.empty()) { + auto j = bfs.front(); + for(auto e: edges_of[j]) visit(e, d[j]+1); + bfs.pop(); + } + } + + return res; + } + +vector catnames = {"energy", "rips", "kendall", "empty", "emptyx", "subedges", "emptyxx", "villman1", "villman2", "vedges", "maxvill1", "maxvill2"}; + +ld evaluate_measure(manidata& emb, manidata& orig, vector& mapp, vector >& vor_edges, vector>& edo_recreated, int k) { + + int No = orig.size; + int Ne = emb.size; + auto& edo = orig.edges; + auto& ede = emb.edges; + auto& dise = emb.distances; + auto& diso = orig.distances; + ld energy = 0; + + if(k == 2) { + vector > allp; + allp.reserve(No * No); + for(int i=0; i empty(Ne, true); + for(int i=0; i> adj(Ne); + vector> on(Ne); + for(int i=0; i 1 && diso[oi1][oi2] <= 1) + empty = false; + } + if(empty && k == 6) { + for(auto i1: adj[i]) + for(auto i2: adj[i]) + for(auto i11: adj[i1]) { + for(auto i21: adj[i2]) + for(auto oi1: on[i11]) + for(auto oi2: on[i21]) + if(dise[i11][i21] == dise[i11][i] + dise[i21][i] && diso[oi1][oi2] <= 1) + empty = false; + + for(auto oi1: on[i11]) + for(auto oi2: on[i2]) + if(dise[i11][i2] == dise[i11][i] + dise[i2][i] && diso[oi1][oi2] <= 1) + empty = false; + } + } + if(empty) energy++; + } + for(auto p: edo_recreated) + diso[p.first][p.second] += 100, + diso[p.second][p.first] += 100; + } + else if(k == 5) energy = isize(edo_recreated); + else if(k == 1) { + for(auto [a,b]: edo_recreated) { + if(mapp[a] == -1 || mapp[b] == -1) continue; + int d = dise[mapp[a]][mapp[b]]; + if(k == 9) + energy += d * d; + else if(k == 1) + energy += (d < 2 ? 0 : d == 2 ? 1 : 100); + } + } + else if(k == 0) { + for(auto [a,b]: edo) { + if(mapp[a] == -1 || mapp[b] == -1) continue; + int d = dise[mapp[a]][mapp[b]]; + energy += d * d; + } + } + else if(k == 7) { + for(auto [a,b]: vor_edges) { + int d = dise[a][b] - 1; + energy += d*d; + } + } + else if(k == 8) { + auto disv = build_distance_matrix(Ne, vor_edges); + int bugs = 0; + for(auto [a,b]: ede) { + int d = disv[a][b] - 1; + if(d >= 900) bugs++; + energy += d*d; + } + if(bugs) println(hlog, "bugs=", bugs); + } + else if(k == 10) { + for(auto [a,b]: vor_edges) + energy = max(energy, dise[a][b] - 1); + } + else if(k == 11) { + auto disv = build_distance_matrix(Ne, vor_edges); + for(auto [a,b]: ede) + energy = max(energy, disv[a][b] - 1); + } + else if(k == 9) + energy = isize(vor_edges); + return energy; + } + +/* +void test_kendall() { + for(string orig: origs.names) { + int No = origs.mdata[orig].size; + vector > allp; + auto& diso = origs.mdata[orig].distances; + + for(int i=0; i + +namespace rogueviz { + +transmatrix& memo_relative_matrix(cell *c1, cell *c2); + +namespace kohonen_test { + +using namespace kohonen; + +void equal_weights() { + alloc(weights); + for(auto& w: weights) w = 1; + } + +void show_all() { + samples_to_show.clear(); + for(int i=0; i > voronoi_edges; + +bool nmap; + +vector where; +int max_distance = 999; + +bool using_subdata; +vector orig_data; +vector sub_indices; +vector inv_sub_indices; + +void ideal() { + for(int i=0; i is_special; +vector ctrdist; +int ctrdist_max; + +voronoi::manifold data_manifold; + +using measures::manidata; +manidata test_emb, test_orig; + +void create_manidata(manidata& mdata) { + auto ac = gen_neuron_cells(); + auto& edges = mdata.edges; + edges.clear(); + mdata.size = isize(ac); + mdata.distances.clear(); + for(int i=0; igamestart(); + where.clear(); + is_special.clear(); + ctrdist.clear(); + ctrdist_max = 0; + edge_data = false; + + drawthemap(); + data.clear(); samples_to_show.clear(); + auto ac = gen_neuron_cells(); + + for(auto c: ac) { + if(celldistance(c, c0) > max_distance) continue; + where.push_back(c); + sample s; + embeddings::get_coordinates(s.val, c, c0); + data.push_back(move(s)); + } + samples = isize(data); + test_orig.size = samples; + colnames.resize(columns); + for(int i=0; itype != (S3 == 3 ? 6 : 4); + is_special.push_back(sign); + ctrdist.push_back(celldist(where[i])); + ctrdist_max = max(ctrdist_max, ctrdist.back()); + } + + create_manidata(test_orig); + } + +void create_subdata(int qty) { + if(!using_subdata) + orig_data = data; + using_subdata = true; + int N = isize(orig_data); + sub_indices.resize(N); + for(int i=0; idead = true; + vdata.clear(); + sample_vdata_id.clear(); + state &= ~KS_SAMPLES; + + for(int idx: sub_indices) data.push_back(orig_data[idx]); + samples = isize(data); + + inv_sub_indices.clear(); + inv_sub_indices.resize(N, -1); + for(int i=0; i ids; + for(auto e: edges) { + if(e.first == i) ids.push_back(e.second); + if(e.second == i) ids.push_back(e.first); + } + vdata[i].name = lalign(0, "#", i, " ", ids); + } + + auto any = add_edgetype("adjacent"); + + if(!using_subdata) { + for(auto e: edges) + addedge(e.first, e.second, 1, false, any); + } + else { + vector nearest(isize(orig_data), -1); + set> subedges; + for(int i=0; i > changes; + for(auto e: edges) { + if(nearest[e.first] == -1 && nearest[e.second] >= 0) changes.emplace_back(e.first, nearest[e.second]); + if(nearest[e.second] == -1 && nearest[e.first] >= 0) changes.emplace_back(e.second, nearest[e.first]); + } + if(changes.empty()) break; + // hrandom_shuffle(changes); + for(auto ch: changes) nearest[ch.first] = ch.second; + } + for(auto e: edges) + if(nearest[e.first] != nearest[e.second]) + subedges.emplace(nearest[e.first], nearest[e.second]); + // for(auto se: subedges) println(hlog, "subedges = ", se); + for(auto sube: subedges) + addedge(sube.first, sube.second, 1, false, any); + } + + println(hlog, "edgedata created, ", using_subdata); + } + +void sphere_test() { + + create_data(); + + initialize_dispersion(); + initialize_neurons_initial(); + + analyze(); + + create_edgedata(); + + ideal(); + analyze(); + } + +void sphere_test_no_disp() { + + create_data(); + + initialize_neurons_initial(); + analyze(); + create_edgedata(); + ideal(); + analyze(); + } + +void check_energy() { + vector dlist; + + vector win_cells(samples); + for(int i=0; i distlist; + for(int i=0; i mapp(test_orig.size, 0); + map id; + for(int i=0; i> edo_recreated = measures::recreate_topology(mapp, test_orig.edges); + for(int k=0; k get_parameters() { + return vector { ttpower, learning_factor, gaussian ? distmul : dispersion_end_at-1 }; + } + +void set_parameters(const vector& v) { + ttpower = v[0]; + learning_factor = v[1]; + if(gaussian) distmul = v[2]; + else dispersion_end_at = v[2] + 1; + } + +void som_table() { + sphere_test(); + + map, pair > tries; + + map, string> sucorder; + + auto bttpower = ttpower; + auto blearning = learning_factor; + auto bdist = distmul; + auto bdispe = dispersion_end_at - 1; + + ld last_distmul = -1; + + auto set_parameters = [&] (array& u) { + distmul = bdist * exp(u[0] / 5.); + dispersion_end_at = 1 + bdispe * exp(u[0] / 5.); + ttpower = bttpower * exp(u[2] / 5.); + learning_factor = blearning * exp(u[1] / 5.); + + if(last_distmul != distmul) { + last_distmul = distmul, state &=~ KS_DISPERSION; + } + }; + + array best = {0, 0, 0}; + + int maxtry = 20; + + while(true) { + + array best_at; + ld bestval = -1; + + vector vals; + + for(int k=0; k<27; k++) { + array cnt; + int k1 = k; + for(int i=0; i<3; i++) cnt[2-i] = best[2-i] + (k1%3-1), k1 /= 3; + set_parameters(cnt); + do { + tries[cnt].second++; + dynamicval dd(debugflags, 0); + bool chk = check(false); + if(chk) + tries[cnt].first++; + sucorder[cnt] += (chk ? 'y' : 'n'); + } + while(tries[cnt].second < maxtry); + ld val_here = tries[cnt].first * 1. / tries[cnt].second; + if(val_here > bestval) bestval = val_here, best_at = cnt; + vals.push_back(val_here); + } + + sort(vals.begin(), vals.end()); + + best = best_at; + set_parameters(best); + println(hlog, "score ", bestval, " at ", best_at, " : ", tie(distmul, dispersion_end_at, learning_factor, ttpower), " x", tries[best].second, " s=", vals[vals.size()-2]); + if(tries[best].second > maxtry) + maxtry = tries[best].second; + + if(tries[best].second >= 1000) { + println(hlog, "suc ", best, " :\n", sucorder[best]); + + for(int vv=10; vv>=0; vv--) { + dynamicval ks(qpct, 0); + t = vv ? tmax * vv / 10 : 1; + step(); + println(hlog, "t=", t); + println(hlog, "dispersion_count = ", dispersion_count); + } + + return; + } + } + } + +vector shapelist; +map shapes; +map embeddings; + +int data_scale = 1; +int embed_scale = 1; +int current_scale = 1; + +void set_gp(int a, int b) { + a *= current_scale; b *= current_scale; + if(a == 1 && b == 1) + set_variation(eVariation::bitruncated); + else if(a == 1 && b == 0) + set_variation(eVariation::pure); + else { + set_variation(eVariation::goldberg); + gp::param = gp::loc(a, b); + } + } + +void set_restrict() { + kohonen::kqty = 5000; + kohonen::kohrestrict = 520 * current_scale * current_scale; + } + +void set_torus2(int a, int b, int c, int d, int e) { + using namespace euc; + auto& T0 = eu_input.user_axes; + T0[0][0] = a; + T0[0][1] = b; + T0[1][0] = c; + T0[1][1] = d; + eu_input.twisted = e; + build_torus3(); + } + +int dim = 10; + +string emb; + +void add(string name, reaction_t embed, reaction_t set) { + shapelist.push_back(name); + shapes[name] = set; + embeddings[name] = embed; + } + +void set_euclid3(int x, int y, int z, int twist) { + using namespace euc; + auto& T0 = eu_input.user_axes; + for(int i=0; i<3; i++) + for(int j=0; j<3; j++) T0[i][j] = 0; + + T0[0][0] = x; + T0[1][1] = y; + T0[2][2] = z; + + eu_input.twisted = twist; + build_torus3(); + } + +void klein_signposts() { + embeddings::etype = embeddings::eSignpost; + println(hlog, "marking klein signposts"); + embeddings::signposts.clear(); + + for(cell *c: currentmap->allcells()) setdist(c, 7, nullptr); + + for(int x=0; x<4; x++) + for(int y=0; y<13; y++) { + cellwalker cw = cellwalker(currentmap->gamestart(), 0); + for(int i=0; itype; i++) { + shiftpoint h = tC0(g * currentmap->adj(w, i)); + hyperpoint onscreen; + applymodel(h, onscreen); + maxs = max(maxs, onscreen[0] / cd->xsize); + maxs = max(maxs, onscreen[1] / cd->ysize); + } + } + pconf.alpha = 1; + pconf.scale = pconf.scale / 2 / maxs / cd->radius; + pconf.scale /= 1.2; + if(bounded) pconf.scale = WDIM == 3 ? 0.2 : 0.07; + } + + if(GDIM == 3) pmodel = mdPerspective; + if(nil || sol) pmodel = mdGeodesic; + + vid.use_smart_range = 2; + vid.smart_range_detail = 7; + vid.cells_generated_limit = 999999; + vid.cells_drawn_limit = 200000; + } + +void shot_settings_png() { + vid.use_smart_range = 2; + vid.smart_range_detail = 0.5; + + shot::shotx = 500; + shot::shoty = 500; + } + +bool more = true; + +void create_index() { + + system(("mkdir " + som_test_dir).c_str()); + + fhstream f(som_test_dir + "index-" + its(current_scale) + ".html", "wt"); + + fhstream csv(som_test_dir + "manifold-data-" + its(current_scale) + ".csv", "wt"); + + fhstream tex(som_test_dir + "manifold-data-" + its(current_scale) + ".tex", "wt"); + println(f, ""); + + // fhstream distf(som_test_dir + "distlists-" + its(current_scale) + ".txt", "wt"); + + bool add_header = true; + + for(auto s: shapelist) { + + sphere_data = false; + println(hlog, "building ", s); + kohonen::kqty = kohonen::krad = 0; + kohonen::kohrestrict = 999999999; + stop_game(); + shapes[s](); + // if(!euclid) continue; + start_game(); + + initialize_rv(); + embeddings[s](); + + println(hlog, "create_data"); + create_data(); + + println(hlog, "sphere_test"); + sphere_test_no_disp(); + + println(hlog, "building disttable"); + + vector disttable(100, 0); + int pairs = 0; + + test_orig.distances = measures::build_distance_matrix(test_orig.size, test_orig.edges); + int N = test_orig.size; + for(int i=0; i
"); + + println(f, "shape ", s, " : ", samples, " items, ", isize(test_orig.edges), " edges, dim ", columns, " (", emb, "), ", full_geometry_name()); + + println(f, "
"); + fflush(f.f); + + again: + if(add_header) print(csv, "name"); else print(csv, s); + if(add_header) print(tex, "name"); else print(tex, s); + #define Out(title,value) if(add_header) { print(csv, ";", title); print(tex, "&", title); } else { print(csv, ";", value); print(tex, "&", value); } + + double avgdist = 0, avgdist2 = 0, sqsum = 0; + int maxdist = 0; + for(int i=0; i<100; i++) { + if(disttable[i] > 0) maxdist = i; + avgdist += i * disttable[i]; + avgdist2 += i * i * disttable[i]; + sqsum += disttable[i] * (disttable[i]-1.); + } + disttable.resize(maxdist + 1); + if(more) println(hlog, disttable, " pairs = ", pairs); + + avgdist /= pairs; + avgdist2 /= pairs; + double kmax = 1 - sqsum / (pairs * (pairs-1.)); + + Out("samples", samples); + Out("edges", isize(test_orig.edges)); + Out("columns", columns); + Out("embtype", emb); + Out("gpx", gp::univ_param().first); + Out("gpy", gp::univ_param().second); + Out("orientable", nonorientable ? 0 : 1); + Out("symmetric", (flags & m_symmetric) ? 1 : 0); + Out("closed", bounded ? 1 : 0); + Out("quotient", quotient ? 1 : 0); + Out("dim", WDIM); + Out("valence", S3); + Out("tile", S7); + + println(hlog, "gen neuron cells"); + auto ac = gen_neuron_cells(); + int sum = 0; + for(cell *c: ac) sum += c->type; + ld curvature = (S3 == 3 ? 6 : S3 >= OINF ? 2 : 4) - sum * 1. / isize(ac); + if(GDIM == 3) curvature = hyperbolic ? -1 : sphere ? 1 : 0; + Out("curvature", curvature); + + println(hlog, "compute geometry data"); + auto gd = compute_geometry_data(); + Out("euler", gd.euler); + Out("area", gd.area); + + Out("geometry", + hyperbolic ? "hyperbolic" : + euclid ? "euclidean" : + sphere ? "spherical" : + "other"); + + if(more) { + Out("maxdist", maxdist); + Out("avgdist", avgdist); + Out("avgdist2", avgdist2); + Out("kmax", kmax); + } + + println(csv); println(tex, "\\\\"); + fflush(csv.f); fflush(f.f); fflush(tex.f); + + if(add_header) { add_header = false; goto again; } + + println(hlog, "geom = ", s, " delta = ", isize(embeddings::delta_at)); + } + + println(f, ""); + } + +unsigned hash(string s) { + unsigned res = 0; + for(char c: s) res = res * 37 + c; + return res; + } + +int subdata_value; + +bool only_landscape; + +string cg() { + string s = ""; + if(kohonen::gaussian == 1) s += "-cg"; + if(kohonen::gaussian == 2) s += "-gg"; + if(kohonen::dispersion_long) s += "-dl"; + if(ttpower != 1) s += "-tt" + lalign(0, ttpower); + if(subdata_value) s += "-s" + its(subdata_value); + if(landscape_dim) s += "-l" + its(landscape_dim); + if(data_scale) s += "-d" + its(data_scale); + if(embed_scale) s += "-e" + its(embed_scale); + return s; + } + +vector > saved_data; + +void all_pairs(bool one) { + + string dir = som_test_dir + "pairs" + cg(); + + system(("mkdir -p " + dir + "/img").c_str()); + + int sid = 0; + for(auto s1: shapelist) { + for(auto s2: shapelist) { + sid++; + + if(kohonen::gaussian == 2 && s2.substr(0, 4) != "disk" && s2.substr(0, 6) != "sphere") continue; + if(only_landscape && s1.substr(0, 4) != "disk") continue; + + string fname_vor = dir + "/" + s1 + "-" + s2 + ".vor"; + string fname = dir + "/" + s1 + "-" + s2 + ".txt"; + if(file_exists(fname)) continue; + + fhstream f(fname, "wt"); + fhstream fvor(fname_vor, "wt"); + + println(hlog, "mapping ", s1, " to " ,s2); + + shrand(hash(s1) ^ hash(s2)); + + sphere_data = false; using_subdata = false; + kohonen::kqty = kohonen::krad = 0; + kohonen::kohrestrict = 999999999; + current_scale = data_scale; + stop_game(); + shapes[s1](); + start_game(); + + initialize_rv(); + + if(landscape_dim) { + saved_data.clear(); + for(int i=0; i<100; i++) { + sphere_data = false; + data.clear(); + embeddings[s1](); + create_data(); + saved_data.push_back(data); + if(i < 5) + for(int j=0; j<20; j++) + println(hlog, "saved data ", i, ":", j, ": ", saved_data[i][j].val); + } + } + else { + embeddings[s1](); + create_data(); + } + + stop_game(); + + kohonen::kqty = kohonen::krad = 0; + kohonen::kohrestrict = 999999999; + + current_scale = embed_scale; + shapes[s2](); + + initialize_dispersion(); + initialize_neurons_initial(); + + analyze(); + + create_edgedata(); + + int orig_samples = samples; + + for(int i=0; i<100; i++) { + println(hlog, "iteration ", lalign(3, i), " of ", fname); + + if(landscape_dim) { data = orig_data = saved_data[i]; } + + if(subdata_value) create_subdata(subdata_value); + + set_neuron_initial(); + + if(0) { + println(hlog, "initial neuron values:"); indenter ind(2); + for(auto& n: net) println(hlog, n.net); + } + + t = tmax; + dynamicval ks(qpct, 0); + while(!finished()) kohonen::step(); + + for(int i=0; i rnd; + for(int i=0; i<10; i++) rnd.push_back(hrand(1000)); + println(hlog, "finished ", s1, "-", s2, " rnd = ", rnd); + println(rndcheck, "finished ", s1, "-", s2, " rnd = ", rnd); + } + + if(one) exit(0); + } + } + + system("touch done"); + } + +bool verify_distlists = false; + +void create_edgelists() { + fhstream f("results/edgelists-" + its(current_scale) + ".txt", "wt"); + for(auto s: shapelist) { + + sphere_data = false; + kohonen::kqty = kohonen::krad = 0; + kohonen::kohrestrict = 999999999; + stop_game(); + shapes[s](); + // if(!euclid) continue; + start_game(); + + initialize_rv(); + embeddings[s](); + + println(hlog, "create_data"); + create_data(); + + println(hlog, "sphere_test"); + sphere_test_no_disp(); + + auto ac = gen_neuron_cells(); + + int N = isize(ac); + int M = isize(test_orig.edges); + print(f, s, "\n", N, " ", M); + for(auto e: test_orig.edges) print(f, " ", e.first, " ", e.second); + + if(verify_distlists) { + test_orig.distances = measures::build_distance_matrix(test_orig.size, test_orig.edges); + for(int i=0; iwall = waPlatform; + } + + else if(argis("-som-rug")) { + PHASE(3); + start_game(); + initialize_rv(); + shift(); embeddings::generate_rug(argi(), true); + } + + else if(argis("-som-rug-show")) { + PHASE(3); + start_game(); + initialize_rv(); + shift(); embeddings::generate_rug(argi(), false); + } + + else if(argis("-som-proj")) { + PHASE(3); embeddings::etype = embeddings::eProjection; + } + + else if(argis("-som-emb")) { + PHASE(3); embeddings::etype = embeddings::eNatural; + } + + /* other stuff */ + + else if(argis("-som-test")) { + PHASE(3); + start_game(); + sphere_test(); + println(hlog, "data = ", isize(data), " neurons = ", isize(net)); + } + + else if(argis("-som-cdata")) { + PHASE(3); + start_game(); + create_data(); + } + + else if(argis("-subdata")) { + shift(); + create_subdata(argi()); + } + + else if(argis("-subdata-val")) { + shift(); + subdata_value = argi(); + } + + else if(argis("-landscape-dim")) { + shift(); landscape_dim = argi(); + } + + else if(argis("-som-optimize")) { + PHASE(3); + start_game(); + // som_optimize(); + } + + else if(argis("-som-table")) { + PHASE(3); + start_game(); + som_table(); + } + + else if(argis("-som-scale-data")) { + PHASE(3); + shift(); current_scale = data_scale = argi(); + } + + else if(argis("-som-scale-embed")) { + PHASE(3); + shift(); current_scale = embed_scale = argi(); + } + + else if(argis("-by-name")) { + PHASE(3); + init_shapes(); + land_structure = lsSingle; + shift(); string s = args(); + + if(shapes.count(s)) { + kohonen::kqty = kohonen::krad = 0; + kohonen::kohrestrict = 999999999; + stop_game(); + shapes[s](); + start_game(); + initialize_rv(); + embeddings[s](); + println(hlog, "embedding used: ", emb, " for: ", s); + } + else { + println(hlog, "shape unknown: ", s); + } + } + + else if(argis("-only-landscape")) { + only_landscape = true; + } + + else if(argis("-som-experiment")) { + PHASE(3); + init_shapes(); + land_structure = lsSingle; + all_pairs(false); + } + + else if(argis("-som-experiment1")) { + PHASE(3); + init_shapes(); + land_structure = lsSingle; + all_pairs(true); + } + + else if(argis("-som-experiment-index")) { + PHASE(3); + init_shapes(); + land_structure = lsSingle; + create_index(); + } + + else if(argis("-edgecheck")) { + PHASE(3); + unsigned x = 1; + for(auto e: test_orig.edges) { + x*= 7; + x += e.first; + x += 123 * e.second; + } + println(hlog, "x = ", x, " edges = ", isize(test_orig.edges)); + } + + else if(argis("-som-experiment-tables")) { + PHASE(3); + init_shapes(); + land_structure = lsSingle; + create_edgelists(); + } + + else if(argis("-ex")) exit(0); + + else return 1; + return 0; + } + +auto hooks3 = addHook(hooks_args, 100, readArgs); +#endif + +auto khook = addHook(hooks_handleKey, 150, kst_key) + + addHook(hooks_configfile, 100, [] { + param_i(ks_empty, "ks_empty", 0); + param_i(ks_distant, "ks_distant", 0); + param_i(ks_nonadj, "ks_nonadj", 0); + param_i(max_distance, "ks_max"); + }) + + addHook(hooks_markers, 100, [] () { + int N = isize(net); + bool multidraw = quotient; + bool use_brm = bounded && isize(currentmap->allcells()) <= brm_limit; + vid.linewidth *= 3; + for(auto e: voronoi_edges) + if(e.first < N && e.second < N) + for(const shiftmatrix& V1 : hr::span_at(current_display->all_drawn_copies, net[e.first].where)) { + if(use_brm) { + auto V2 = brm_get(net[e.first].where, C0, net[e.second].where, C0); + queueline(V1*C0, V1*V2*C0, 0xC010C0FF, vid.linequality + 3); + } + else if(multidraw || elliptic) { + auto V2 = memo_relative_matrix(net[e.second].where, net[e.first].where); + queueline(V1*C0, V1*V2*C0, 0xC010C0FF, vid.linequality + 3); + } + else + for(const shiftmatrix& V2 : hr::span_at(current_display->all_drawn_copies, net[e.second].where)) + queueline(V1*C0, V2*C0, 0xC010C0FF, vid.linequality + 3); + } + vid.linewidth /= 3; + }) + + addHook(anims::hooks_record_anim, 100, [] (int i, int noframes) { + bool steps = false; + ld nf = noframes; + while(t * nf > (nf - i) * tmax) + step(), steps = true; + if(steps) analyze(); + }); + +}} + diff --git a/rogueviz/som/voronoi.cpp b/rogueviz/som/voronoi.cpp new file mode 100644 index 00000000..a935549f --- /dev/null +++ b/rogueviz/som/voronoi.cpp @@ -0,0 +1,258 @@ +// Voronoi used to measure the quality of the embedding (Villman's measure) +// Copyright (C) 2011-2022 Tehora and Zeno Rogue, see 'hyper.cpp' for details + +#include "kohonen.h" + +namespace rogueviz { + +namespace voronoi { + +void manifold::generate_data() { + T = isize(triangles); + triangles_of_tile.resize(N); + + for(int i=0; i& cells) { + map neuron_id; + int N = isize(cells); + for(int i=0; i > faces_seen; + + for(auto c: cells) { + for(int i=0; itype; i++) { + cellwalker cw1(c, i); + cellwalker cw2 = cw1; + vector tlist; + do { + cw2 += wstep; + cw2++; + auto p = at_or_null(neuron_id, cw2.at); + if(!p) goto next; + tlist.push_back(*p); + } + while(cw2 != cw1); + if(1) { + int minv = 0; + for(int i=0; i tlist_sorted; + for(int i=minv; i tlist_sorted.back()) + reverse(tlist_sorted.begin()+1, tlist_sorted.end()); + faces_seen.insert(tlist_sorted); + } + next: ; + } + } + + manifold m; + m.N = N; + for(const auto& v: faces_seen) + for(int i=2; i > compute_voronoi_edges(manifold& m) { + + using kohonen::net; + using kohonen::vnorm; + using kohonen::vshift; + using kohonen::data; + using kohonen::kohvec; + using kohonen::samples; + + vector best_tile; + /* for every neuron, compute its best tile */ + int N = isize(net); + for(int i=0; i; + set< neuron_triangle_pair > visited; + queue q; + + vector projected(N); + + auto visit = [&] (neuron_triangle_pair p) { + if(visited.count(p)) return; + visited.insert(p); + q.push(p); + }; + + kohvec at; + kohonen::alloc(at); + + auto project = [&] (kohvec& at, const array& tri, int i, int j) { + int k = SUBD-i-j; + for(auto& x: at) x = 0; + vshift(at, data[tri[0]].val, i * 1. / SUBD); + vshift(at, data[tri[1]].val, j * 1. / SUBD); + vshift(at, data[tri[2]].val, k * 1. / SUBD); + }; + + set already_picked; + + map which_best; + + /* project all the net[ni].net on the manifold */ + for(int ni=0; ni triangles_to_visit; + queue triangles_queue; + + auto visit1 = [&] (int tri) { + if(triangles_to_visit.count(tri)) return; + triangles_to_visit.insert(tri); + triangles_queue.push(tri); + }; + + for(int tr: m.triangles_of_tile[best_tile[ni]]) + visit1(tr); + + auto& bes = which_best[ni]; + + while(!triangles_queue.empty()) { + int tri = triangles_queue.front(); + triangles_queue.pop(); + + for(int i=0; i<=SUBD; i++) + for(int j=0; j<=SUBD-i; j++) { + project(at, m.triangles[tri], i, j); + ld dist = vnorm(at, net[ni].net); + if(dist < best_dist && !already_picked.count(at)) { + best_dist = dist, best = at, best_tri = tri; + bes = lalign(0, tie(tri, i, j)); + better = [&tri, i, j, &m, &visit1] () { + auto flip_edge = [&] (int t1, int t2) { + if(t2 < t1) swap(t1, t2); + for(auto tri1: m.triangles_of_edge[{t1, t2}]) + visit1(tri1); + }; + auto& tria = m.triangles[tri]; + if(i == 0) flip_edge(tria[1], tria[2]); + if(j == 0) flip_edge(tria[0], tria[2]); + if(i+j == SUBD) flip_edge(tria[0], tria[1]); + }; + } + } + + better(); + } + projected[ni] = best; + already_picked.insert(best); + visit({ni, best_tri}); + } + + struct triangle_data { + double dist[SUBD+1][SUBD+1]; + int which[SUBD+1][SUBD+1]; + triangle_data() { + for(int i=0; i<=SUBD; i++) + for(int j=0; j<=SUBD; j++) + dist[i][j] = HUGE_VAL, which[i][j] = -1; + } + }; + + vector tdatas(m.T); + + while(!q.empty()) { + auto ntp = q.front(); + q.pop(); + auto ni = ntp.first; + auto& tri = m.triangles[ntp.second]; + auto& td = tdatas[ntp.second]; + + for(int i=0; i<=SUBD; i++) + for(int j=0; j<=SUBD-i; j++) { + project(at, tri, i, j); + ld dist = vnorm(at, projected[ni]); + auto& odist = td.dist[i][j]; + bool tie = abs(dist - odist) < 1e-6; + if(tie ? ni < td.which[i][j] : dist < odist) { + td.dist[i][j] = dist, + td.which[i][j] = ni; + auto flip_edge = [&] (int t1, int t2) { + if(t2 < t1) swap(t1, t2); + for(auto tr: m.triangles_of_edge[{t1, t2}]) { + visit({ni, tr}); + } + }; + if(i == 0) flip_edge(tri[1], tri[2]); + if(j == 0) flip_edge(tri[0], tri[2]); + if(i+j == SUBD) flip_edge(tri[0], tri[1]); + } + } + } + + set > voronoi_edges; + + auto add_edge = [&] (int i, int j) { + if(i>j) swap(i, j); + if(i==j) return; + voronoi_edges.insert({i, j}); + }; + + for(auto& td: tdatas) { + for(int i=0; i<=SUBD; i++) + for(int j=0; j<=SUBD-i; j++) { + if(i>0) add_edge(td.which[i][j], td.which[i-1][j]); + if(j>0) add_edge(td.which[i][j], td.which[i][j-1]); + if(j>0) add_edge(td.which[i][j], td.which[i+1][j-1]); + } + } + + if(1) { + vector degs(N, 0); + for(auto e: voronoi_edges) degs[e.first]++, degs[e.second]++; + for(int v=0; v > result; + for(auto ve: voronoi_edges) result.push_back(ve); + return result; + } + +} +}