// Hyperbolic Rogue // Copyright (C) 2011-2018 Zeno and Tehora Rogue, see 'hyper.cpp' for details // Kohonen's self-organizing maps. // This is a part of RogueViz, not a part of HyperRogue. namespace rogueviz { namespace kohonen { int cols; typedef vector kohvec; struct sample { kohvec val; string name; }; vector data; map sample_vdata_id; int whattodraw[3] = {-2,-2,-2}; int min_group = 10, max_group = 10; struct neuron { kohvec net; cell *where; double udist; int lpbak; int col; int allsamples, drawn_samples, csample, bestsample, max_group_here; neuron() { drawn_samples = allsamples = bestsample = 0; max_group_here = max_group; } }; vector colnames; kohvec weights; vector net; int neuronId(neuron& n) { return &n - &(net[0]); } void alloc(kohvec& k) { k.resize(cols); } bool neurons_indexed = false; int samples; template T sqr(T x) { return x*x; } vector whowon; void normalize() { alloc(weights); for(int k=0; k samples_to_show; void loadsamples(const string& fname) { FILE *f = fopen(fname.c_str(), "rt"); if(!f) { fprintf(stderr, "Could not load samples: %s\n", fname.c_str()); return; } if(fscanf(f, "%d", &cols) != 1) { printf("Bad format: %s\n", fname.c_str()); fclose(f); return; } printf("Loading samples: %s\n", fname.c_str()); while(true) { sample s; bool shown = false; alloc(s.val); if(feof(f)) break; for(int i=0; ilandparam, n.where->landparam = neuronId(n); } else { for(neuron& n: net) n.where->landparam = n.lpbak; } } neuron *getNeuron(cell *c) { if(!c) return NULL; setindex(true); if(c->landparam < 0 || c->landparam >= cells) return NULL; neuron& ret = net[c->landparam]; if(ret.where != c) return NULL; return &ret; } neuron *getNeuronSlow(cell *c) { if(neurons_indexed) return getNeuron(c); for(neuron& n: net) if(n.where == c) return &n; return NULL; } double maxudist; neuron *distfrom; void coloring() { if(noshow) return; setindex(false); bool besttofind = true; for(int pid=0; pid<3; pid++) { int c = whattodraw[pid]; if(c == -5) { if(besttofind) { besttofind = false; for(neuron& n: net) { double bdiff = 1e20; for(auto p: sample_vdata_id) { double diff = vnorm(n.net, data[p.first].val); if(diff < bdiff) bdiff = diff, n.bestsample = p.second; } } } for(int i=0; ilandparam, pid) = part(vdata[net[i].bestsample].cp.color1, pid+1); } else { vector listing; for(neuron& n: net) switch(c) { case -4: listing.push_back(log(5+n.allsamples)); break; case -3: if(distfrom) listing.push_back(vnorm(n.net, distfrom->net)); else listing.push_back(0); break; case -2: listing.push_back(n.udist); break; case -1: listing.push_back(-n.udist); break; default: listing.push_back(n.net[c]); break; } double minl = listing[0], maxl = listing[0]; for(double& d: listing) minl = min(minl, d), maxl = max(maxl, d); if(maxl-minl < 1e-3) maxl = minl+1e-3; for(int i=0; ilandparam, pid) = (255 * (listing[i] - minl)) / (maxl - minl); } } } void distribute_neurons() { whowon.resize(samples); for(neuron& n: net) n.drawn_samples = 0, n.csample = 0; for(auto p: sample_vdata_id) { int s = p.first; auto& w = winner(s); whowon[s] = &w; w.drawn_samples++; } ld rad = .25 * scalefactor; for(auto p: sample_vdata_id) { int id = p.second; int s = p.first; auto& w = *whowon[s]; vdata[id].m->base = w.where; vdata[id].m->at = spin(2*M_PI*w.csample / w.drawn_samples) * xpush(rad * (w.drawn_samples-1) / w.drawn_samples); w.csample++; } shmup::fixStorage(); setindex(false); } void analyze() { setindex(true); maxudist = 0; for(neuron& n: net) { int qty = 0; double total = 0; forCellEx(c2, n.where) { neuron *n2 = getNeuron(c2); if(!n2) continue; qty++; total += sqrt(vnorm(n.net, n2->net)); } n.udist = total / qty; maxudist = max(maxudist, n.udist); } if(!noshow) distribute_neurons(); coloring(); } // traditionally Gaussian blur is used in the Kohonen algoritm // but it does not seem to make much sense in hyperbolic geometry // especially wrapped one. // GAUSSIAN==1: use the Gaussian blur, on celldistance // GAUSSIAN==2: use the Gaussian blur, on true distance // GAUSSIAN==0: simulate the dispersion on our network int gaussian = 0; double mydistance(cell *c1, cell *c2) { if(gaussian == 2) return hdist(tC0(ggmatrix(c1)), tC0(ggmatrix(c2))); else return celldistance(c1, c2); } struct cellcrawler { struct cellcrawlerdata { cellwalker orig; int from, spin, dist; cellwalker target; cellcrawlerdata(const cellwalker& o, int fr, int sp) : orig(o), from(fr), spin(sp) {} }; vector data; void store(const cellwalker& o, int from, int spin, manual_celllister& cl) { if(!cl.add(o.at)) return; data.emplace_back(o, from, spin); } void build(const cellwalker& start) { data.clear(); manual_celllister cl; store(start, 0, 0, cl); for(int i=0; itype; j++) { cellwalker cw = cw0 + j + wstep; if(!getNeuron(cw.at)) continue; store(cw, i, j, cl); } } if(gaussian) for(cellcrawlerdata& s: data) s.dist = mydistance(s.orig.at, start.at); } void sprawl(const cellwalker& start) { data[0].target = start; for(int i=1; i> dispersion; }; double dispersion_end_at = 1.5; double dispersion_precision = .0001; int dispersion_each = 1; int dispersion_count; void buildcellcrawler(cell *c, cellcrawler& cr, int dir) { cr.build(cellwalker(c,dir)); if(!gaussian) { vector curtemp; vector newtemp; vector qty; vector > pairs; int N = isize(net); curtemp.resize(N, 0); newtemp.resize(N, 0); qty.resize(N, 0); for(int i=0; i vmin * dispersion_end_at; iter++) { if(iter % dispersion_each == 0) { d.emplace_back(N); auto& dispvec = d.back(); for(int i=0; i vmax) vmax = curtemp[i]; } dispersion_count = isize(d); printf("Dispersion count = %d\n", dispersion_count); } } map scc; pair get_cellcrawler_id(cell *c) { if(among(geometry, gZebraQuotient, gMinimal)) { // Zebra Quotient does exhibit some symmetries, // but these are so small anyway that it is safer to just build // a crawler for every neuron return make_pair(neuronId(*getNeuronSlow(c)), 0); } if(torus && (torusconfig::tmflags() & torusconfig::TF_KLEIN)) return make_pair(cell_to_pair(c).second * 2 + ctof(c), 0); int id = 0, dir = 0; if(GOLDBERG) { gp::local_info li = gp::get_local_info(c); id = (li.relative.first & 15) + (li.relative.second & 15) * 16 + fix6(li.total_dir) * 256; // ld = li.last_dir; } else { id = c->type == S7; // if(id == 0) ld = c->c.spin(0); } /* if(geometry == gZebraQuotient) { id = 8*id + ld; id = 64 * id + c->master->zebraval; return make_pair(id, 0); } */ return make_pair(id, dir); } /* unit test: do the crawlers work correctly? */ bool verify_crawler(cellcrawler& cc, cellwalker cw) { cc.sprawl(cw); for(auto& d: cc.data) if(celldistance(cw.at, d.target.at) != d.dist) return false; vector cellcounter(cells, 0); for(auto& d: cc.data) cellcounter[d.target.at->landparam]++; for(int i=0; iallcells(); cells = isize(allcells); net.resize(cells); for(int i=0; i allcrawlers; int uniq = 0, failures = 0; printf("Verifying crawlers...\n"); for(cell *c: allcells) { auto id = get_cellcrawler_id(c); if(allcrawlers.count(id.first)) { bool b = verify_crawler(allcrawlers[id.first], cellwalker(c, id.second)); if(!b) { printf("cell %p: type = %d id = %d dir = %d / earlier crawler failed\n", c, c->type, id.first, id.second); failures++; } } else { for(int i=0; itype; i++) for(auto& cc: allcrawlers) if(verify_crawler(cc.second, cellwalker(c, i))) { printf("cell %p: type = %d id = %d dir = %d / also works id %d in direction %d\n", c, c->type, id.first, id.second, cc.first, i); uniq--; goto breakcheck; } breakcheck: cellcrawler cr; cr.build(cellwalker(c, id.second)); allcrawlers[id.first] = move(cr); uniq++; } } printf("Crawlers constructed: %d (%d unique, %d failures)\n", isize(allcrawlers), uniq, failures); setindex(false); if(failures) exit(1); } bool finished() { return t == 0; } int krad; double ttpower = 1; void sominit(int); void step() { if(t == 0) return; sominit(2); double tt = (t-1.) / tmax; tt = pow(tt, ttpower); double sigma = maxdist * tt; int dispid = int(dispersion_count * tt); if(qpct) { int pct = (int) ((qpct * (t+.0)) / tmax); if(pct != lpct) { printf("pct %d lpct %d\n", pct, lpct); lpct = pct; analyze(); if(gaussian) printf("t = %6d/%6d %3d%% sigma=%10.7lf maxudist=%10.7lf\n", t, tmax, pct, sigma, maxudist); else printf("t = %6d/%6d %3d%% dispid=%5d maxudist=%10.7lf\n", t, tmax, pct, dispid, maxudist); } } int id = hrand(samples); neuron& n = winner(id); whowon.resize(samples); whowon[id] = &n; /* for(neuron& n2: net) { int d = celldistance(n.where, n2.where); double nu = learning_factor; // nu *= exp(-t*(double)maxdist/perdist); // nu *= exp(-t/t2); nu *= exp(-sqr(d/sigma)); for(int k=0; k fake(1,1); auto it = gaussian ? fake.begin() : s.dispersion[dispid].begin(); for(auto& sd: s.data) { neuron *n2 = getNeuron(sd.target.at); if(!n2) continue; double nu = learning_factor; if(gaussian) nu *= exp(-sqr(sd.dist/sigma)); else nu *= *(it++); for(int k=0; knet[k] += nu * (data[id].val[k] - n2->net[k]); } t--; if(t == 0) analyze(); } int initdiv = 1; int inited = 0; void uninit(int initto) { if(inited > initto) inited = initto; } vector bdiffs; vector bids; vector bdiffn; int showsample(int id) { if(sample_vdata_id.count(id)) return sample_vdata_id[id]; if(bids.size()) { if(net[bids[id]].drawn_samples >= net[bids[id]].max_group_here) { ld bdist = 1e18; int whichid = -1; for(auto p: sample_vdata_id) { int s = p.first; if(bids[s] == bids[id]) { ld cdist = vnorm(data[s].val, data[id].val); if(cdist < bdist) bdist = cdist, whichid = p.second; } } return whichid; } net[bids[id]].drawn_samples++; } int i = vdata.size(); sample_vdata_id[id] = i; vdata.emplace_back(); auto& v = vdata.back(); v.name = data[id].name; v.cp = dftcolor; createViz(i, bids.size() ? net[bids[id]].where : cwt.at, Id); v.m->store(); return i; } int showsample(string s) { if(s == "") return -1; int ret = -1; for(int i=0; i samplesbak; for(auto& n: net) if(n.allsamples) showsample(n.bestsample); analyze(); } int kohrestrict = 1000000; void sominit(int initto) { if(inited < 1 && initto >= 1) { inited = 1; if(!samples) { fprintf(stderr, "Error: SOM without samples\n"); exit(1); } init(); kind = kKohonen; printf("Initializing SOM (1)\n"); vector allcells; if(krad) { celllister cl(cwt.at, krad, 1000000, NULL); allcells = cl.lst; } else allcells = currentmap->allcells(); if(isize(allcells) > kohrestrict) { sort(allcells.begin(), allcells.end(), [] (cell *c1, cell *c2) { return hdist0(tC0(ggmatrix(c1))) < hdist0(tC0(ggmatrix(c2))); }); allcells.resize(kohrestrict); } cells = isize(allcells); net.resize(cells); for(int i=0; ilandparam = i; for(int i=0; iland = laCanvas; alloc(net[i].net); for(int k=0; k=7; d--) setdist(n.where, d, NULL); printf("samples = %d (%d) cells = %d\n", samples, isize(sample_vdata_id), cells); if(!noshow) for(int s: samples_to_show) { int vdid = isize(vdata); sample_vdata_id[s] = vdid; vdata.emplace_back(); auto &vd = vdata.back(); vd.name = data[s].name; vd.cp = dftcolor; createViz(vdid, cwt.at, Id); storeall(vdid); } samples_to_show.clear(); analyze(); } if(inited < 2 && initto >= 2) { inited = 2; printf("Initializing SOM (2)\n"); if(gaussian) { printf("dist = %lf\n", mydistance(net[0].where, net[1].where)); cell *c1 = net[cells/2].where; vector mapdist; for(neuron &n2: net) mapdist.push_back(mydistance(c1,n2.where)); sort(mapdist.begin(), mapdist.end()); maxdist = mapdist[isize(mapdist)*5/6] * distmul; printf("maxdist = %lf\n", maxdist); } dispersion_count = 0; scc.clear(); for(cell *c: currentmap->allcells()) { auto cid = get_cellcrawler_id(c); if(!scc.count(cid.first)) { printf("Building cellcrawler id = %x\n", cid.first); buildcellcrawler(c, scc[cid.first], cid.second); } } lpct = -46130; } } void describe_cell(cell *c) { if(cmode & sm::HELP) return; if(kind != kKohonen) return; neuron *n = getNeuronSlow(c); if(!n) return; help += "cell number: " + its(neuronId(*n)) + " (" + its(n->allsamples) + ")\n"; help += "parameters:"; for(int k=0; knet[k]); help += ", u-matrix = " + fts(n->udist); help += "\n"; vector> v; for(int s=0; snet, data[s].val), s); for(int i=1; i a, pair b) { return a.first < b.first; }); for(int i=0; i values; bool modified; }; vector levellines; bool on; void create() { int xlalpha = part(default_edgetype.color, 0); for(int i=0; i sample; for(int j=0; j<=1024; j++) sample.push_back(data[hrand(samples)].val[lv.column]); sort(sample.begin(), sample.end()); lv.values.clear(); lv.values.push_back(-1e10); for(int j=0; j<=1024; j+=1024 >> (lv.qty)) lv.values.push_back(sample[j]); lv.values.push_back(1e10); } } void draw() { if(!on) return; for(auto& g: gmatrix) { cell *c1 = g.first; transmatrix T = g.second; neuron *n1 = getNeuron(c1); if(!n1) continue; for(int i=0; itype; i++) { cell *c2 = c1->move(i); if(!c2) continue; cell *c3 = c1->modmove(i-1); if(!c3) continue; if(!gmatrix.count(c2)) continue; if(!gmatrix.count(c3)) continue; double d2 = hdist(tC0(T), tC0(gmatrix[c2])); double d3 = hdist(tC0(T), tC0(gmatrix[c3])); neuron *n2 = getNeuron(c2); if(!n2) continue; neuron *n3 = getNeuron(c3); if(!n3) continue; for(auto& l: levellines) { auto val1 = n1->net[l.column]; auto val2 = n2->net[l.column]; auto val3 = n3->net[l.column]; auto v1 = lower_bound(l.values.begin(), l.values.end(), val1); auto v2 = lower_bound(l.values.begin(), l.values.end(), val2); auto v3 = lower_bound(l.values.begin(), l.values.end(), val3); auto draw = [&] () { auto vmid = *v1; queueline( (T * ddspin(c1,i) * xpush0(d2 * (vmid-val1) / (val2-val1))), (T * ddspin(c1,i-1) * xpush0(d3 * (vmid-val1) / (val3-val1))), l.color, vid.linequality); }; while(v1 < v2 && v1 < v3) { draw(); v1++; } while(v1 > v2 && v1 > v3) { v1--; draw(); } } } } setindex(false); } void show() { if(levellines.size() == 0) create(); gamescreen(0); cmode = vid.xres > vid.yres * 1.4 ? sm::SIDE : 0; dialog::init("level lines"); char nx = 'a'; for(auto &l : levellines) { dialog::addSelItem(colnames[l.column], its(l.qty), nx++); dialog::lastItem().colorv = l.color >> 8; } dialog::addItem("exit menu", '0'); dialog::addItem("shift+letter to change color", 0); dialog::display(); keyhandler = [] (int sym, int uni) { dialog::handleNavigation(sym, uni); if(uni >= 'a' && uni - 'a' + isize(levellines)) { auto& l = levellines[uni - 'a']; dialog::editNumber(l.qty, 0, 10, 1, 0, colnames[l.column], XLAT("Controls the number of level lines.")); dialog::reaction = [&l] () { l.modified = true; build(); }; } else if(uni >= 'A' && uni - 'A' + isize(levellines)) { auto& l = levellines[uni - 'A']; dialog::openColorDialog(l.color, NULL); dialog::dialogflags |= sm::MAYDARK | sm::SIDE; } else if(doexiton(sym, uni)) popScreen(); }; } } void ksave(const string& fname) { sominit(1); FILE *f = fopen(fname.c_str(), "wt"); if(!f) { fprintf(stderr, "Could not save the network\n"); return; } fprintf(f, "%d %d\n", cells, t); for(neuron& n: net) { for(int k=0; k= lastprogress + (noGUI ? 500 : 100)) { if(noGUI) printf("%s\n", s.c_str()); else { clearMessages(); addMessage(s); mainloopiter(); } lastprogress = SDL_GetTicks(); } } template void save_raw(string fname, const vector& v) { FILE *f = fopen(fname.c_str(), "wb"); fwrite(&v[0], sizeof(v[0]), v.size(), f); fclose(f); } template void load_raw(string fname, vector& v) { FILE *f = fopen(fname.c_str(), "rb"); if(!f) { fprintf(stderr, "file does not exist: %s\n", fname.c_str()); exit(1); } fseek(f, 0, SEEK_END); auto s = ftell(f); rewind(f); v.resize(s / sizeof(v[0])); hr::ignore(fread(&v[0], sizeof(v[0]), v.size(), f)); fclose(f); } bool groupsizes_known = false; void do_classify() { sominit(1); if(bids.empty()) { printf("Classifying...\n"); bids.resize(samples, 0); bdiffs.resize(samples, 1e20); for(int s=0; s neurons_to_sort; for(int i=0; i samples_to_sort; for(int i=0; i> edgedata; load_raw(fname_edges, edgedata); int N = isize(edgedata); if(pick > 0 && pick < N) { for(int i=1; ivisible_from = 1. / N; vector> edgedata2; for(auto p: edgedata) edgedata2.emplace_back(showsample(p.first), showsample(p.second)); distribute_neurons(); int i = 0; for(auto p: edgedata2) addedge(p.first, p.second, 1 / (i+++.5), true, t); } void random_edges(int q) { auto t = add_edgetype("random"); vector ssamp; for(auto p: sample_vdata_id) ssamp.push_back(p.second); for(int i=0; i= '1' && uni <= '3') { int i = uni - '1'; whattodraw[i]++; if(whattodraw[i] == cols) whattodraw[i] = -5; coloring(); return true; } if(uni == '0') { for(char x: {'1','2','3'}) handleMenu(x, x); return true; } if(uni == '4') { pushScreen(levelline::show); return true; } return false; } #if CAP_COMMANDLINE int readArgs() { using namespace arg; // #1: load the samples if(argis("-som")) { PHASE(3); shift(); kohonen::loadsamples(args()); } // #2: set parameters else if(argis("-somkrad")) { gaussian = 0; uninit(0); } else if(argis("-somsim")) { gaussian = 0; uninit(1); } else if(argis("-somcgauss")) { gaussian = 1; uninit(1); } else if(argis("-somggauss")) { gaussian = 2; uninit(1); } else if(argis("-sompct")) { shift(); qpct = argi(); } else if(argis("-sompower")) { shift(); ttpower = argf(); } else if(argis("-somparam")) { shift(); (gaussian ? distmul : dispersion_end_at) = argf(); if(dispersion_end_at <= 1) { fprintf(stderr, "Dispersion parameter illegal\n"); dispersion_end_at = 1.5; } uninit(1); } else if(argis("-sominitdiv")) { shift(); initdiv = argi(); uninit(0); } else if(argis("-somtmax")) { shift(); t = (t*1./tmax) * argi(); tmax = argi(); } else if(argis("-somlearn")) { // this one can be changed at any moment shift(); learning_factor = argf(); } else if(argis("-somrun")) { t = tmax; sominit(1); } // #3: load the neuron data (usually without #2) else if(argis("-somload")) { PHASE(3); shift(); kohonen::kload(args()); } // #4: run, stop etc. else if(argis("-somrunto")) { int i = argi(); shift(); while(t > i) { if(t % 128 == 0) progress("Steps left: " + its(t)); kohonen::step(); } } else if(argis("-somstop")) { t = 0; } else if(argis("-somnoshow")) { noshow = true; } else if(argis("-somfinish")) { while(!finished()) { kohonen::step(); if(t % 128 == 0) progress("Steps left: " + its(t)); } } // #5 save data, classify etc. else if(argis("-somsave")) { PHASE(3); shift(); kohonen::ksave(args()); } else if(argis("-somsavew")) { PHASE(3); shift(); kohonen::ksavew(args()); } else if(argis("-somloadw")) { PHASE(3); shift(); kohonen::kloadw(args()); } else if(argis("-somclassify0")) { PHASE(3); shift(); kohonen::do_classify(); } else if(argis("-somclassify")) { PHASE(3); shift(); kohonen::kclassify(args()); } else if(argis("-somclassify-sr")) { PHASE(3); shift(); kohonen::kclassify_save_raw(args()); } else if(argis("-somclassify-lr")) { PHASE(3); shift(); kohonen::kclassify_load_raw(args()); } else if(argis("-somlistshown")) { PHASE(3); shift(); kohonen::klistsamples(args(), false, false); } else if(argis("-somlistbest")) { PHASE(3); shift(); kohonen::klistsamples(args(), true, false); } else if(argis("-somlistbestc")) { PHASE(3); shift(); kohonen::klistsamples(args(), true, true); } else if(argis("-somndist")) { PHASE(3); shift(); kohonen::neurondisttable(args()); } else if(argis("-somshowbest")) { showbestsamples(); } else if(argis("-somverify")) { start_game(); verify_crawlers(); } else if(argis("-somrestrict")) { shift(); kohrestrict = argi(); } else if(argis("-som-maxgroup")) { shift(); max_group = argi(); } else if(argis("-som-mingroup")) { shift(); min_group = argi(); } else if(argis("-som-fillgroups")) { fillgroups(); } else if(argis("-som-load-edges")) { shift(); string edgename = args(); shift(); kohonen::load_edges(args(), edgename, 0); } else if(argis("-som-random-edges")) { shift(); random_edges(argi()); } else if(argis("-som-load-n-edges")) { shift(); string edgename = args(); shift(); int n = argi(); shift(); kohonen::load_edges(args(), edgename, n); } else if(argis("-less-edges")) { shift(); double d = argf(); for(auto t: edgetypes) t->visible_from *= d; } else return 1; return 0; } auto hooks = addHook(hooks_args, 100, readArgs); #endif auto hooks2 = addHook(hooks_frame, 50, levelline::draw) + addHook(hooks_mouseover, 100, describe_cell); void clear() { printf("clearing Kohonen...\n"); data.clear(); sample_vdata_id.clear(); colnames.clear(); weights.clear(); net.clear(); whowon.clear(); samples_to_show.clear(); scc.clear(); bdiffs.clear(); bids.clear(); bdiffn.clear(); } }} namespace rogueviz { void mark(cell *c) { using namespace kohonen; if(kind == kKohonen && inited >= 1) { distfrom = getNeuronSlow(c); coloring(); } }}