diff --git a/kohonen.cpp b/kohonen.cpp index bf324605..cb441ff1 100644 --- a/kohonen.cpp +++ b/kohonen.cpp @@ -19,7 +19,7 @@ vector data; vector samples_shown; -int whattodraw[3]; +int whattodraw[3] = {-2,-2,-2}; struct neuron { kohvec net; @@ -27,7 +27,7 @@ struct neuron { double udist; int lpbak; int col; - int samples, csample; + int samples, csample, bestsample; }; kohvec weights; @@ -64,10 +64,15 @@ double vnorm(kohvec& a, kohvec& b) { return diff; } +void sominit(int); +void uninit(int); + void loadsamples(const char *fname) { - normalize(); FILE *f = fopen(fname, "rt"); - if(!f) return; + if(!f) { + fprintf(stderr, "Could not load samples\n"); + return; + } if(fscanf(f, "%d", &cols) != 1) { fclose(f); return; } while(true) { sample s; @@ -75,7 +80,7 @@ void loadsamples(const char *fname) { alloc(s.val); if(feof(f)) break; for(int i=0; i listing; - for(neuron& n: net) switch(c) { - case -4: - listing.push_back(n.samples); - break; - - case -3: - if(distfrom) - listing.push_back(vnorm(n.net, distfrom->net)); - else - listing.push_back(0); - break; - - case -2: - listing.push_back(n.udist); - break; - - case -1: - listing.push_back(-n.udist); - break; - - default: - listing.push_back(n.net[c]); - break; + + if(c == -5) { + if(besttofind) { + besttofind = false; + for(neuron& n: net) { + double bdiff = 1e20; + for(int i=0; ilandparam, pid) = part(vdata[net[i].bestsample].cp.color1, pid+1); + } + + else { + vector listing; + for(neuron& n: net) switch(c) { + case -4: + listing.push_back(n.samples); + break; + + case -3: + if(distfrom) + listing.push_back(vnorm(n.net, distfrom->net)); + else + listing.push_back(0); + break; + + case -2: + listing.push_back(n.udist); + break; + + case -1: + listing.push_back(-n.udist); + break; + + default: + listing.push_back(n.net[c]); + break; + } + + double minl = listing[0], maxl = listing[0]; + for(double& d: listing) minl = min(minl, d), maxl = max(maxl, d); + if(maxl-minl < 1e-3) maxl = minl+1e-3; + + for(int i=0; ilandparam, pid) = (255 * (listing[i] - minl)) / (maxl - minl); } - - double minl = listing[0], maxl = listing[0]; - for(double& d: listing) minl = min(minl, d), maxl = max(maxl, d); - if(maxl-minl < 1e-3) maxl = minl+1e-3; - - for(int i=0; ilandparam, pid) = (255 * (listing[i] - minl)) / (maxl - minl); } } @@ -202,31 +229,49 @@ void analyze() { maxudist = max(maxudist, n.udist); } - whowon.resize(samples); - - for(neuron& n: net) n.samples = 0; - - for(int id=0; idbase = w.where; + vdata[id].m->at = + spin(2*M_PI*w.csample / w.samples) * xpush(.25 * (w.samples-1) / w.samples); + w.csample++; + } + + shmup::fixStorage(); + setindex(false); } - for(int id=0; idbase = w.where; - vdata[id].m->at = - spin(2*M_PI*w.csample / w.samples) * xpush(.25 * (w.samples-1) / w.samples); - w.csample++; - } - - shmup::fixStorage(); - setindex(false); coloring(); } +// traditionally Gaussian blur is used in the Kohonen algoritm +// but it does not seem to make much sense in hyperbolic geometry +// especially wrapped one. +// GAUSSIAN==1: use the Gaussian blur, on celldistance +// GAUSSIAN==2: use the Gaussian blur, on true distance +// GAUSSIAN==0: simulate the dispersion on our network + +int gaussian = 0; + +double mydistance(cell *c1, cell *c2) { + if(gaussian == 2) return hdist(tC0(shmup::ggmatrix(c1)), tC0(shmup::ggmatrix(c2))); + else return celldistance(c1, c2); + } + struct cellcrawler { struct cellcrawlerdata { @@ -257,8 +302,8 @@ struct cellcrawler { store(cw, i, j); } } - for(cellcrawlerdata& s: data) - s.dist = celldistance(s.orig.c, start.c); + if(gaussian) for(cellcrawlerdata& s: data) + s.dist = mydistance(s.orig.c, start.c); } void sprawl(const cellwalker& start) { @@ -275,26 +320,16 @@ struct cellcrawler { } }; -// traditionally Gaussian blur is used in the Kohonen algoritm -// but it does not seem to make much sense in hyperbolic geometry -// especially wrapped one. -// GAUSSIAN==1: use the Gaussian blur -// GAUSSIAN==0: simulate the dispersion on our network - -#ifndef GAUSSIAN -#define GAUSSIAN 0 -#endif - cellcrawler scc[2]; // hex and non-hex -#if GAUSSIAN==0 +double dispersion_end_at = 1.5; + double dispersion_precision = .0001; int dispersion_each = 1; vector> dispersion[2]; int dispersion_count; -#endif void buildcellcrawler(cell *c) { int sccid = c->type != 6; @@ -302,97 +337,106 @@ void buildcellcrawler(cell *c) { cellcrawler& cr = scc[sccid]; cr.build(cellwalker(c,0)); -#if GAUSSIAN==0 - vector curtemp; - vector newtemp; - vector qty; - vector > pairs; - int N = size(net); + if(!gaussian) { + vector curtemp; + vector newtemp; + vector qty; + vector > pairs; + int N = size(net); + + curtemp.resize(N, 0); + newtemp.resize(N, 0); + qty.resize(N, 0); - curtemp.resize(N, 0); - newtemp.resize(N, 0); - qty.resize(N, 0); - - for(int i=0; i vmin * 1.5; iter++) { - if(iter % dispersion_each == 0) { - d.emplace_back(N); - auto& dispvec = d.back(); - for(int i=0; i vmax) vmax = curtemp[i]; + + curtemp[neuronId(*getNeuron(c))] = 1; + + ld vmin = 0, vmax = 1; + int iter; + + auto &d = dispersion[sccid]; + + d.clear(); + + printf("Building dispersion...\n"); + + for(iter=0; dispersion_count ? true : vmax > vmin * dispersion_end_at; iter++) { + if(iter % dispersion_each == 0) { + d.emplace_back(N); + auto& dispvec = d.back(); + for(int i=0; i vmax) vmax = curtemp[i]; + } + + dispersion_count = size(d); + printf("Dispersion count = %d\n", dispersion_count); } - - dispersion_count = size(d); - printf("Dispersion count = %d\n", dispersion_count); -#endif } bool finished() { return t == 0; } +int krad; + +double ttpower = 1; + +void sominit(int); + void step() { if(t == 0) return; + sominit(2); + + double tt = (t-1.) / tmax; + tt = pow(tt, ttpower); -#if GAUSSIAN==1 - double sigma = maxdist * t / (perdist*(double) mul); -#else - int dispid = int(dispersion_count * (t-1.) / tmax); -#endif + double sigma = maxdist * tt; + int dispid = int(dispersion_count * tt); -// double sigma = maxdist * exp(-t / t1); - int pct = (int) (100 * ((t*(double) mul) / perdist)); - if(pct != lpct) { - lpct = pct; - analyze(); -#if GAUSSIAN==1 - printf("t = %6d/%2dx%6d pct = %3d sigma=%10.7lf maxudist=%10.7lf\n", t, mul, perdist, pct, sigma, maxudist); -#else - printf("t = %6d/%2dx%6d pct = %3d dispid=%5d maxudist=%10.7lf\n", t, mul, perdist, pct, dispid, maxudist); -#endif + if(qpct) { + int pct = (int) ((qpct * (t+.0)) / tmax); + if(pct != lpct) { + printf("pct %d lpct %d\n", pct, lpct); + lpct = pct; + analyze(); + + if(gaussian) + printf("t = %6d/%6d %3d%% sigma=%10.7lf maxudist=%10.7lf\n", t, tmax, pct, sigma, maxudist); + else + printf("t = %6d/%6d %3d%% dispid=%5d maxudist=%10.7lf\n", t, tmax, pct, dispid, maxudist); + } } int id = hrand(samples); neuron& n = winner(id); + whowon.resize(samples); whowon[id] = &n; - - + /* for(neuron& n2: net) { int d = celldistance(n.where, n2.where); - double nu = maxfac; + double nu = learning_factor; // nu *= exp(-t*(double)maxdist/perdist); // nu *= exp(-t/t2); nu *= exp(-sqr(d/sigma)); @@ -404,18 +448,19 @@ void step() { cellcrawler& s = scc[sccid]; s.sprawl(cellwalker(n.where, 0)); -#if GAUSSIAN==0 - auto it = dispersion[sccid][dispid].begin(); -#endif + vector fake(1,1); + auto it = gaussian ? fake.begin() : dispersion[sccid][dispid].begin(); + for(auto& sd: s.data) { neuron *n2 = getNeuron(sd.target.c); if(!n2) continue; - double nu = maxfac; -#if GAUSSIAN==0 - nu *= *(it++); -#else - nu *= exp(-sqr(sd.dist/sigma)); -#endif + double nu = learning_factor; + + if(gaussian) + nu *= exp(-sqr(sd.dist/sigma)); + else + nu *= *(it++); + for(int k=0; knet[k] += nu * (data[id].val[k] - n2->net[k]); } @@ -424,60 +469,96 @@ void step() { if(t == 0) analyze(); } -void run(const char *fname, int _perdist, double _maxfac) { - perdist = _perdist; - maxfac = _maxfac; - init(); kind = kKohonen; - - loadsamples(fname); +int initdiv = 1; - /* if(geometry != gQuotient1) { - targetGeometry = gQuotient1; - restartGame('g'); - } - if(!purehepta) restartGame('7'); */ +int inited = 0; -#define Z 1 - - vector& allcells = currentmap->allcells(); - cells = size(allcells); - net.resize(cells); - for(int i=0; ilandparam = i; - for(int i=0; iland = laCanvas; - alloc(net[i].net); - - for(int k=0; k=7; d--) setdist(n.where, d, NULL); - - cell *c1 = net[cells/2].where; - vector mapdist; - for(neuron &n2: net) mapdist.push_back(celldistance(c1,n2.where)); - sort(mapdist.begin(), mapdist.end()); - maxdist = mapdist[size(mapdist)*5/6]; - - printf("samples = %d cells = %d maxdist = %d\n", samples, cells, maxdist); - -#if GAUSSIAN==0 - dispersion_count = 0; -#endif - c1 = currentmap->gamestart(); - cell *c2 = createMov(c1, 0); - buildcellcrawler(c1); - if(c1->type != c2->type) buildcellcrawler(c2); - - lpct = -46130; - mul = 1; - tmax = t = perdist*mul; - step(); - for(int i=0; i<3; i++) whattodraw[i] = -2; - analyze(); +void uninit(int initto) { + if(inited > initto) inited = initto; } + +void sominit(int initto) { + + if(inited < 1 && initto >= 1) { + inited = 1; + if(!samples) { + fprintf(stderr, "Error: SOM without samples\n"); + exit(1); + } + + init(); kind = kKohonen; + + /* if(geometry != gQuotient1) { + targetGeometry = gQuotient1; + restartGame('g'); + } + if(!purehepta) restartGame('7'); */ + + printf("Initializing SOM (1)\n"); + vector allcells; + + if(krad) { + celllister cl(cwt.c, krad, 1000000, NULL); + allcells = cl.lst; + } + else allcells = currentmap->allcells(); + + cells = size(allcells); + net.resize(cells); + for(int i=0; ilandparam = i; + for(int i=0; iland = laCanvas; + alloc(net[i].net); + + for(int k=0; k=7; d--) setdist(n.where, d, NULL); + + printf("samples = %d (%d) cells = %d\n", samples, size(samples_shown), cells); + + if(!noshow) { + vdata.resize(size(samples_shown)); + for(int i=0; i= 2) { + inited = 2; + + printf("Initializing SOM (2)\n"); + + if(gaussian) { + printf("dist = %lf\n", mydistance(net[0].where, net[1].where)); + cell *c1 = net[cells/2].where; + vector mapdist; + for(neuron &n2: net) mapdist.push_back(mydistance(c1,n2.where)); + sort(mapdist.begin(), mapdist.end()); + maxdist = mapdist[size(mapdist)*5/6] * distmul; + printf("maxdist = %lf\n", maxdist); + } + + dispersion_count = 0; + cell *c1 = currentmap->gamestart(); + cell *c2 = createMov(c1, 0); + buildcellcrawler(c1); + if(c1->type != c2->type) buildcellcrawler(c2); + + lpct = -46130; + } + } + void describe(cell *c) { if(cmode & sm::HELP) return; neuron *n = getNeuronSlow(c); @@ -496,7 +577,12 @@ void describe(cell *c) { } void ksave(const char *fname) { + sominit(1); FILE *f = fopen(fname, "wt"); + if(!f) { + fprintf(stderr, "Could not save the network\n"); + return; + } fprintf(f, "%d %d\n", cells, t); for(neuron& n: net) { for(int k=0; k bdiffs(samples, 1e20); + vector bids(samples, 0); + + printf("Classifying...\n"); + + for(neuron& n: net) n.samples = 0; + + for(int s=0; s bdiffn(cells, 1e20); + + printf("Finding samples...\n"); + + for(int s=0; s= '1' && uni <= '3') { int i = uni - '1'; whattodraw[i]++; - if(whattodraw[i] == cols) whattodraw[i] = -4; + if(whattodraw[i] == cols) whattodraw[i] = -5; coloring(); return true; } @@ -572,6 +737,102 @@ bool handleMenu(int sym, int uni) { return false; } +int readArgs() { + using namespace arg; + + // #1: load the samples + + if(argis("-som")) { + PHASE(3); + shift(); kohonen::loadsamples(args()); + } + + // #2: set parameters + + else if(argis("-somkrad")) { + gaussian = 0; uninit(0); + } + else if(argis("-somsim")) { + gaussian = 0; uninit(1); + } + else if(argis("-somcgauss")) { + gaussian = 1; uninit(1); + } + else if(argis("-somggauss")) { + gaussian = 2; uninit(1); + } + else if(argis("-sompct")) { + shift(); qpct = argi(); + } + else if(argis("-sompower")) { + shift(); ttpower = argf(); + } + else if(argis("-somparam")) { + shift(); (gaussian ? distmul : dispersion_end_at) = argf(); + if(dispersion_end_at <= 1) { + fprintf(stderr, "Dispersion parameter illegal\n"); + dispersion_end_at = 1.5; + } + uninit(1); + } + else if(argis("-sominitdiv")) { + shift(); initdiv = argi(); uninit(0); + } + else if(argis("-somtmax")) { + shift(); t = (t*1./tmax) * argi(); + tmax = argi(); + } + else if(argis("-somlearn")) { + // this one can be changed at any moment + shift(); learning_factor = argf(); + } + + else if(argis("-somrun")) { + t = tmax; sominit(1); + } + + // #3: load the neuron data (usually without #2) + else if(argis("-somload")) { + PHASE(3); + shift(); kohonen::kload(args()); + } + + // #4: run, stop etc. + else if(argis("-somrunto")) { + int i = argi(); + shift(); while(t > i) kohonen::step(); + } + else if(argis("-somstop")) { + t = 0; + } + else if(argis("-somnoshow")) { + noshow = true; + } + else if(argis("-somfinish")) { + while(!finished()) kohonen::step(); + } + + // #5 save data, classify etc. + else if(argis("-somsave")) { + PHASE(3); + shift(); kohonen::ksave(args()); + } + else if(argis("-somclassify")) { + PHASE(3); + shift(); kohonen::kclassify(args()); + } + else if(argis("-somclassify2")) { + PHASE(3); + shift(); const char *f1 = args(); + shift(); const char *f2 = args(); + kohonen::kclassify2(f1, f2); + } + + else return 1; + return 0; + } + +auto hooks = addHook(hooks_args, 100, readArgs); } void mark(cell *c) { @@ -579,3 +840,4 @@ void mark(cell *c) { distfrom = getNeuronSlow(c); coloring(); } + diff --git a/rogueviz.cpp b/rogueviz.cpp index 2d13cac9..f3c19a72 100644 --- a/rogueviz.cpp +++ b/rogueviz.cpp @@ -1685,29 +1685,9 @@ int readArgs() { else if(argis("-ggamma")) { shift(); ggamma = argf(); } - else if(argis("-som")) { - PHASE(3); - shift(); const char *fname = args(); - shift(); int percount = argi(); - shift(); kohonen::run(fname, percount, argf()); - } else if(argis("-rvpres")) { tour::slides = rvtour::rvslides; } - else if(argis("-somsave")) { - PHASE(3); - while(!kohonen::finished()) kohonen::step(); - shift(); kohonen::ksave(args()); - } - else if(argis("-somclassify")) { - PHASE(3); - while(!kohonen::finished()) kohonen::step(); - shift(); kohonen::kclassify(args()); - } - else if(argis("-somload")) { - PHASE(3); - shift(); kohonen::kload(args()); - } else if(argis("-nolegend")) { legend.clear(); }