1
0
mirror of https://github.com/zenorogue/hyperrogue.git synced 2024-11-09 23:49:53 +00:00

rogueviz:: sag:: separated into subfiles

This commit is contained in:
Zeno Rogue 2024-07-23 21:21:53 +02:00
parent 33622ed652
commit 32678b6efc
9 changed files with 2569 additions and 1411 deletions

50
rogueviz/sag/README.md Normal file
View File

@ -0,0 +1,50 @@
What is it
----------
The SAG module is used to create the embeddings of graphs, and to render them.
In general, this works by mapping the nodes of a graph to the
cells of a RogueViz-supported tessellation or honeycomb. The number of
usable cells is limited (that is, a fixed region is set in advance for the embedding).
Simulated Annealing is used to find the 'optimal' mapping. The function optimized
depends on the method. Currently, there are three main 'methods' implemented:
NEAREST: we minimize the sum of w * d over all edges, where w is the edge weight,
and d is the distance between the endpoints of that edge. In other words,
we want to place all nodes all close as possible, especially if the node weights are
big. See the following visualizations as an example of visualizations obtained using
this method:
https://www.youtube.com/watch?v=mDG3_f8R2Ns (SAG boardgames)
https://www.youtube.com/watch?v=WSyygk_3j9o (SAG roguelikes)
https://www.youtube.com/watch?v=HWQkDkeEUeM (SAG programming languages)
MATCH: we minimize the sum of squares of (d - a/w - b), where w and d are as above.
In other words, we want the distance between nodes to represent 1/w as well as possible;
a and b are scaling parameters.
(no examples for now)
LIKELIHOOD: this method is based on the Hyperbolic Random Graph model. According to
that model, each pair of nodes in distance d are connected with probability
1/(1+\exp((d-R)/T)). We maximize the likelihood, i.e., the product of these probabilities
for actual edges, and their complements for non-edges. In other words, nodes connected
with edges want to be close, while nodes not connected with edges want to be distant.
The following embeddings have been obtained using this method:
https://youtu.be/GQKaKF_yOL4 (brain connectomes)
The rest of this README details how to use SAG.
Cells
-----
Not yet documented
Graph
-----
Just use `-sag-weighted` to read a weighted graph (in format `node1;node2;weight`),
or `-sag-unweighted` to read an unweighted graph (in format `node1 node2`).

269
rogueviz/sag/annealing.cpp Normal file
View File

@ -0,0 +1,269 @@
// RogueViz -- SAG embedder: the implementation of Simulated Annealing
// Copyright (C) 2011-2024 Zeno Rogue, see 'hyper.cpp' for details
#include "../rogueviz.h"
#include <sys/mman.h>
#include <fcntl.h>
namespace rogueviz {
namespace sag {
enum eSagmode { sagOff, sagHC, sagSA };
eSagmode sagmode; // 0 - off, 1 - hillclimbing, 2 - SA
const char *sagmodes[3] = {"off", "HC", "SA"};
ld temperature = 0;
int hightemp = 10;
int lowtemp = -15;
long long numiter = 0;
int vizsa_start;
int vizsa_len = 5;
bool chance(double p) {
p *= double(hrngen.max()) + 1;
auto l = hrngen();
auto pv = (decltype(l)) p;
if(l < pv) return true;
if(l == pv) return chance(p-pv);
return false;
}
bool twoway = false;
int moves, nomoves;
void saiter() {
int DN = isize(sagid);
int t1 = hrand(DN);
int sid1 = sagid[t1];
int sid2;
int s = twoway ? pick(1,4) : hrand(4)+1;
if(s == 4) sid2 = hrand(isize(sagcells));
else {
sid2 = sid1;
for(int ii=0; ii<s; ii++) sid2 = hrand_elt(neighbors[sid2]);
}
int t2 = allow_doubles ? -1 : sagnode[sid2];
sagnode[sid1] = -1; sagid[t1] = -1;
sagnode[sid2] = -1; if(t2 >= 0) sagid[t2] = -1;
double change =
costat(t1,sid2) + costat(t2,sid1) - costat(t1,sid1) - costat(t2,sid2);
sagnode[sid1] = t1; sagid[t1] = sid1;
sagnode[sid2] = t2; if(t2 >= 0) sagid[t2] = sid2;
if(change > 0 && (sagmode == sagHC || !chance(exp(-change * exp(-temperature))))) { nomoves++; return; }
moves++;
sagnode[sid1] = t2; sagnode[sid2] = t1;
sagid[t1] = sid2; if(t2 >= 0) sagid[t2] = sid1;
if(should_good) {
auto dcost = cost;
compute_cost();
println(hlog, "dcost=", dcost, " change=", change, " cost=", cost, " error = ", dcost + change - cost);
if(abs(dcost + change - cost) > .1) throw hr_exception("dcost fail");
cost = dcost;
}
cost += change;
}
ld checkmark_cost;
int hillclimb() {
int DN = isize(sagid);
int changes = 0;
vector<ld> succ;
for(int t1=0; t1<DN; t1++) {
int sid1 = sagid[t1];
for(int sid2: neighbors[sid1]) {
int t2 = allow_doubles ? -1 : sagnode[sid2];
sagnode[sid1] = -1; sagid[t1] = -1;
sagnode[sid2] = -1; if(t2 >= 0) sagid[t2] = -1;
double change =
costat(t1,sid2) + costat(t2,sid1) - (costat(t1,sid1) + costat(t2,sid2));
if(change >= -1e-10) {
sagnode[sid1] = t1; sagid[t1] = sid1;
sagnode[sid2] = t2; if(t2 >= 0) sagid[t2] = sid2;
}
else {
changes++;
sagnode[sid1] = t2; sagnode[sid2] = t1;
sagid[t1] = sid2; if(t2 >= 0) sagid[t2] = sid1;
cost += change;
succ.push_back(change);
break;
}
}
}
// println(hlog, "successes = ", succ);
return changes;
}
int checkmark_hillclimb() {
compute_cost();
if(cost > checkmark_cost) {
println(hlog, "checkmark failed");
throw hr_exception("checkmark failed");
return 0;
}
checkmark_cost = cost;
return hillclimb();
}
int view_each = 1000;
void dofullsa(ld satime) {
sagmode = sagSA;
int t1 = SDL_GetTicks();
int tl = -999999;
while(true) {
int t2 = SDL_GetTicks();
double d = (t2-t1) / (1000. * satime);
if(d > 1) break;
temperature = hightemp - (d*(hightemp-lowtemp));
for(int i=0; i<10000; i++) {
numiter++;
sag::saiter();
}
if(t2 - tl > view_each * .98) {
tl = t2;
println(hlog, format("it %12Ld temp %6.4f [1/e at %13.6f] cost = %f ",
numiter, double(sag::temperature), (double) exp(sag::temperature),
double(sag::cost)));
}
}
temperature = -5;
sagmode = sagOff;
create_viz();
}
/** after how many moves should we fix the values of R and T during SA */
int recost_each;
/** 2 = fix both R and T, 1 = fix only R, 0 = fix nothing, 3 = fix both R and T but avoid fixing early */
int autofix_rt;
void optimize_sag_loglik_logistic();
void compute_loglik_tab();
bool output_fullsa = true;
void dofullsa_iterations(long long saiter) {
sagmode = sagSA;
moves = 0; nomoves = 0; numiter = 0;
// decltype(SDL_GetTicks()) t1 = SDL_GetTicks();
// println(hlog, "before dofullsa_iterations, cost = ", double(sag::cost), " iterations = ", fts(saiter));
ld last_ratio;
int lpct = 0;
bool was_fixed = false;
for(int i=0; i<saiter; i++) {
temperature = hightemp - ((i+.5)/saiter*(hightemp-lowtemp));
numiter++;
sag::saiter();
if(recost_each && moves > recost_each) {
last_ratio = moves / (moves + nomoves + 0.);
if(((autofix_rt == 3 && was_fixed) || nomoves > recost_each) && autofix_rt) {
was_fixed = true;
optimize_sag_loglik_logistic();
if(autofix_rt == 1) {
lgsag.T = best.T;
compute_loglik_tab();
compute_cost();
}
}
nomoves = 0; moves = 0;
}
int cpct = numiter * 20 / (saiter-1);
if(cpct > lpct && output_fullsa) {
lpct = cpct;
println(hlog, format("it %12Ld ratio %6.3f temp %8.4f step %9.3g cost %9.2f R=%8.4f T=%8.4f",
numiter, last_ratio, double(sag::temperature), (double) exp(sag::temperature), cost, lgsag.R, lgsag.T));
}
/* if(numiter % 10000 == 0) {
auto t2 = SDL_GetTicks();
if(int(t2 - t1) > view_each) {
t1 = t2;
println(hlog, format("it %12Ld temp %6.4f [1/e at %13.6f] cost = %f ",
numiter, double(sag::temperature), (double) exp(sag::temperature),
double(sag::cost)));
}
} */
}
// println(hlog, "after dofullsa_iterations, cost = ", double(sag::cost));
temperature = -5;
sagmode = sagOff;
create_viz();
}
int anneal_read_args() {
#if CAP_COMMANDLINE
using namespace arg;
if(0) ;
else if(argis("-sagtemp")) {
shift(); sag::hightemp = argi();
shift(); sag::lowtemp = argi();
}
else if(argis("-sagfull")) {
shift(); sag::dofullsa(argf());
}
else if(argis("-sagfulli")) {
shift(); sag::dofullsa_iterations(argll());
}
else if(argis("-sagmode")) {
shift();
vizsa_start = 0;
sagmode = (eSagmode) argi();
if(sagmode == sagSA) {
shift(); temperature = argf();
}
}
else if(argis("-sag-recost")) {
method = smLogistic; prepare_method();
shift(); recost_each = argi();
shift(); autofix_rt = argi();
}
else return 1;
#endif
return 0;
}
int ahanneal = addHook(hooks_args, 100, anneal_read_args);
}
}

638
rogueviz/sag/cells.cpp Normal file
View File

@ -0,0 +1,638 @@
// RogueViz -- SAG embedder: cell constructor
// Copyright (C) 2011-24 Zeno Rogue, see 'hyper.cpp' for details
#include "../rogueviz.h"
#include <sys/mman.h>
#include <fcntl.h>
namespace rogueviz {
namespace sag {
namespace cells {
bool angular = false;
using subcell = pair<cell*, int>;
/** all the SAG cells */
vector<subcell> sagcells;
/** sagcells[ids[c]]] == c */
map<subcell, int> ids;
/** if i in neighbors[j], sagcells[i] is a neighbor of sagcells[j] */
vector<vector<int>> neighbors;
vector<hyperpoint> sagsubcell_point;
vector<transmatrix> sagsubcell_inv;
/** matrix for every sagcell, not subdivided */
vector<transmatrix> cell_matrix;
/** point for every sagcell */
vector<hyperpoint> cellpoint;
/** precision of geometric distances */
int gdist_prec;
/** max edge for dijkstra */
int dijkstra_maxedge;
/** dijkstra with tile distances */
bool dijkstra_tile;
string distance_file;
ld pdist(hyperpoint hi, hyperpoint hj);
/** the maximum value in sagdist +1 */
int max_sag_dist;
/** new style cell request */
int cell_request;
/** the structure type used to hold a N*N table of distances */
struct sagdist_t {
using distance = unsigned short;
distance* tab;
void* tabmap;
int fd;
size_t N;
int format;
distance* begin() { return tab; }
distance* end() { return tab+N*N; }
sagdist_t() { tab = nullptr; fd = 0; format = 2; }
distance* operator [] (int y) { return tab + N * y; }
void init(int _N, distance val) {
clear();
N = _N;
tab = new distance[N*N];
for(size_t i=0; i<N*N; i++) tab[i] = val;
}
void map(string fname) {
clear();
fd = open(fname.c_str(), O_RDONLY | O_LARGEFILE);
if(fd == -1) throw hr_exception("open failed in map");
read(fd, &N, 8);
tabmap = (distance*) mmap(nullptr, N*N*sizeof(distance)+8, PROT_READ, MAP_SHARED, fd, 0);
if(tabmap == MAP_FAILED) {
perror("mmap");
throw hr_exception("Mapping Failed\n");
}
tab = (distance*) (((char*)tabmap) + 8);
println(hlog, "test: ", test());
}
void load_old(string fname) {
vector<vector<distance>> old;
clear();
fhstream f(fname, "rb");
f.read(old);
init(isize(old), 0);
auto ptr = tab;
for(auto& row: old) for(auto val: row) *(ptr++) = val;
}
void load(string fname) {
if(format == 1) map(fname);
if(format == 2) load_old(fname);
throw hr_exception("sagdist format unknown");
}
vector<int> test() {
vector<int> ttab = {int(N)};
for(int a=0; a<4; a++) for(int b=0; b<4; b++) ttab.push_back((*this)[a][b]);
for(size_t a=N-4; a<N; a++) for(size_t b=N-4; b<N; b++) ttab.push_back((*this)[a][b]);
return ttab;
}
void save(string fname) {
fd = open(fname.c_str(), O_WRONLY | O_CREAT | O_TRUNC, 0666);
write(fd, &N, 8);
size_t size = N*N*sizeof(distance);
println(hlog, "size is ", hr::format("%zd", size));
char *p = (char*) tab;
while(size) {
size_t written = write(fd, p, size);
if(written <= 0) throw hr_exception("bad written");
p += written; size -= written;
}
println(hlog, "test: ", test());
::close(fd);
}
void clear() {
if(fd) { munmap(tabmap, N*N*sizeof(distance)+8); ::close(fd); }
else delete[] tab;
tab = nullptr; fd = 0;
}
~sagdist_t() {
clear();
}
};
sagdist_t sagdist;
vector<hyperpoint> subcell_points;
/** currently implemented only for Solv and Nil! */
void generate_subcellpoints() {
start_game();
subcell_points.clear();
println(hlog, currentmap->get_cellshape(cwt.at).vertices_only);
ld mx = 1, my = 1, mz = 1;
if(sol) mx = my = mz = log(2);
for(int x=0; x<4; x++)
for(int y=0; y<4; y++) if((x&1) == (y&1))
for(int z=0; z<4; z++) if((x&1) == (z&1)) {
subcell_points.push_back(point31(mx * (x+.5-2)/4, my * (y+.5-2)/4, mz * (z+.5-2)/4));
}
println(hlog, subcell_points);
}
void ensure_subcell_points() {
if(isize(subcell_points) <= 1) subcell_points = { C0 };
}
void compute_dists() {
int N = isize(sagcells);
neighbors.clear();
neighbors.resize(N);
int Q = isize(subcell_points);
for(int b=0; b<Q; b++)
for(int i=0; i<N; i++)
for(cell *c1: adj_minefield_cells(sagcells[i].first))
if(ids.count({c1,b})) neighbors[i].push_back(ids[{c1,b}]);
for(int i=0; i<N; i++) for(int b=0; b<Q; b++) if(b != sagcells[i].second)
neighbors[i].push_back(ids[{sagcells[i].first, b}]);
const ld ERRORV = -17.3;
transmatrix unknown = Id; unknown[0][0] = ERRORV;
cell_matrix.clear();
cell_matrix.resize(N, unknown);
cellpoint.clear();
cellpoint.resize(N, C0);
vector<int> visited;
auto visit = [&] (int id, const transmatrix& T) {
if(cell_matrix[id][0][0] != ERRORV) return;
cell_matrix[id] = T;
visited.push_back(id);
};
visit(0, Id);
for(int i=0; i<isize(visited); i++) {
cell *c0 = sagcells[visited[i]].first;
const transmatrix& T0 = cell_matrix[visited[i]];
for(int d=0; d<c0->type; d++)
if(ids.count({c0->move(d), 0}))
visit(ids[{c0->move(d), 0}], T0 * currentmap->adj(c0, d));
for(int q=0; q<Q; q++)
cellpoint[visited[i]/Q*Q+q] = T0 * subcell_points[q];
}
if(distance_file != "") {
sagdist.map(distance_file);
}
else if(gdist_prec && dijkstra_maxedge) {
sagdist.init(N, N);
println(hlog, "Computing Dijkstra distances...");
vector<vector<pair<int, ld>>> dijkstra_edges(N);
for(int i=0; i<N; i++) {
celllister cl(sagcells[i].first, dijkstra_maxedge, 50000, nullptr);
for(auto c1: cl.lst) for(int q=0; q<Q; q++) if(c1 != sagcells[i].first || q != sagcells[i].second) if(ids.count({c1, q}))
dijkstra_edges[i].emplace_back(ids[{c1, q}], pdist(cellpoint[i], cellpoint[ids[{c1, q}]]));
if(i == 0) println(hlog, i, " has ", isize(dijkstra_edges[i]), " edges");
}
parallelize(N, [&] (int a, int b) {
vector<ld> distances(N);
for(int i=a; i<b; i++) {
if(i % 500 == 0) println(hlog, "computing dijkstra for ", i , "/", N);
for(int j=0; j<N; j++) distances[j] = HUGE_VAL;
std::priority_queue<pair<ld, int>> pq;
auto visit = [&] (int i, ld dist) {
if(distances[i] <= dist) return;
distances[i] = dist;
pq.emplace(-dist, i);
};
visit(i, 0);
while(!pq.empty()) {
ld d = -pq.top().first;
int at = pq.top().second;
pq.pop();
for(auto e: dijkstra_edges[at]) visit(e.first, d + e.second);
}
for(int j=0; j<N; j++) sagdist[i][j] = distances[j] * gdist_prec + .5;
}
return 0;
}
);
println(hlog, "N0 = ", neighbors[0]);
println(hlog, "N1 = ", neighbors[1]);
}
else if(gdist_prec) {
sagdist.init(N, N);
println(hlog, "Computing distances... (N=", N, ")");
ld mx = 1;
for(int i=0; i<N; i++)
for(int j=0; j<N; j++) {
ld d = pdist(cellpoint[i], cellpoint[j]);
sagdist[i][j] = (d + .5) * gdist_prec;
if(d > mx) {
println(hlog, kz(cellpoint[i]), kz(cellpoint[j]), " :: ", mx = d);
}
}
}
else {
println(hlog, "no gdist_prec");
sagdist.init(N, N);
for(int i=0; i<N; i++) {
auto sdi = sagdist[i];
vector<int> q;
auto visit = [&] (int j, int dist) { if(sdi[j] < N) return; sdi[j] = dist; q.push_back(j); };
visit(i, 0);
for(int j=0; j<isize(q); j++) for(int k: neighbors[q[j]]) visit(k, sdi[q[j]]+1);
}
}
max_sag_dist = 0;
for(auto x: sagdist) max_sag_dist = max<int>(max_sag_dist, x);
max_sag_dist++;
println(hlog, "max_sag_dist = ", max_sag_dist);
}
bool legacy;
/* legacy method */
void init_snake(int n) {
sagcells.clear();
ids.clear();
auto enlist = [&] (cellwalker cw) {
ids[{cw.at, 0}] = isize(sagcells);
sagcells.emplace_back(cw.at, 0);
};
cellwalker cw = cwt;
enlist(cw);
cw += wstep;
enlist(cw);
for(int i=2; i<n; i++) {
cw += wstep;
while(ids.count({cw.at, 0})) {
cw = cw + wstep + 1 + wstep;
}
enlist(cw); cw += 1;
}
}
void init_cells_to_all() {
ensure_subcell_points();
sagcells.clear();
for(auto c: currentmap->allcells()) for(int i=0; i<isize(subcell_points); i++) {
ids[{c, i}] = isize(sagcells);
sagcells.emplace_back(c, i);
}
}
void compute_creq_neighbors() {
int SN = isize(sagcells);
neighbors.resize(SN);
vector<int> mindist_for(SN, 30000);
for(int i=0; i<SN; i++) {
auto& m = mindist_for[i];
for(int j=0; j<SN; j++) if(j != i) m = min<int>(m, sagdist[i][j]);
}
for(int i=0; i<SN; i++)
for(int j=0; j<SN; j++) if(i != j && sagdist[i][j] < mindist_for[i] + mindist_for[j]) neighbors[i].push_back(j);
max_sag_dist = 0;
for(auto x: sagdist) max_sag_dist = max<int>(max_sag_dist, x);
max_sag_dist++;
println(hlog, neighbors[0]);
hlog.flush();
}
vector<vector<pair<ld, subcell>>> dijkstra_edges;
void find_cells() {
println(hlog, "cellcount = ", cellcount);
ensure_subcell_points();
struct qitem {
ld dist; subcell sc; transmatrix T;
bool operator < (const qitem& b) const { return dist > b.dist + 1e-6; }
};
std::priority_queue<qitem> pq;
auto visit = [&] (subcell sc, ld dist, const transmatrix& T) {
if(ids.count(sc)) return;
pq.emplace(qitem{dist, sc, T});
};
sagsubcell_point.clear();
sagsubcell_inv.clear();
int Q = isize(subcell_points);
visit(subcell{cwt.at,0}, 0, Id);
ld maxdist0 = 0;
for(int i=0;; i++) {
if(pq.empty()) { println(hlog, "no more"); break; }
auto p = pq.top();
pq.pop();
ld dist = p.dist;
auto sc = p.sc;
transmatrix T = p.T;
if(ids.count(sc)) { i--; continue; }
if(i == cell_request-1) maxdist0 = dist;
if(i >= cell_request && dist > maxdist0 + 1e-6) break;
sagcells.push_back(sc);
sagsubcell_point.push_back(T * subcell_points[sc.second]);
sagsubcell_inv.push_back(inverse(T));
ids[sc] = i;
println(hlog, "cell ", i, " is ", sc, " at ", sagsubcell_point.back(), " in distance ", dist);
if(dijkstra_maxedge) {
dijkstra_edges.emplace_back();
auto& de = dijkstra_edges.back();
set<cell*> vis;
vector<tuple<cell*, transmatrix, int>> q;
auto visit1 = [&] (cell *c, transmatrix T, int d) {
if(vis.count(c)) return;
vis.insert(c);
q.emplace_back(c, T, d);
};
visit1(sc.first, Id, 0);
for(int i1=0; i1 < isize(q); i1++) {
cell *c = get<0>(q[i1]);
transmatrix T1 = get<1>(q[i1]);
int dist1 = get<2>(q[i1]);
if(dist1 < dijkstra_maxedge) for(int j=0; j<c->type; j++) {
cell *c1 = c->cmove(j);
visit1(c1, T1 * currentmap->adj(c, j), dist1+1);
}
for(int q=0; q<Q; q++) {
subcell sc1 {c, q};
ld ndist = dijkstra_tile ? dist1 : pdist(subcell_points[sc.second], T1 * subcell_points[q]);
de.push_back({ndist, sc1});
visit(sc1, dist + ndist, T*T1);
}
}
}
else {
for(int j=0; j<sc.first->type; j++) for(int k=0; k<Q; k++) {
cell *c1 = sc.first->cmove(j);
transmatrix T1 = T * currentmap->adj(sc.first, j);
visit(subcell{c1, k}, pdist(C0, T1*C0), T1);
}
}
}
int SN = isize(sagcells);
println(hlog, "number of cells found: ", SN, " dijkstra_maxedge = ", dijkstra_maxedge);
all_disk_cells_sorted = {};
for(auto p: ids) if(all_disk_cells_sorted.empty() || p.first.first != all_disk_cells_sorted.back()) all_disk_cells_sorted.push_back(p.first.first);
for(cell *c: all_disk_cells_sorted) c->mpdist = 0, c->land = laCanvas, c->landparam = 0x101010, c->wall = waNone;
}
void init_cell_request() {
println(hlog, "generating on cell request");
find_cells();
int SN = isize(sagcells);
sagdist.init(SN, 0);
if(!dijkstra_maxedge) {
println(hlog, "computing sagdist ...");
parallelize(SN, [&] (int a, int b) {
for(int i=a; i<b; i++) {
for(int j=0; j<SN; j++) {
ld dist = pdist(sagsubcell_point[i], sagsubcell_point[j]);
sagdist[i][j] = int(dist * gdist_prec + 0.5);
if(i < j && sagdist[i][j] == 0) println(hlog, "for ", tie(i,j), " pdist computed as ", dist);
}
}
return 0;
});
println(hlog, "... done");
}
else {
vector<vector<pair<ld, int>>> dijkstra_edges_2;
dijkstra_edges_2.resize(SN);
for(int i=0; i<SN; i++) for(auto p: dijkstra_edges[i]) if(ids.count(p.second)) dijkstra_edges_2[i].emplace_back(p.first, ids[p.second]);
parallelize(SN, [&] (int a, int b) {
vector<ld> distances(SN);
for(int i=a; i<b; i++) {
if(i % 500 == 0) println(hlog, "computing dijkstra for ", i , "/", SN);
for(int j=0; j<SN; j++) distances[j] = HUGE_VAL;
std::priority_queue<pair<ld, int>> pq;
auto visit = [&] (int i, ld dist) {
if(distances[i] <= dist) return;
distances[i] = dist;
pq.emplace(-dist, i);
};
visit(i, 0);
while(!pq.empty()) {
ld d = -pq.top().first;
int at = pq.top().second;
pq.pop();
for(auto e: dijkstra_edges_2[at]) {
// println(hlog, "move from ", at, " to ", e.first, " for ", d, "+", e.second);
visit(e.second, d + e.first);
}
}
for(int j=0; j<SN; j++) sagdist[i][j] = distances[j] * gdist_prec + .5;
}
return 0;
});
}
compute_creq_neighbors();
}
bool distance_only;
void init_cells() {
if(state & SS_CELLS) return;
sag::init();
state |= SS_CELLS;
if(cell_request) {
if(distance_file != "") {
println(hlog, "loading graph ", distance_file);
sagdist.map(distance_file);
if(distance_only) {
sagcells.resize(sagdist.N, subcell{nullptr, 0});
}
else {
find_cells();
}
compute_creq_neighbors();
return;
}
init_cell_request();
return;
}
else if(legacy) state |= SS_NEED_SNAKE;
else init_cells_to_all();
if(!cell_request) compute_dists();
}
void init_snake_if_needed() {
if(!(state & SS_NEED_SNAKE)) return;
state &=~ SS_NEED_SNAKE;
init_snake(2 * isize(sagid));
compute_dists();
}
ld pdist(hyperpoint hi, hyperpoint hj) {
if(sol && angular) {
return 10 * asinh(hypot_d(3, lie_log(shiftless(gpushxto0(hi) * hj))) / 10);
}
if(sol) return min(geo_dist(hi, hj), geo_dist(hj, hi));
if(mproduct && angular) {
auto di = product_decompose(hi);
auto dj = product_decompose(hj);
ld x = hdist(di.second, dj.second);
ld z = di.first - dj.first;
auto res = sqrt((x*x+z*z) * (x > 0 ? sinh(x) / x : 1));
return res;
}
return geo_dist(hi, hj);
};
void geo_stats() {
init_cells();
println(hlog, "counting sagdist, N=", int(sagdist.N), " max_sag_dist = ", max_sag_dist);
vector<short> sgdc(max_sag_dist, 0);
for(auto x: sagdist) sgdc[x]++;
println(hlog, "building sorted_sagdist");
vector<short> sorted_sagdist;
for(int i=0; i<max_sag_dist; i++) for(int j=0; j<sgdc[i]; j++) sorted_sagdist.push_back(i);
println(hlog, "computing min_max_nei");
int minnei = 500, maxnei = 0;
int SN = sagdist.N;
for(int i=0; i<SN; i++) for(int j: neighbors[i]) {
if(sagdist[i][j] < minnei) minnei = sagdist[i][j];
if(sagdist[i][j] > maxnei) maxnei = sagdist[i][j];
}
for(int i=0; i<3; i++) {
bool first = false;
#define out(x, y) if(i == 0) println(hlog, x, " = ", y); else if(first) print(hlog, ";"); first = true; if(i == 1) print(hlog, x); if(i == 2) print(hlog, y);
out("nodes", SN);
out("maxsagdist", max_sag_dist);
out("dim", (euclid && WDIM == 2 && euc::eu.user_axes[1][1] == 1) ? 1 : WDIM);
out("geometry", S3 >= OINF ? "tree" : hyperbolic ? "hyperbolic" : sphere ? "sphere" : euclid ? "euclid" : nil ? "nil" : sol ? "solv" : mproduct ? "product" : "other");
out("closed", max_sag_dist == isize(sagcells) ? 0 : closed_manifold ? 1 : 0);
out("angular", angular);
for(int p: {1, 10, 50}) { out(format("sagdist%02d", p), sorted_sagdist[(p * sorted_sagdist.size()) / 100]); }
out("minnei", minnei);
out("maxnei", maxnei);
out("neighbors", isize(neighbors[0]));
println(hlog);
#undef out
}
}
bool visualize_subcells_on = false;
bool visualize_subcells(cell *c, const shiftmatrix& V) {
if(!visualize_subcells_on) return false;
for(int i=0; i<isize(subcell_points); i++) {
auto p = at_or_null(ids, pair<cell*,int>{c, i});
if(!p) continue;
queuepolyat(V * rgpushxto0(subcell_points[i]), cgi.shSnowball, 0x80FF80FF, PPR::FLOORb);
for(auto nei: neighbors[*p]) if(nei<*p) {
queueline(V * subcell_points[i], V * sagsubcell_inv[*p] * sagsubcell_point[nei], 0x8000FF, 3).prio = PPR::FLOORa;
}
}
return false;
}
int cell_read_args() {
#if CAP_COMMANDLINE
using namespace arg;
if(0) ;
else if(argis("-sag_gdist")) {
shift(); gdist_prec = argi();
}
else if(argis("-sag_gdist_dijkstra")) {
shift(); dijkstra_maxedge = argi(); dijkstra_tile = false;
}
else if(argis("-sag-dtile")) {
dijkstra_tile = true; dijkstra_maxedge = 1;
}
else if(argis("-sag_gdist_save")) {
shift();
sagdist.save(args());
}
else if(argis("-sag_gdist_load")) {
distance_only = false;
shift(); distance_file = args();
}
else if(argis("-sag-gdist_load1")) {
distance_only = true;
shift(); distance_file = args();
}
else if(argis("-sag-angular")) {
shift(); angular = argi();
}
else if(argis("-sag-geo-stats")) geo_stats();
else if(argis("-sag-creq")) {
shift(); cell_request = argi();
}
else if(argis("-sag-initcells")) {
init_cells();
}
else if(argis("-gen-subcellpoints")) {
generate_subcellpoints();
}
/* to viz only subcellpoints */
else if(argis("-sag-clear")) {
shmup::monstersAt.clear();
}
else return 1;
#endif
return 0;
}
int ah = addHook(hooks_args, 100, cell_read_args);
}
}
}

251
rogueviz/sag/continuous.cpp Normal file
View File

@ -0,0 +1,251 @@
#include "../rogueviz.h"
#include <sys/mman.h>
#include <fcntl.h>
namespace rogueviz {
namespace sag {
bool embedding;
dhrg::logistic lgemb(1, 1);
vector<hyperpoint> placement;
int embiter;
ld pdist(int i, int j) {
return pdist(placement[i], placement[j]);
};
void disttable_add(ld dist, int qty0, int qty1) {
using namespace dhrg;
size_t i = dist * llcont_approx_prec;
constexpr array<ll, 2> zero = {0, 0};
while(disttable_approx.size() <= i) disttable_approx.push_back(zero);
disttable_approx[i][0] += qty0;
disttable_approx[i][1] += qty1;
}
void compute_loglik() {
dhrg::llcont_approx_prec = 10;
dhrg::disttable_approx.clear();
int DN = isize(sagid);
for(int i=0; i<DN; i++)
for(int j=0; j<i; j++)
disttable_add(pdist(i, j), 1, 0);
for(int i=0; i<isize(sagedges); i++) {
edgeinfo& ei = sagedges[i];
if(ei.i != ei.j)
disttable_add(pdist(ei.i, ei.j), -1, 1);
}
dhrg::logisticfun lc = dhrg::loglik_cont_approx;
dhrg::fast_loglik_cont(lgemb, lc, nullptr, 1, 1e-5);
println(hlog, "loglik = ", format("%.6f", lc(lgemb)), " R = ", lgemb.R, " T = ", lgemb.T, " iterations = ", embiter);
}
void prepare_embedding() {
map<int, transmatrix> maps;
vector<int> visited;
auto visit = [&] (int id, const transmatrix& T) {
if(maps.count(id)) return;
maps[id] = T;
visited.push_back(id);
};
visit(0, Id);
for(int i=0; i<isize(visited); i++) {
cell *c0 = sagcells[i].first;
transmatrix T0 = maps[i];
for(int d=0; d<c0->type; d++)
if(ids.count({c0->move(d), 0}))
visit(ids[{c0->move(d), 0}], T0 * currentmap->adj(c0, d));
}
int DN = isize(sagid);
placement.resize(DN);
for(int i=0; i<DN; i++) placement[i] = tC0(maps[sagid[i]]);
}
void reassign_embedding() {
int DN = isize(sagid);
for(int i=0; i<DN; i++) {
vdata[i].m->base = sagcells[0].first;
vdata[i].m->at = rgpushxto0(placement[i]);
virtualRebase(vdata[i].m);
forgetedges(i);
}
shmup::fixStorage();
}
void improve_embedding() {
embiter++;
if(placement.empty()) {
prepare_embedding();
compute_loglik();
}
ld eps = .1;
int DN = isize(sagid);
hyperpoint h = C0;
for(int i=0; i<WDIM; i++) h[i] += (hrandf() - 0.5) * eps;
h = normalize(h);
auto nplacement = placement;
parallelize(DN, [&] (int a, int b) {
for(int i=a; i<b; i++) {
hyperpoint np = rgpushxto0(placement[i]) * h;
ld change;
for(auto e: edges_yes[i]) change -= lgemb.lyes(pdist(placement[i], placement[e]));
for(auto e: edges_no[i]) change -= lgemb.lno(pdist(placement[i], placement[e]));
for(auto e: edges_yes[i]) change += lgemb.lyes(pdist(np, placement[e]));
for(auto e: edges_no[i]) change += lgemb.lno(pdist(np, placement[e]));
if(change > 0) nplacement[i] = np;
}
return 0;
});
placement = nplacement;
}
void save_embedding(const string& fname) {
fhstream f(fname, "wt");
for(int i=0; i<isize(sagid); i++) {
println(f, vdata[i].name);
for(int d=0; d<MDIM; d++)
println(f, format("%.20f", placement[i][d]));
}
}
void load_embedding(const string& fname) {
prepare_embedding();
fhstream f(fname, "rt");
if(informat == 2) {
/* H2 embedding */
while(!feof(f.f)) {
string lab = scan<string>(f);
int id;
if(!labeler.count(lab)) {
printf("unknown vertex: %s\n", lab.c_str());
continue;
}
else id = getid(lab);
ld alpha, r;
if(1) {
dynamicval<eGeometry> g(geometry, gNormal);
hyperpoint h;
for(int d=0; d<MDIM; d++) h[d] = scan<ld>(f);
alpha = atan2(h);
r = hdist0(h);
println(hlog, "read ", lab, " as ", h, " which is ", tie(alpha, r));
}
placement[id] = direct_exp(cspin(0, 2, alpha) * ctangent(0, r));
println(hlog, "dist = ", pdist(placement[id], C0), " expected: ", r);
}
}
else if(informat == 3) {
/* BFKL */
string ignore;
if(!scan(f, ignore, ignore, ignore, ignore, ignore, ignore, ignore, ignore)) {
printf("Error: incorrect format of the first line\n"); exit(1);
}
while(true) {
string lab = scan<string>(f);
if(lab == "" || lab == "#ROGUEVIZ_ENDOFDATA") break;
ld r, alpha;
if(!scan(f, r, alpha)) { printf("Error: incorrect format of r/alpha\n"); exit(1); }
hyperpoint h = spin(alpha * degree) * xpush0(r);
if(!labeler.count(lab)) {
printf("unknown vertex: %s\n", lab.c_str());
}
else {
int id = getid(lab);
placement[id] = h;
}
}
}
else if(informat == 4) {
while(true) {
string lab = scan<string>(f);
if(lab == "") break;
ld r, alpha;
if(!scan(f, r, alpha)) { printf("Error: incorrect format of r/alpha\n"); exit(1); }
hyperpoint h = spin(alpha) * xpush0(r);
if(!labeler.count(lab)) {
printf("unknown vertex: %s\n", lab.c_str());
}
else {
int id = getid(lab);
placement[id] = h;
}
}
}
else {
while(!feof(f.f)) {
string lab = scan<string>(f);
int id;
if(!labeler.count(lab)) {
printf("unknown vertex: %s\n", lab.c_str());
continue;
}
else id = getid(lab);
hyperpoint h;
for(int d=0; d<MDIM; d++) h[d] = scan<ld>(f);
placement[id] = h;
}
}
reassign_embedding();
compute_loglik();
}
int embturn = 1;
void embedding_iterate() {
int t1 = SDL_GetTicks();
for(int i=0; i<embturn; i++) {
improve_embedding();
}
int t2 = SDL_GetTicks();
int t = t2 - t1;
if(t < 50) embturn *= 2;
else if(t > 200) embturn = (embturn + 1) / 2;
else embturn = (embturn * 100 + (t-1)) / t;
compute_loglik();
if(auto_visualize) reassign_embedding();
}
int embed_read_args() {
#if CAP_COMMANDLINE
using namespace arg;
if(0) ;
else if(argis("-sagembed")) {
sag::embedding = true;
}
else if(argis("-sagembedoff")) {
sag::embedding = false;
}
else if(argis("-sagsavee")) {
PHASE(3); shift(); sag::save_embedding(args());
}
else if(argis("-sagloade")) {
PHASE(3); shift(); sag::load_embedding(args());
}
else return 1;
#endif
return 0;
}
int ahembed = addHook(hooks_args, 100, embed_read_args);
}
}

399
rogueviz/sag/data.cpp Normal file
View File

@ -0,0 +1,399 @@
// RogueViz -- SAG embedder: data manager
// Copyright (C) 2011-24 Zeno Rogue, see 'hyper.cpp' for details
#include "../rogueviz.h"
#include <sys/mman.h>
#include <fcntl.h>
namespace rogueviz {
namespace sag {
using namespace cells;
edgetype *sag_edge;
/** if this is true, no nodes are allowed to be on the same subcell */
bool allow_doubles = false;
/** node i is on sagcells[sagid[i]] */
vector<int> sagid;
/** what node is on sagcells[i] (need loglik_repeat to be off) */
vector<int> sagnode;
/* separate hubs -- only for smClosest */
ld hub_penalty;
string hub_filename;
vector<int> hubval;
vector<edgeinfo> sagedges;
vector<vector<int>> edges_yes, edges_no;
ld edgepower=1, edgemul=1;
void init();
void compute_cost();
void prepare_graph() {
int DN = isize(sagid);
println(hlog, "prepare_graph with DN = ", DN);
set<pair<int, int>> alledges;
for(auto e: sagedges) {
if(e.i == e.j) continue;
alledges.emplace(e.i, e.j);
alledges.emplace(e.j, e.i);
}
edges_yes.clear(); edges_yes.resize(DN);
edges_no.clear(); edges_no.resize(DN);
for(int i=0; i<DN; i++) for(int j=0; j<DN; j++) if(i != j) {
if(alledges.count({i, j}))
edges_yes[i].push_back(j);
else
edges_no[i].push_back(j);
}
sagnode.clear();
sagnode.resize(isize(sagcells), -1);
for(int i=0; i<DN; i++)
sagnode[sagid[i]] = i;
compute_cost();
}
void set_inverse();
void place_correctly() {
int DN = isize(sagid);
vector<int> qon(isize(sagcells), 0);
for(int i=0; i<DN; i++) qon[sagid[i]]++;
vector<int> qsf(isize(sagcells), 0);
ld rad = .25 * cgi.scalefactor;
if(isize(subcell_points) > 1) rad /= pow(isize(subcell_points), WDIM);
for(int i=0; i<DN; i++) {
int ci = sag::sagid[i];
vdata[i].m->base = sagcells[ci].first;
vdata[i].m->at = Id;
if(allow_doubles) vdata[i].m->at =
spin(TAU*(qsf[ci]++) / qon[ci]) * xpush(rad * (qon[ci]-1) / qon[ci]);
if(isize(subcell_points) > 1)
vdata[i].m->at = rgpushxto0(subcell_points[sagcells[ci].second]) * vdata[i].m->at;
}
}
bool visualization_active;
void forgetedges(int id) {
for(int i=0; i<isize(vdata[id].edges); i++)
vdata[id].edges[i].second->orig = NULL;
}
void create_viz() {
if(distance_only) return;
int DN = isize(sagid);
bool vact = state & SS_GRAPH;
state |= SS_GRAPH;
if(!vact) for(int i=0; i<DN; i++) vdata[i].data = 0;
if(!vact) for(int i=0; i<isize(sagedges); i++) {
edgeinfo& ei = sagedges[i];
ei.weight2 = pow((double) ei.weight, (double) edgepower) * edgemul;
addedge0(ei.i, ei.j, &ei);
}
if(sagcells[0].first == nullptr) return;
if(vact) for(int i=0; i<DN; i++) forgetedges(i);
if(!vact) for(int i=0; i<DN; i++) {
vertexdata& vd = vdata[i];
vd.cp = colorpair(dftcolor);
rogueviz::createViz(i, sagcells[sagid[i]].first, Id);
}
place_correctly();
if(!vact) storeall();
if(vact) shmup::fixStorage();
set_inverse();
vact = true;
}
/** save the SAG solution (sagid) */
void save_sag_solution(const string& fname) {
if(!(state & SS_DATA)) throw hr_exception("save_sag_solution with no data");
FILE *f = fopen(fname.c_str(), "wt");
if(!f) throw hr_exception("failed to save SAG solution");
for(int i=0; i<isize(sagid); i++)
fprintf(f, "%s;%d\n", vdata[i].name.c_str(), sagid[i]);
fclose(f);
}
/** load the SAG solution (sagid) */
void load_sag_solution(const string& fname) {
if(!(state & SS_DATA)) throw hr_exception("load_sag_solution with no data");
printf("Loading the sag from: %s\n", fname.c_str());
FILE *sf = fopen(fname.c_str(), "rt");
if(!sf) throw hr_exception("failed to load SAG solution");
int SN = isize(sagcells);
if(sf) while(true) {
string lab;
while(true) {
int c = fgetc(sf);
if(c == EOF) goto afterload;
else if(c == ',' || c == ';') break;
else if(rv_ignore(c)) ;
else lab += c;
}
int sid = -1;
int err = fscanf(sf, "%d", &sid);
if(sid < 0 || sid >= SN || err < 1) sid = -1;
if(!labeler.count(lab)) {
printf("unknown vertex: %s\n", lab.c_str());
}
else {
int id = getid(lab);
sagid[id] = sid;
}
}
afterload:
if(sf) fclose(sf);
prepare_graph();
create_viz();
}
void load_sag_solution_basic(const string& fname) {
if(!(state & SS_DATA)) throw hr_exception("load_sag_solution_basic with no data");
FILE *f = fopen(fname.c_str(), "rt");
for(auto& i: sagid) fscanf(f, "%d", &i);
fclose(f);
println(hlog, "loaded sagid = ", sagid);
prepare_graph();
create_viz();
}
void after_data() {
state |= SS_DATA;
init_snake_if_needed();
int DN = isize(vdata);
int SN = isize(sagcells);
if(SN < DN) {
println(hlog, "SN = ", SN, " DN = ", DN);
throw hr_exception("not enough cells for SAG");
}
sagid.resize(DN);
for(int i=0; i<DN; i++) sagid[i] = i;
prepare_graph();
create_viz();
}
/** load all the edges */
void read_weighted(const char *fname) {
println(hlog, "read_weighted called");
if(state & SS_DATA) return;
state |= SS_WEIGHTED;
init_cells();
maxweight = 0;
fhstream f(fname, "rt");
if(!f.f) throw hr_exception("readsag_weighted: failed to open");
while(!feof(f.f)) {
string l1, l2;
while(true) {
int c = fgetc(f.f);
if(c == EOF) goto after;
else if(c == ';') break;
else if(rv_ignore(c)) ;
else l1 += c;
}
while(true) {
int c = fgetc(f.f);
if(c == EOF) goto after;
else if(c == ';') break;
else if(rv_ignore(c)) ;
else l2 += c;
}
ld wei;
if(!scan(f, wei)) continue;
edgeinfo ei(sag_edge);
ei.i = getid(l1);
ei.j = getid(l2);
ei.weight = wei;
sagedges.push_back(ei);
}
after:
println(hlog, "weighted graph ", fname, " read successfully");
after_data();
}
/** load edges, in */
void read_unweighted(const char *fname) {
if(state & SS_DATA) return;
init_cells();
fhstream f(fname, "rt");
if(!f.f) throw hr_exception("readsag_weighted: failed to open");
scanline(f);
set<pair<int, int> > edges;
int all = 0, good = 0;
while(!feof(f.f)) {
string l1 = scan<string>(f);
string l2 = scan<string>(f);
if(l1 == "") continue;
if(l2 == "") continue;
edgeinfo ei(sag_edge);
ei.i = getid(l1);
ei.j = getid(l2);
if(ei.i > ei.j) swap(ei.i, ei.j);
all++;
if(edges.count({ei.i, ei.j})) continue;
good++;
edges.emplace(ei.i, ei.j);
ei.weight = 1;
sagedges.push_back(ei);
}
println(hlog, "unweighted graph ", fname, " read successfully");
println(hlog, "N = ", isize(vdata), " edges = ", good, "/", all);
after_data();
}
void read_hubs(const string& fname) {
if(!(state && SS_DATA)) throw hr_exception("read_hubs with no data");
hubval.resize(isize(vdata), -1);
fhstream f(fname, "rt");
if(!f.f) { printf("Failed to open hub file: %s\n", fname.c_str()); exit(1); }
println(hlog, "loading hubs: ", fname);
while(!feof(f.f)) {
string l1, l2;
while(true) {
int c = fgetc(f.f);
if(c == EOF) return;
else if(c == ';') break;
else if(rv_ignore(c)) ;
else l1 += c;
}
while(true) {
int c = fgetc(f.f);
if(c == EOF) return;
else if(c == ';') return;
else if(rv_ignore(c)) break;
else l2 += c;
}
if(!id_known(l1)) {
printf("label unknown: %s\n", l1.c_str());
exit(1);
}
hubval[getid(l1)] = atoi(l2.c_str());
}
}
void generate_fake_data(int n, int m) {
if(state & SS_DATA) return;
init_cells();
state |= SS_WEIGHTED;
sagid.resize(n);
for(int i=0; i<n; i++) sagid[i] = i;
hrandom_shuffle(sagid);
if(m > n || m < 0) throw hr_exception("generate_fake_data parameters incorrect");
sagid.resize(m);
int DN = isize(sagid);
vdata.resize(DN);
for(int i=0; i<DN; i++)
vdata[i].name = its(i) + "@" + its(sagid[i]);
sag_edge = add_edgetype("SAG edge");
for(int i=0; i<DN; i++)
for(int j=i+1; j<DN; j++) {
edgeinfo ei(sag_edge);
ei.i = i;
ei.j = j;
ei.weight = 1. / sagdist[sagid[i]][sagid[j]];
sagedges.push_back(ei);
}
after_data();
for(int i=0; i<DN; i++) {
color_t col = ccolor::formula(sagcells[sagid[i]].first);
col <<= 8;
col |= 0xFF;
vdata[i].cp.color1 = vdata[i].cp.color2 = col;
}
}
int data_read_args() {
#if CAP_COMMANDLINE
using namespace arg;
if(0) ;
else if(argis("-sagmin")) {
auto& ed = sag_edge ? *sag_edge : default_edgetype;
shift_arg_formula(ed.visible_from);
ed.visible_from_hi = ed.visible_from;
}
else if(argis("-sagminhi")) {
auto& ed = sag_edge ? *sag_edge : default_edgetype;
shift_arg_formula(ed.visible_from_hi);
}
else if(argis("-sag-edgepower")) {
shift_arg_formula(sag::edgepower);
shift_arg_formula(sag::edgemul);
}
else if(argis("-sag-weighted")) {
PHASE(3);
shift(); sag::read_weighted(argcs());
}
else if(argis("-sag-unweighted")) {
PHASE(3);
shift(); sag::read_unweighted(argcs());
}
else if(argis("-saghubs")) {
PHASE(3);
shift_arg_formula(sag::hub_penalty);
shift(); sag::read_hubs(argcs());
}
else if(argis("-sag-generate")) {
PHASE(3);
shift(); int n = argi();
shift(); int m = argi();
sag::generate_fake_data(n, m);
}
// (3) load the initial positioning
else if(argis("-sag-load-sol")) {
PHASE(3); shift(); sag::load_sag_solution(args());
}
else if(argis("-sag-load-solution")) {
PHASE(3); shift(); sag::load_sag_solution_basic(args());
}
else if(argis("-sag-save-sol")) {
PHASE(3); shift(); sag::save_sag_solution(args());
}
else return 1;
#endif
return 0;
}
int ahdata = addHook(hooks_args, 100, data_read_args);
}
}

View File

@ -0,0 +1,299 @@
// RogueViz -- SAG embedder: some experiments
// Copyright (C) 2011-24 Zeno Rogue, see 'hyper.cpp' for details
#include "../rogueviz.h"
#include <sys/mman.h>
#include <fcntl.h>
namespace rogueviz {
namespace sag {
int recover_from;
vector<pair<dhrg::logistic, ld>> results;
ld bestcost;
int logid;
int lastmethod = 0;
int mul_used;
bool optimized_embedding(int mul, ld bonus = 0) {
if(logid < recover_from) { println(hlog, "skipped ", logid++, " due to recover"); return false; }
println(hlog, "starting, logid = ", logid, " recover_from = ", recover_from, " R = ", best.R+bonus, " T = ", best.T);
int DN = isize(sagid);
mul_used = mul;
lgsag_pre = best;
lgsag_pre.R += bonus;
lgsag = lgsag_pre;
compute_loglik_tab(); compute_cost();
dofullsa_iterations(mul * DN);
optimize_sag_loglik_logistic();
hlog.flush();
while(true) {
int ch = hillclimb();
println(hlog, "changes = ", ch, " cost = ", cost, " R=", lgsag.R, " T=", lgsag.T);
if(!ch) break;
optimize_sag_loglik_logistic();
hlog.flush();
}
bool new_best = cost < bestcost;
if(new_best) best = lgsag, bestcost = cost;
sag::output_stats();
results.emplace_back(lgsag, cost);
return new_best;
}
void sag_new_experiment() {
view_each = 999999;
println(hlog, "SAG new experiment started");
int DN = isize(sagid);
println(hlog, "N = ", DN);
hlog.flush();
twoway = true; allow_doubles = true;
method = smLogistic;
if(recover_from) lgsag = best;
if(!recover_from) lgsag.R = max_sag_dist;
if(!recover_from) lgsag.T = 1;
compute_loglik_tab();
compute_cost();
best = lgsag; bestcost = HUGE_VAL;
for(int i=10; i<=1; i--)
optimized_embedding(15000, i);
for(int i=1; i<=10; i++)
optimized_embedding(15000);
for(int i=1; i<=10; i++)
optimized_embedding(15000, -i);
optimized_embedding(24000);
optimized_embedding(32000);
optimized_embedding(40000);
optimized_embedding(48000);
optimized_embedding(60000);
optimized_embedding(80000);
optimized_embedding(100000);
optimized_embedding(120000);
optimized_embedding(120000);
optimized_embedding(120000);
}
void sag_v5() {
println(hlog, "SAG v5 started");
int DN = isize(sagid);
println(hlog, "N = ", DN);
recost_each = DN; autofix_rt = 2;
hlog.flush();
twoway = true; allow_doubles = true;
method = smLogistic;
lgsag.R = max_sag_dist;
lgsag.T = 1;
compute_loglik_tab();
compute_cost();
best = lgsag; bestcost = HUGE_VAL;
for(int i=0; i<30; i++) optimized_embedding(10000);
}
void sag_v6() {
println(hlog, "SAG v6 started");
int DN = isize(sagid);
println(hlog, "N = ", DN);
recost_each = DN; autofix_rt = 2;
hlog.flush();
twoway = true; allow_doubles = true;
method = smLogistic;
lgsag.R = max_sag_dist;
lgsag.T = 1;
compute_loglik_tab();
compute_cost();
best = lgsag; bestcost = HUGE_VAL;
for(int i=0; i<30; i++) optimized_embedding(100000);
autofix_rt = 1;
for(int i=0; i<30; i++) optimized_embedding(100000);
autofix_rt = 0;
for(int i=0; i<30; i++) optimized_embedding(100000);
}
void sag_test_mul() {
allow_doubles = true; twoway = true;
int DN = isize(sagid);
// lgsag.R=9.19925; lgsag.T=0.587723;
method = smLogistic;
lgsag.R = max_sag_dist;
lgsag.T = 1;
compute_loglik_tab();
compute_cost();
best = lgsag; bestcost = 999999;
recost_each = DN; autofix_rt = 2;
output_fullsa = false;
println(hlog, "sag_test_mul started");
if(1) for(int mul=25;; mul *= 2) for(int af: {3, 2, 1, 0}) {
autofix_rt = af;
ld tcost = 0;
ld tcost2 = 0;
int qty = 100;
vector<ld> costs;
// println(hlog, "R=", best.R, " T=", best.T);
for(int i=0; i<qty; i++) {
println(hlog, tie(mul, af, i));
lgsag = best; compute_loglik_tab(); compute_cost();
sag::dofullsa_iterations(mul * DN);
sag::optimize_sag_loglik_logistic();
checkmark_cost = HUGE_VAL;
while(sag::checkmark_hillclimb()) sag::optimize_sag_loglik_logistic();
tcost += cost;
tcost2 += cost * cost;
costs.push_back(cost);
bool new_best = cost < bestcost;
if(new_best) best = lgsag, bestcost = cost;
// println(hlog, "CSV;", mul, ";", af, ";", i, ";", cost);
}
sort(costs.begin(), costs.end());
println(hlog, "mul=", mul, " AF=", autofix_rt, " ECost=", tcost/qty, " sigma Cost = ", sqrt(tcost2/qty - tcost*tcost/qty/qty), " : ", costs);
}
auto tpair = [&] (int lt, int ht) {
ld tcost = 0;
for(int i=0; i<20; i++) {
sagnode.clear();
sagnode.resize(isize(sagcells), -1);
for(int i=0; i<DN; i++) sagid[i] = i;
for(int i=0; i<DN; i++) sagnode[i] = i;
compute_cost();
lowtemp = 20;
hightemp = 20;
sag::dofullsa_iterations(20 * DN);
lowtemp = lt;
hightemp = ht;
sag::dofullsa_iterations(1000 * DN);
tcost += cost;
}
println(hlog, "lt=", lt, " ht=", ht, " tcost=", tcost);
};
if(false)
for(int lt: {-3, -5, -10, -15, -20, -25})
for(int ht: {-2, -1, 0, 1, 2, 5, 10})
tpair(lt, ht);
}
void write_colors(string s) {
auto_orth(true);
if(s == "-") return;
fhstream f(s, "wt");
for(int i=0; i<isize(vdata); i++) {
println(f, vdata[i].name, ",", format("%8x", vdata[i].cp.color1));
}
}
void sag_new_experiment_viz() {
view_each = 999999;
println(hlog, "SAG new experiment started");
int DN = isize(sagid);
println(hlog, "N = ", DN);
hlog.flush();
twoway = true; allow_doubles = true;
method = smLogistic;
lgsag.R = max_sag_dist;
lgsag.T = 1;
compute_loglik_tab();
compute_cost();
best = lgsag; bestcost = HUGE_VAL;
optimized_embedding(15000, 0);
optimized_embedding(15000, 0);
}
bool report_tempi = false;
string auto_save;
void output_stats() {
if(auto_save != "" && cost < best_cost) {
println(hlog, "cost ", cost, " beats ", best_cost);
best_cost = cost;
sag::save_sag_solution(auto_save);
}
println(hlog, "solution: ", sagid);
int DN = isize(sagid);
auto [mAP, MeanRank] = compute_mAP();
dhrg::iddata routing_result;
if(!known_pairs) { known_pairs = true; dhrg::prepare_pairs(DN, [] (int i) { return edges_yes[i]; }); }
dhrg::greedy_routing(routing_result, [] (int i, int j) { return sagdist[sagid[i]][sagid[j]]; });
print(hlog, "CSV;", logid++, ";", isize(sagnode), ";", DN, ";", isize(sagedges), ";", lgsag_pre.R, ";", lgsag_pre.T, ";", lgsag.R, ";", lgsag.T, ";", cost, ";", mAP, ";", MeanRank, ";", routing_result.suc / routing_result.tot, ";", routing_result.routedist / routing_result.bestdist);
if(lastmethod) print(hlog, ";", lastmethod);
if(mul_used) print(hlog, ";", mul_used);
if(report_tempi) print(hlog, ";", hightemp,";",lowtemp,";",format("%lld", numiter));
println(hlog);
}
int exp_read_args() {
#if CAP_COMMANDLINE
using namespace arg;
if(0) ;
else if(argis("-sag-new")) sag_new_experiment();
else if(argis("-sag-v5")) sag_v5();
else if(argis("-sag-v6")) sag_v6();
else if(argis("-sag-new-viz")) sag_new_experiment_viz();
else if(argis("-sag-test-mul")) sag_test_mul();
else if(argis("-sag-write-colors")) {
shift(); write_colors(args());
}
else if(argis("-sag-recover")) {
shift(); best.R = argf();
shift(); best.T = argf();
shift(); bestcost = argf();
shift(); recover_from = argi();
println(hlog, "set recover_from to ", recover_from);
// 58.6509;7.08961;26492.1
}
else if(argis("-sag-load-solution")) {
PHASE(3); shift(); sag::load_sag_solution_basic(args());
method = smLogistic;
lgsag.R = max_sag_dist;
lgsag.T = 1;
opt_debug = true;
twoway = true; allow_doubles = true;
optimize_sag_loglik_auto();
}
else if(argis("-sagstats-logid")) {
shift(); logid = argi();
}
else if(argis("-sag0")) {
sag::report_tempi = true;
numiter = 0;
}
else if(argis("-sagsave-auto")) {
PHASE(3); shift(); auto_save = args();
}
else if(argis("-sagstats")) {
output_stats();
}
else return 1;
#endif
return 0;
}
int ahexp = addHook(hooks_args, 100, exp_read_args);
}
}

312
rogueviz/sag/functions.cpp Normal file
View File

@ -0,0 +1,312 @@
// RogueViz -- SAG embedder: implementation of SAG method evaluation
// Copyright (C) 2011-24 Zeno Rogue, see 'hyper.cpp' for details
#include "../rogueviz.h"
#include "../dhrg/dhrg.h"
#include "../leastsquare.cpp"
namespace rogueviz {
namespace sag {
enum eSagMethod { smClosest, smLogistic, smMatch };
eSagMethod method;
vector<string> method_names = {"closest", "logistic", "match"};
int method_count = 3;
/* parameters for smMatch */
ld match_a = 1, match_b = 0;
/* parameters for smLogistic */
dhrg::logistic lgsag(1, 1), lgsag_pre(1, 1);
vector<ld> loglik_tab_y, loglik_tab_n;
dhrg::logistic best;
bool opt_debug = false;
bool should_good = false;
double costat(int vid, int sid) {
if(vid < 0) return 0;
double cost = 0;
switch(method) {
case smLogistic: {
auto s = sagdist[sid];
for(auto j: edges_yes[vid]) if(sagid[j] >= -1)
cost += loglik_tab_y[s[sagid[j]]];
for(auto j: edges_no[vid]) if(sagid[j] >= -1)
cost += loglik_tab_n[s[sagid[j]]];
return -cost;
}
case smMatch: {
vertexdata& vd = vdata[vid];
for(int j=0; j<isize(vd.edges); j++) {
edgeinfo *ei = vd.edges[j].second;
int t2 = vd.edges[j].first;
if(sagid[t2] != -1) {
ld cdist = sagdist[sid][sagid[t2]];
ld expect = match_a / ei->weight2 + match_b;
ld dist = cdist - expect;
cost += dist * dist;
}
}
return cost;
}
case smClosest: {
vertexdata& vd = vdata[vid];
for(int j=0; j<isize(vd.edges); j++) {
edgeinfo *ei = vd.edges[j].second;
int t2 = vd.edges[j].first;
if(sagid[t2] != -1) cost += sagdist[sid][sagid[t2]] * ei->weight2;
}
if(!hubval.empty()) {
for(auto sid2: neighbors[sid]) {
int vid2 = sagnode[sid2];
if(vid2 >= 0 && (hubval[vid] & hubval[vid]) == 0)
cost += hub_penalty;
}
}
return cost;
}
}
throw hr_exception("unknwon SAG method");
}
double cost;
double best_cost = 1000000000;
void compute_cost() {
int DN = isize(sagid);
cost = 0;
for(int i=0; i<DN; i++)
cost += costat(i, sagid[i]);
cost /= 2;
}
void compute_loglik_tab() {
loglik_tab_y.resize(max_sag_dist);
loglik_tab_n.resize(max_sag_dist);
for(int i=0; i<max_sag_dist; i++) {
loglik_tab_y[i] = lgsag.lyes(i);
loglik_tab_n[i] = lgsag.lno(i);
}
}
void compute_auto_rt() {
ld sum0 = 0, sum1 = 0, sum2 = 0;
for(auto i: sagdist) {
sum0 ++;
sum1 += i;
sum2 += i*i;
}
lgsag.R = sum1 / sum0;
lgsag.T = sqrt((sum2 - sum1*sum1/sum0) / sum0);
println(hlog, "automatically set R = ", lgsag.R, " and ", lgsag.T, " max_sag_dist = ", max_sag_dist);
if(method == smLogistic) compute_loglik_tab();
}
void optimize_sag_loglik_logistic() {
vector<int> indist(max_sag_dist, 0);
const int mul = 1;
int N = isize(sagid);
for(int i=0; i<N; i++)
for(int j=0; j<i; j++) {
int d = sagdist[sagid[i]][sagid[j]];
indist[d]++;
}
vector<int> pedge(max_sag_dist, 0);
for(int i=0; i<isize(sagedges); i++) {
edgeinfo& ei = sagedges[i];
// if(int(sagdist[sagid[ei.i]][sagid[ei.j]] * mul) == 136) printf("E %d,%d\n", ei.i, ei.j);
if(ei.i != ei.j)
if(ei.weight >= sag_edge->visible_from)
pedge[sagdist[sagid[ei.i]][sagid[ei.j]] * mul]++;
}
if(opt_debug) for(int d=0; d<max_sag_dist; d++)
if(indist[d])
printf("%2d: %7d/%7d %7.3lf\n",
d, pedge[d], indist[d], double(pedge[d] * 100. / indist[d]));
ld loglik = 0;
for(int d=0; d<max_sag_dist; d++) {
int p = pedge[d], pq = indist[d];
int q = pq - p;
if(p && q) {
loglik += p * log(p) + q * log(q) - pq * log(pq);
if(opt_debug) println(hlog, tie(d, p, q), loglik);
}
}
if(opt_debug) println(hlog, "loglikelihood best = ", fts(loglik));
auto logisticf = [&] (dhrg::logistic& l) {
ld loglik = 0;
for(int d=0; d<max_sag_dist; d++) {
int p = pedge[d], pq = indist[d];
if(p) loglik += p * l.lyes(d);
if(pq > p) loglik += (pq-p) * l.lno(d);
}
return loglik;
};
if(opt_debug) println(hlog, "cost = ", cost, " logisticf = ", logisticf(lgsag), " R= ", lgsag.R, " T= ", lgsag.T);
if(should_good && abs(cost + logisticf(lgsag)) > 0.1) throw hr_exception("computation error");
dhrg::fast_loglik_cont(lgsag, logisticf, nullptr, 1, 1e-5);
if(opt_debug) println(hlog, "loglikelihood logistic = ", logisticf(lgsag), " R= ", lgsag.R, " T= ", lgsag.T);
if(method == smLogistic) {
compute_loglik_tab();
compute_cost();
if(opt_debug) println(hlog, "cost = ", cost);
}
}
void optimize_sag_loglik_match() {
if(state &~ SS_WEIGHTED) return;
lsq::leastsquare_solver<2> lsqs;
for(auto& ei: sagedges) {
ld y = sagdist[sagid[ei.i]][sagid[ei.j]];
ld x = 1. / ei.weight;
lsqs.add_data({{x, 1}}, y);
}
array<ld, 2> solution = lsqs.solve();
match_a = solution[0];
match_b = solution[1];
println(hlog, "got a = ", match_a, " b = ", match_b);
if(method == smMatch)
prepare_graph();
}
void optimize_sag_loglik_auto() {
if(method == smLogistic) optimize_sag_loglik_logistic();
if(method == smMatch) optimize_sag_loglik_match();
}
pair<ld, ld> compute_mAP() {
int DN = isize(sagid);
ld meanrank = 0;
int tgood = 0;
ld maprank = 0;
for(int i=0; i<DN; i++) {
vector<int> alldist(max_sag_dist, 0);
for(int j=0; j<DN; j++) if(i != j) alldist[sagdist[sagid[i]][sagid[j]]]++;
vector<int> edgedist(max_sag_dist, 0);
for(auto j: edges_yes[i]) edgedist[sagdist[sagid[i]][sagid[j]]]++;
int pgood = 0;
ld bad = 0;
ld ap = 0;
ld pall = 0;
for(int j=0; j<max_sag_dist; j++) {
ld good = edgedist[j];
ld all = alldist[j];
ld err = all - good;
bad += err / 2.;
meanrank += bad * good;
bad += err / 2.;
for(int k=0; k<good; k++) {
pgood++, pall++;
pall += err/2 / good;
ap += pgood / pall;
pall += err/2 / good;
}
if(!good) pall += err;
}
tgood += pgood;
if(pgood) maprank += ap / pgood;
}
return make_pair(maprank / DN, meanrank / tgood);
}
void prepare_method() {
if(method == smLogistic) compute_loglik_tab();
optimize_sag_loglik_auto();
}
bool known_pairs = false;
int function_read_args() {
#if CAP_COMMANDLINE
using namespace arg;
if(0) ;
else if(argis("-sagrt")) {
shift(); sag::lgsag.R = argf();
shift(); sag::lgsag.T = argf();
if(method == smLogistic) compute_loglik_tab();
}
else if(argis("-sagmatch-ab")) {
shift(); sag::match_a = argf();
shift(); sag::match_b = argf();
if(method == smMatch) prepare_graph();
}
else if(argis("-sagrt-auto")) {
compute_auto_rt();
}
else if(argis("-sag-method-closest")) {
method = smClosest; prepare_method();
}
else if(argis("-sag-method-logistic")) {
method = smLogistic; prepare_method();
}
else if(argis("-sag-method-match")) {
method = smMatch; prepare_method();
}
else if(argis("-sag-logistic-recalc")) {
method = smLogistic; prepare_method();
}
else if(argis("-sagloglik-l")) {
sag::optimize_sag_loglik_logistic();
}
else if(argis("-sagloglik-m")) {
sag::optimize_sag_loglik_match();
}
else if(argis("-sagloglik-a")) {
sag::optimize_sag_loglik_auto();
}
else return 1;
#endif
return 0;
}
int ahfun = addHook(hooks_args, 100, function_read_args);
}
}

File diff suppressed because it is too large Load Diff

40
rogueviz/sag/sag.h Normal file
View File

@ -0,0 +1,40 @@
// RogueViz -- SAG embedder: main header file
// Copyright (C) 2011-2024 Zeno Rogue, see 'hyper.cpp' for details
#ifndef _SAG_H_
#define _SAG_H_
#include "../rogueviz.h"
namespace rogueviz {
namespace sag {
/** SAG is generally active */
static const flagtype SS_GENERAL = 1;
/** the cells are known */
static const flagtype SS_CELLS = 2;
/** the graph is known */
static const flagtype SS_DATA = 4;
/** the graph is visualized */
static const flagtype SS_GRAPH = 8;
/** no cells because we need to call init_snake */
static const flagtype SS_NEED_SNAKE = 16;
/** the graph is weighted */
static const flagtype SS_WEIGHTED = 32;
extern flagtype state;
extern vector<int> sagid;
void init();
void clear();
}
}
#endif