Texture exchange functional

This commit is contained in:
jndean
2023-10-22 18:08:25 +00:00
parent b5e35b40a6
commit ab0be68a00
5 changed files with 249 additions and 157 deletions

View File

@@ -20,6 +20,7 @@ extern "C" {
#define IPUMAPPEDLINEUPDATES (2)
// Number of unsigned words in each tile's progBuf (used as the inner dimension
// of the `progBuf` tensor when the graph is built).
#define IPUPROGBUFSIZE (128)
// Logical tile index of the first render tile; this tile is also the one that
// aggregates the `done` flags.
#define IPUFIRSTRENDERTILE (0)
// Number of render tiles.
#define IPUNUMRENDERTILES (32)
// Screen width divided evenly between the render tiles.
#define IPUCOLSPERRENDERTILE (SCREENWIDTH / IPUNUMRENDERTILES)
@@ -30,7 +31,9 @@ extern "C" {
// Total number of texture tiles: a fixed count per render tile.
#define IPUNUMTEXTURETILES (IPUTEXTURETILESPERRENDERTILE * IPUNUMRENDERTILES)
#define IPUNUMTEXTURECACHELINES (1)
// Length of one texture cache line in int-sized words (128 bytes).
#define IPUTEXTURECACHELINESIZE (128 / sizeof(int))
// First logical tile index after the block of texture tiles.
#define IPUREDUCTIONTILE (IPUFIRSTTEXTURETILE + IPUNUMTEXTURETILES)
// #define IPUREDUCTIONPERIOD (2)
// Words in each tile's commsBuf: one per render tile, so the aggregating tile
// can hold one received flag per render tile.
#define IPUCOMMSBUFSIZE (IPUNUMRENDERTILES)
typedef struct {

View File

@@ -12,16 +12,19 @@
// Remember! Copies of these vars exist independently on each tile
// Pointers to this tile's exchange-program buffer, comms buffer and texture
// buffer. R_RenderPlayerView_Vertex::compute() points them at its progBuf,
// commsBuf and textureCache fields before rendering starts.
unsigned* tileLocalProgBuf;
unsigned* tileLocalCommsBuf;
unsigned* tileLocalTextureBuf;
// Counter driving the `textureFetchCount < 6` loop headers / guard in this file.
int textureFetchCount = 0;
// Only referenced from a commented-out loop header in the visible code.
const int tmpRepeatCount = 4;
// -------- Components for the tiles that serve textures ------------ //
// Assembles the exchange programs used by a texture tile into `progBuf`.
//
// progBuf      : word buffer; its first entries form a directory in which
//                progBuf[k] is the word offset (from progBuf) of program k.
// progBufSize  : number of words available in progBuf.
//
// NOTE(review): this listing shows both a `progBuf + 2` and a `progBuf + 3`
// version of the directory size, and the `@@` marker below hides the code that
// assembles the first two programs — it reads like a rendered diff with old and
// new lines interleaved. Confirm against the real file before relying on it.
extern "C"
__SUPER__
void IPU_R_InitTextureTile(unsigned* progBuf, int progBufSize) {
// progBuf starts with a directory of 2 program offsets
unsigned* progHead = progBuf + 2;
// progBuf starts with a directory of 3 program offsets
unsigned* progHead = progBuf + 3;
// One-past-the-end of the buffer; `progEnd - progHead` is the space left
unsigned* progEnd = &progBuf[progBufSize];
// Figure out which tiles are involved
@@ -51,28 +54,51 @@ void IPU_R_InitTextureTile(unsigned* progBuf, int progBufSize) {
progHead = assembler.assemble(progHead, progEnd - progHead);
progHead++;
}
// Third program receives the `done`
{
// Record where this program starts in directory slot 2
progBuf[2] = progHead - progBuf;
XCOMAssembler assembler;
// The flag comes from the first render tile (the aggregating tile)
int aggrTile = XCOM_logical2physical(IPUFIRSTRENDERTILE);
// Listen at the sender's send cycle plus XCOM_TimeToMux(sender, this tile)
int sendCycle = XCOM_WORSTSENDDELAY;
int muxCycle = sendCycle + XCOM_TimeToMux(aggrTile, __builtin_ipu_get_tile_id());
// Presumably: receive 1 word at offset 0 of the write buffer — confirm addRecv's argument order
assembler.addRecv(0, 1, aggrTile, muxCycle);
progHead = assembler.assemble(progHead, progEnd - progHead);
// Step one word past the assembled program (IPU_R_InitColumnRequester
// annotates the same step as "don't overwrite the return statement")
progHead++;
}
}
// Service loop run on a texture tile. Each pass:
//   1. runs the receive program, which writes a column request into textureBuf;
//   2. overwrites textureBuf with the reply and runs the send program;
//   3. runs the third program, which writes a flag word into commsBuf, and
//      returns once commsBuf[0] is non-zero.
//
// progBuf    : program buffer whose first three words are program offsets.
// textureBuf : scratch buffer for the incoming request and the outgoing column.
// commsBuf   : buffer that receives the `done` flag.
//
// NOTE(review): two signature lines, two loop headers and two assignments to
// textureBuf[i] appear back to back below — this listing reads like a rendered
// diff with old and new lines interleaved. Confirm which lines are live.
extern "C"
__SUPER__
void IPU_R_FulfilColumnRequest(unsigned* progBuf, unsigned* textureBuf) {
void IPU_R_FulfilColumnRequest(unsigned* progBuf, unsigned* textureBuf, unsigned* commsBuf) {
// Start of buffer is a directory of programs
auto recvProgram = &progBuf[progBuf[0]];
auto sendProgram = &progBuf[progBuf[1]];
auto aggrProgram = &progBuf[progBuf[2]];
for(; textureFetchCount < 6; textureFetchCount++) {
// for(; textureFetchCount < tmpRepeatCount; textureFetchCount++) {
while (1) {
// Receive the request into textureBuf
XCOM_Execute(recvProgram, NULL, textureBuf);
// Unpack received data
unsigned textureNum = ((IPUColRequest_t*) textureBuf)->texture;
// colNum is read here but not used by the visible code
unsigned colNum = ((IPUColRequest_t*) textureBuf)->column;
// Placeholder reply: four bytes derived from the texture number, packed
// into one word that fills the whole cache line
byte c1 = (textureNum * 9) % 256;
byte c2 = (c1 + 1) % 256;
byte c3 = (c1 + 2) % 256;
byte c4 = (c1 + 1) % 256;
unsigned colour = c1 | (c2 << 8) | (c3 << 16) | (c4 << 24);
for (int i = 0; i < IPUTEXTURECACHELINESIZE; i++) {
textureBuf[i] = 0x20202020;
textureBuf[i] = colour;
}
// Send the filled cache line back
XCOM_Execute(sendProgram, textureBuf, NULL);
// TMP
asm volatile(R"(
sans 1
sync 0x1
)");
// Receive the flag word into commsBuf
XCOM_Execute(aggrProgram, NULL, commsBuf);
// if (tileID == 35) printf ("Texture flag: %d\n", commsBuf[0]);
// Non-zero flag => stop serving
if (commsBuf[0])
return;
}
}
@@ -80,28 +106,12 @@ void IPU_R_FulfilColumnRequest(unsigned* progBuf, unsigned* textureBuf) {
// -------- Components for the tiles that request textures ------------ //
// Dimensions of columnCache: NUMCACHECOLS columns of CACHECOLSIZE bytes each.
#define NUMCACHECOLS (20)
#define CACHECOLSIZE (128)
// Filled with placeholder colours by IPU_R_InitColumnRequester and indexed by
// `texture % NUMCACHECOLS` in a return statement further down this file.
static byte columnCache[NUMCACHECOLS][CACHECOLSIZE];
// Indexed by source-tile slot; the entry is passed as the mux value to
// XCOM_PatchMuxAndExecute. (Where it is filled is not visible in this listing.)
static int textureTileLUT[IPUTEXTURETILESPERRENDERTILE];
// Word offset of the mux-setting instruction inside an assembled program,
// recorded by IPU_R_InitColumnRequester and passed as the patch offset to
// XCOM_PatchMuxAndExecute.
static int muxInstructionOffset;
// Assembles the exchange programs used by a render tile into `progBuf` and
// records muxInstructionOffset for later patching.
//
// progBuf      : word buffer; progBuf[0..2] form a directory of word offsets
//                (from progBuf) to the programs assembled after it.
// progBufSize  : number of words available in progBuf.
//
// NOTE(review): this listing contains several competing versions of the same
// steps (two "First Program" comments, progBuf[1]/progBuf[2] assigned more than
// once, `aggrTile`/`muxCycle`/`sendCycle` declared twice) and a `@@` marker that
// hides some lines — it reads like a rendered diff with old and new lines
// interleaved. Confirm against the real file which lines are live.
extern "C"
__SUPER__
void IPU_R_InitColumnRequester(unsigned* progBuf, int progBufSize) {
// TMP colours
// Fill every cache column with one packed 4-byte colour derived from its index
for (int i = 0; i < NUMCACHECOLS; ++i) {
unsigned* col = (unsigned*) columnCache[i];
byte c1 = (i * 8) % 256;
byte c2 = (c1 + 1) % 256;
byte c3 = (c1 + 2) % 256;
byte c4 = (c1 + 1) % 256;
unsigned packedColour = c1 | (c2 << 8) | (c3 << 16) | (c4 << 24);
// CACHECOLSIZE is in bytes; the column is written a 4-byte word at a time
for (int j = 0; j < CACHECOLSIZE / 4; j++) {
col[j] = packedColour;
}
}
// Figure out which tiles to talk to
int firstTextureTile = IPUFIRSTTEXTURETILE + (IPUTEXTURETILESPERRENDERTILE * (tileID - IPUFIRSTRENDERTILE));
@@ -112,115 +122,151 @@ void IPU_R_InitColumnRequester(unsigned* progBuf, int progBufSize) {
// ProgBuf starts with directory of programs
unsigned* progHead = progBuf + 3;
unsigned* progEnd = &progBuf[progBufSize];
// Physical ID of the first render tile, which aggregates the `done` flags
int aggrTile = XCOM_logical2physical(IPUFIRSTRENDERTILE);
XCOMAssembler assembler;
// First Program: performs the request and receives the response
// First Program: performs the column request and sends flags to aggrTile
progBuf[0] = progHead - progBuf;
// Request size in int-sized words
int messageSize = sizeof(IPUColRequest_t) / sizeof(int);
assembler.addSend(0, messageSize, XCOM_BROADCAST, XCOM_WORSTSENDDELAY);
// Do the first step of the `done` flag aggregation
if (__builtin_ipu_get_tile_id() == aggrTile) {
// Aggregating tile: one single-word receive per other render tile, one
// cycle apart, each landing `slot` words into the write buffer
for (int slot = 1; slot < IPUNUMRENDERTILES; ++slot) {
int senderID = XCOM_logical2physical(IPUFIRSTRENDERTILE + slot);
// Only the last receive passes clearMux = true
bool clearMux = slot == IPUNUMRENDERTILES - 1;
int recvCycle = XCOM_WORSTRECVDELAY + XCOM_WORSTRECVDELAY + slot - 1;
unsigned* recvAddr = (unsigned*)(slot * sizeof(unsigned));
assembler.addRecv(recvAddr, 1, senderID, recvCycle, clearMux);
}
} else {
// Other render tiles: send one word in this tile's own time slot, timed so
// that it reaches the aggregating tile on the cycle it listens for it
int myTimeSlot = tileID - (IPUFIRSTRENDERTILE + 1);
int direction = XCOM_EastOrWest(__builtin_ipu_get_tile_id(), aggrTile);
int muxCycle = XCOM_WORSTRECVDELAY + XCOM_WORSTRECVDELAY + myTimeSlot;
int sendCycle = muxCycle - XCOM_TimeToMux(__builtin_ipu_get_tile_id(), aggrTile);
// The word sent is the one directly after the IPUColRequest_t in the read
// buffer, which is where the callers store the done flag
unsigned* addr = (unsigned*) sizeof(IPUColRequest_t);
assembler.addSend(addr, 1, direction, sendCycle);
}
progHead = assembler.assemble(progHead, progEnd - progHead);
progHead++;
assembler.reset();
// Second Program: Receives the request response
progBuf[1] = progHead - progBuf;
assembler.addRecv(0, IPUTEXTURECACHELINESIZE, 0, XCOM_WORSTRECVDELAY);
unsigned* newProgHead = assembler.assemble(progHead, progEnd - progHead);
// Record the mux location for later live patching
// Record the mux-setting instruction location for later live patching
// Scan the words just assembled for the first one matching the bit pattern
for (unsigned* inst = progHead; inst < newProgHead; ++inst) {
if ((*inst & 0xfc003fffu) == 0x64000000u) {
muxInstructionOffset = (inst - &progBuf[progBuf[0]]);
muxInstructionOffset = (inst - &progBuf[progBuf[1]]);
break;
}
}
progHead = newProgHead + 1; // +1 => don't overwrite the return statement
assembler.reset();
// Second Program: aggregates the 'done' flag across render tiles
progBuf[1] = progHead - progBuf;
int aggrTile = XCOM_logical2physical(IPUFIRSTRENDERTILE);
// Third Program: distributes the `done` flag
progBuf[2] = progHead - progBuf;
int sendCycle = XCOM_WORSTSENDDELAY;
if (__builtin_ipu_get_tile_id() == aggrTile) {
for (int slot = 0; slot < IPUNUMRENDERTILES - 1; ++slot) {
int senderID = XCOM_logical2physical(IPUFIRSTRENDERTILE + 1 + slot);
assembler.addRecv((unsigned*)(4*(slot + 1)), 1, senderID, XCOM_WORSTRECVDELAY + slot,
slot == IPUNUMRENDERTILES - 2);
}
progHead = assembler.assemble(progHead, progEnd - progHead);
progHead++;
// Third Program: aggregation tile distributes the answer
progBuf[2] = progHead - progBuf;
assembler.reset();
assembler.addSend(0, 1, XCOM_BROADCAST, XCOM_WORSTSENDDELAY);
progHead = assembler.assemble(progHead, progEnd - progHead);
progHead++;
// Aggregating tile broadcasts one word from offset 0 of the read buffer
assembler.addSend(0, 1, XCOM_BROADCAST, sendCycle);
} else {
int myTimeSlot = tileID - (IPUFIRSTRENDERTILE + 1);
int direction = XCOM_EastOrWest(__builtin_ipu_get_tile_id(), aggrTile);
int muxCycle = XCOM_WORSTRECVDELAY + myTimeSlot;
int sendCycle = muxCycle - XCOM_TimeToMux(__builtin_ipu_get_tile_id(), aggrTile);
assembler.addSend(0, 1, direction, sendCycle);
progHead = assembler.assemble(progHead, progEnd - progHead);
assembler.reset();
sendCycle = XCOM_WORSTSENDDELAY;
muxCycle = sendCycle + XCOM_TimeToMux(aggrTile, __builtin_ipu_get_tile_id());
// Other tiles listen for the broadcast at the send cycle plus the
// mux time from the aggregating tile to this tile
int muxCycle = sendCycle + XCOM_TimeToMux(aggrTile, __builtin_ipu_get_tile_id());
assembler.addRecv(0, 1, aggrTile, muxCycle);
progHead = assembler.assemble(progHead, progEnd - progHead);
progHead++;
}
// assembler.reset();
progHead = assembler.assemble(progHead, progEnd - progHead);
progHead++;
assembler.reset();
}
__SUPER__
static bool checkRenderingFinished() {
    // Directory slots 1 and 2 hold the word offsets of the two aggregation
    // programs inside the tile-local program buffer.
    auto* const gatherProg = tileLocalProgBuf + tileLocalProgBuf[1];
    auto* const publishProg = tileLocalProgBuf + tileLocalProgBuf[2];

    // Every tile contributes (tileID + 100) through word 0 of the texture buffer.
    tileLocalTextureBuf[0] = tileID + 100;
    XCOM_Execute(gatherProg, tileLocalTextureBuf, tileLocalTextureBuf);

    const bool isAggregator = (tileID == IPUFIRSTRENDERTILE);
    if (isAggregator) {
        // Fold words 1..IPUNUMRENDERTILES-1 into word 0, then run the second
        // program with the buffer as its read side only.
        unsigned* const words = tileLocalTextureBuf;
        unsigned total = words[0];
        for (const unsigned* src = words + 1; src < words + IPUNUMRENDERTILES; ++src) {
            total += *src;
        }
        words[0] = total;
        XCOM_Execute(publishProg, tileLocalTextureBuf, NULL);
    }

    printf("Aggregation result: %d\n", tileLocalTextureBuf[0]);
    // Non-zero word 0 => true.
    return tileLocalTextureBuf[0] != 0;
}
// Called on a render tile to fetch one texture column.
//
// texture, column : written into the IPUColRequest_t placed at the start of
//                   tileLocalTextureBuf.
// Returns tileLocalTextureBuf reinterpreted as bytes, after the receive
// program has written into it.
//
// Visible sequence: build the request with the done flag set to 0, run the
// request program, run the receive program with its mux instruction patched,
// then run the aggregate program on tileLocalCommsBuf.
//
// NOTE(review): the lines from `if (textureFetchCount++ < 6) {` to the first
// `// Mux value` form a call that is never closed, and aggregateProgA/B sit
// next to receiveProg/aggregateProg — this listing reads like a rendered diff
// with old and new lines interleaved. Confirm which lines are live.
extern "C"
__SUPER__
byte* IPU_R_RequestColumn(int texture, int column) {
// progBuff starts with a program directory
auto requestProg = &tileLocalProgBuf[tileLocalProgBuf[0]];
auto aggregateProgA = &tileLocalProgBuf[tileLocalProgBuf[1]];
auto aggregateProgB = &tileLocalProgBuf[tileLocalProgBuf[2]];
auto receiveProg = &tileLocalProgBuf[tileLocalProgBuf[1]];
auto aggregateProg = &tileLocalProgBuf[tileLocalProgBuf[2]];
if (textureFetchCount++ < 6) {
int sourceTile = 0;
XCOM_PatchMuxAndExecute(
requestProg, // Prog
tileLocalTextureBuf, // Read offset
tileLocalTextureBuf, // Write offset
muxInstructionOffset, // Patch offset
textureTileLUT[sourceTile] // Mux value
// Populate buffer with data to be exchanged
IPUColRequest_t *request = (IPUColRequest_t*) tileLocalTextureBuf;
request->texture = texture;
request->column = column;
// The flag lives in the word directly after the request struct
*((unsigned*) &request[1]) = 0; // The Done Flag
XCOM_Execute(
requestProg, // Prog
tileLocalTextureBuf, // Read offset
tileLocalCommsBuf // Write offset
);
// aggrTile aggregates
// This tile is sending done = 0, so the aggregating tile stores 0 in word 0
if (tileID == IPUFIRSTRENDERTILE) {
tileLocalCommsBuf[0] = 0;
}
// Slot 0 of the LUT is always used in the visible code
int textureSourceTile = 0;
XCOM_PatchMuxAndExecute(
receiveProg, // Prog
NULL, // Read offset
tileLocalTextureBuf, // Write offset
muxInstructionOffset, // Patch offset
textureTileLUT[textureSourceTile] // Mux value
);
XCOM_Execute(
aggregateProg, // Prog
tileLocalCommsBuf, // Read offset
tileLocalCommsBuf // Write offset
);
return (byte*) tileLocalTextureBuf;
}
// Called on a render tile once it has finished rendering
// (R_RenderPlayerView_Vertex::compute calls it after R_RenderPlayerView).
// Loops, each pass sending a request whose texture and column are 0xffffffff
// and whose done flag is 1, then running the aggregate program, until
// tileLocalCommsBuf[0] is non-zero.
//
// NOTE(review): `checkRenderingFinished();`, `return (byte*) tileLocalTextureBuf;`
// and `return columnCache[...]` sit inside this void function — this listing
// reads like a rendered diff with old and new lines interleaved. Confirm which
// lines are live.
extern "C"
__SUPER__
void IPU_R_RenderTileDone() {
// progBuff starts with a program directory
auto requestProg = &tileLocalProgBuf[tileLocalProgBuf[0]];
auto aggregateProg = &tileLocalProgBuf[tileLocalProgBuf[2]];
while (1) {
// Populate buffer with data to be exchanged
IPUColRequest_t *request = (IPUColRequest_t*) tileLocalTextureBuf;
request->texture = 0xffffffff;
request->column = 0xffffffff;
// The flag lives in the word directly after the request struct
*((unsigned*) &request[1]) = 1; // The `done` Flag
XCOM_Execute(
requestProg, // Prog
tileLocalTextureBuf, // Read offset
tileLocalCommsBuf // Write offset
);
checkRenderingFinished();
// aggrTile aggregates `done` flags
// Starts at 1; only the first render tile ANDs in words 1..IPUNUMRENDERTILES-1
unsigned aggr = true;
if (tileID == IPUFIRSTRENDERTILE) {
for (int i = 1; i < IPUNUMRENDERTILES; i++) {
aggr &= tileLocalCommsBuf[i];
}
}
// Every tile writes word 0; on tiles other than the first this is simply 1
tileLocalCommsBuf[0] = aggr;
return (byte*) tileLocalTextureBuf;
// Don't bother receiving the response
asm volatile(R"(
sans 0
sync 0x1
)");
XCOM_Execute(
aggregateProg, // Prog
tileLocalCommsBuf, // Read offset
tileLocalCommsBuf // Write offset
);
if (tileLocalCommsBuf[0]) // Done flag
return;
}
return columnCache[texture % NUMCACHECOLS];
}
@@ -228,36 +274,59 @@ byte* IPU_R_RequestColumn(int texture, int column) {
// -------- Components for the sans tiles ------------ //
extern "C"
__SUPER__
void IPU_R_InitSansTile(unsigned* progBuf, int progBufSize) {
    // TODO (carried over from the original; what remains is not stated)
    XCOMAssembler recvAssembler;

    // Listen for tile 0: its send delay plus the mux time from tile 0 to here.
    const int sourceTile = 0;
    const int listenCycle =
        XCOM_WORSTSENDDELAY + XCOM_TimeToMux(sourceTile, __builtin_ipu_get_tile_id());

    // A column request measured in int-sized words.
    const int requestWords = sizeof(IPUColRequest_t) / sizeof(int);

    // Single receive, assembled straight at the start of progBuf.
    recvAssembler.addRecv(0, requestWords, 0, listenCycle);
    recvAssembler.assemble(progBuf, progBufSize);
}
// Loop run on tiles that neither render nor serve textures. Each pass issues
// two `sans 1; sync 0x1` sequences, runs the third program in progBuf's
// directory against commsBuf, and returns once commsBuf[0] is non-zero.
//
// progBuf  : program buffer whose first three words are program offsets.
// commsBuf : buffer passed as both read and write side of the third program.
//
// NOTE(review): two signature lines and two loop headers appear back to back
// below, together with `(void)` casts of a parameter the second signature does
// not have — this listing reads like a rendered diff with old and new lines
// interleaved. Confirm which lines are live.
extern "C"
__SUPER__
void IPU_R_Sans(unsigned* progBuf, int progBufSize) {
(void) progBuf;
(void) progBufSize;
void IPU_R_Sans(unsigned* progBuf, unsigned* commsBuf) {
// Start of buffer is a directory of programs
// recvProgram and sendProgram are looked up but not executed in the visible code
auto recvProgram = &progBuf[progBuf[0]];
auto sendProgram = &progBuf[progBuf[1]];
auto aggrProgram = &progBuf[progBuf[2]];
for(; textureFetchCount < 6; textureFetchCount++) {
while (1) {
asm volatile(R"(
sans 1
sync 0x1
)");
// TMP
asm volatile(R"(
sans 1
sync 0x1
)");
XCOM_Execute(aggrProgram, commsBuf, commsBuf);
// Non-zero flag => stop
if (commsBuf[0])
return;
}
}
/*
THE PLAN
Prog 0 {
Render tiles send request from textureBuf[0:1], then
- aggrTile receives flags into comms buf
- the rest send done flag from textureBuf[2]
Texture tiles receive request into texture buf
Sans tiles sans x 2
}
Render tiles patch mux address
aggrTile aggregates flags into commsBuf[0]
Texture tiles fetch columns into textureBuf
Sans tiles do nothing
Prog 1 {
Render tiles receive cols into textureBuf
TextureTiles send cols from texture buf
Sans tiles do nothing (no sans)
}
Prog 2 {
Render tiles receive doneFlag
AggrTile sends done flag
TextureTiles receive done flag
Sans tiles receive done flag
}
*/

View File

@@ -17,19 +17,17 @@ typedef struct {
extern unsigned* tileLocalProgBuf;
extern unsigned* tileLocalCommsBuf;
extern unsigned* tileLocalTextureBuf;
// NOTE(review): IPU_R_FulfilColumnRequest and IPU_R_Sans are each declared
// twice below with different parameter lists — this listing reads like a
// rendered diff with old and new lines interleaved. Confirm which are live.

// Assembles the texture-tile exchange programs into progBuf.
__SUPER__ void IPU_R_InitTextureTile(unsigned* progBuf, int progBufSize);
// Assembles the render-tile exchange programs into progBuf.
__SUPER__ void IPU_R_InitColumnRequester(unsigned* progBuf, int progBufSize);
// Requests one texture column; returns a pointer to the received bytes.
__SUPER__ byte* IPU_R_RequestColumn(int texture, int column);
__SUPER__ void IPU_R_FulfilColumnRequest(unsigned* progBuf, unsigned* textureBuf);
__SUPER__ void IPU_R_Sans(unsigned* progBuf, int progBufSize);
// Texture-tile service loop; returns once the flag received into commsBuf[0] is non-zero.
__SUPER__ void IPU_R_FulfilColumnRequest(unsigned* progBuf, unsigned* textureBuf, unsigned* commsBuf);
// Loop for the remaining tiles; returns once commsBuf[0] is non-zero.
__SUPER__ void IPU_R_Sans(unsigned* progBuf, unsigned* commsBuf);
// Called by a render tile after rendering; loops until the done flag comes back non-zero.
__SUPER__ void IPU_R_RenderTileDone(void);
#ifdef __cplusplus

View File

@@ -49,6 +49,8 @@ struct
[[
poplar::constraint("region(*nonExecutableDummy) != region(*progBuf)"),
poplar::constraint("elem(*textureCache) != elem(*progBuf)"),
poplar::constraint("elem(*textureCache) != elem(*commsBuf)"),
poplar::constraint("elem(*progBuf) != elem(*commsBuf)"),
]]
R_RenderPlayerView_Vertex : public poplar::SupervisorVertex {
poplar::Input<poplar::Vector<unsigned char>> miscValues;
@@ -56,22 +58,21 @@ R_RenderPlayerView_Vertex : public poplar::SupervisorVertex {
poplar::InOut<poplar::Vector<
int, poplar::VectorLayout::SPAN, 4, true>> nonExecutableDummy;
poplar::InOut<poplar::Vector<unsigned>> progBuf;
poplar::InOut<poplar::Vector<unsigned>> commsBuf;
poplar::InOut<poplar::Vector<unsigned>> textureCache;
// Entry point of the render vertex: publishes this vertex's buffers through the
// tile-local globals, unpacks miscValues, renders the player view, then calls
// IPU_R_RenderTileDone().
__SUPER__ void compute() {
assert(&frame[0] == I_VideoBuffer);
// The column-request functions reach the buffers through these globals
tileLocalProgBuf = &progBuf[0];
tileLocalCommsBuf = &commsBuf[0];
tileLocalTextureBuf = &textureCache[0];
// TMP
textureCache[0] = -1;
textureCache[1] = 1701;
IPU_R_RenderPlayerView_UnpackMiscValues(
(R_RenderPlayerView_MiscValues_t*) &miscValues[0]
);
R_RenderPlayerView(&players[displayplayer]);
// Signal that this tile has finished; returns only when the done flag comes back non-zero
IPU_R_RenderTileDone();
return;
}
};
@@ -92,31 +93,43 @@ class R_ExecuteSetViewSize_Vertex : public poplar::SupervisorVertex {
// Supervisor vertex that runs the texture-tile service loop by handing its
// progBuf, textureBuf and commsBuf fields to IPU_R_FulfilColumnRequest.
//
// The attribute list carries Poplar placement constraints: as written, `dummy`
// and `progBuf` must be in different regions, and progBuf / textureBuf /
// commsBuf must be in pairwise different memory elements.
//
// NOTE(review): one constraint line is duplicated and compute() contains both a
// two-argument and a three-argument call — this listing reads like a rendered
// diff with old and new lines interleaved. Confirm which lines are live.
struct
[[
poplar::constraint("region(*dummy) != region(*progBuf)"),
poplar::constraint("region(*dummy) != region(*progBuf)"),
poplar::constraint("elem(*textureBuf) != elem(*progBuf)"),
poplar::constraint("elem(*textureBuf) != elem(*commsBuf)"),
poplar::constraint("elem(*progBuf) != elem(*commsBuf)"),
]]
R_FulfilColumnRequests_Vertex : public poplar::SupervisorVertex {
// Not read by compute(); it only appears in the region constraint above
poplar::InOut<poplar::Vector<
int, poplar::VectorLayout::SPAN, 4, true>> dummy;
// Exchange programs and their directory
poplar::InOut<poplar::Vector<unsigned>> progBuf;
// Receives the `done` flag
poplar::InOut<poplar::Vector<unsigned>> commsBuf;
// Scratch buffer for the incoming request and the outgoing column
poplar::Output<poplar::Vector<unsigned>> textureBuf;
__SUPER__ void compute() {
IPU_R_FulfilColumnRequest(&progBuf[0], &textureBuf[0]);
IPU_R_FulfilColumnRequest(&progBuf[0], &textureBuf[0], &commsBuf[0]);
}
};
struct R_InitTextureTile_Vertex : public poplar::SupervisorVertex {
// Supervisor vertex for tiles that neither render nor serve textures: hands its
// progBuf and commsBuf fields to IPU_R_Sans.
//
// The attribute list carries Poplar placement constraints: as written, `dummy`
// and `progBuf` must be in different regions, and progBuf and commsBuf must be
// in different memory elements.
struct
[[
poplar::constraint("region(*dummy) != region(*progBuf)"),
poplar::constraint("elem(*progBuf) != elem(*commsBuf)"),
]]
R_Sans_Vertex : public poplar::SupervisorVertex {
// Not read by compute(); it only appears in the region constraint above
poplar::InOut<poplar::Vector<
int, poplar::VectorLayout::SPAN, 4, true>> dummy;
// Exchange programs and their directory
poplar::InOut<poplar::Vector<unsigned>> progBuf;
// Buffer the `done` flag is exchanged through
poplar::InOut<poplar::Vector<unsigned>> commsBuf;
__SUPER__ void compute() {
IPU_R_Sans(&progBuf[0], &commsBuf[0]);
}
};
// Supervisor vertex that fills its tile's program buffer by delegating to
// IPU_R_InitTextureTile.
struct R_InitTextureOrSans_Vertex : public poplar::SupervisorVertex {
    // Destination for the assembled exchange programs.
    poplar::Output<poplar::Vector<unsigned>> progBuf;

    __SUPER__ void compute() {
        // Hand over the raw storage together with its length in words.
        auto* const firstWord = &progBuf[0];
        const int wordCount = progBuf.size();
        IPU_R_InitTextureTile(firstWord, wordCount);
    }
};
// Field-less supervisor vertex whose compute() calls IPU_R_Sans with null
// arguments.
// NOTE(review): SOURCE also contains an R_Sans_Vertex with progBuf/commsBuf
// fields; presumably one of the two supersedes the other — confirm.
struct R_Sans_Vertex : public poplar::SupervisorVertex {
__SUPER__ void compute() {
IPU_R_Sans(NULL, NULL);
}
};

View File

@@ -96,9 +96,11 @@ void IpuDoom::buildIpuGraph() {
// Stuff for exchange programs
poplar::Tensor nonExecutableDummy = m_ipuGraph.addVariable(poplar::INT, {totalTiles, 1}, "nonExecutableDummy");
poplar::Tensor progBuf = m_ipuGraph.addVariable(poplar::UNSIGNED_INT, {totalTiles, IPUPROGBUFSIZE}, "progBuf");
poplar::Tensor commsBuf = m_ipuGraph.addVariable(poplar::UNSIGNED_INT, {totalTiles, IPUCOMMSBUFSIZE}, "commsBuf");
for (unsigned t = 0; t < totalTiles; ++t) {
m_ipuGraph.setTileMapping(nonExecutableDummy[t], t);
m_ipuGraph.setTileMapping(progBuf[t], t);
m_ipuGraph.setTileMapping(commsBuf[t], t);
}
@@ -169,13 +171,12 @@ void IpuDoom::buildIpuGraph() {
m_ipuGraph.setTileMapping(vtx, logicalTile);
m_ipuGraph.setPerfEstimate(vtx, 100);
}
poplar::ComputeSet R_InitTextureTile_CS = m_ipuGraph.addComputeSet("R_InitTextureTile_CS");
for (int textureTile = 0; textureTile < IPUNUMTEXTURETILES; ++textureTile) {
int logicalTile = IPUFIRSTTEXTURETILE + textureTile;
vtx = m_ipuGraph.addVertex(R_InitTextureTile_CS, "R_InitTextureTile_Vertex", {
{"progBuf", progBuf[logicalTile]},
poplar::ComputeSet R_InitTextureOrSans_CS = m_ipuGraph.addComputeSet("R_InitTextureOrSans_CS");
for (unsigned tile = IPUFIRSTTEXTURETILE; tile < totalTiles; ++tile) {
vtx = m_ipuGraph.addVertex(R_InitTextureOrSans_CS, "R_InitTextureOrSans_Vertex", {
{"progBuf", progBuf[tile]},
});
m_ipuGraph.setTileMapping(vtx, logicalTile);
m_ipuGraph.setTileMapping(vtx, tile);
m_ipuGraph.setPerfEstimate(vtx, 2000);
}
@@ -186,7 +187,7 @@ void IpuDoom::buildIpuGraph() {
);
poplar::program::Sequence R_Init_prog({
poplar::program::Execute(R_InitTextureTile_CS),
poplar::program::Execute(R_InitTextureOrSans_CS),
poplar::program::Copy(miscValuesStream, m_miscValuesBuf),
poplar::program::Repeat(2, poplar::program::Sequence({ // <- number of R_Init_CS steps
poplar::program::Execute(R_Init_CS),
@@ -280,7 +281,7 @@ void IpuDoom::buildIpuGraph() {
poplar::UNSIGNED_INT,
{ IPUNUMRENDERTILES,
IPUNUMTEXTURECACHELINES * IPUTEXTURECACHELINESIZE },
"textureBuf");
"textureCache");
poplar::ComputeSet R_RenderPlayerView_CS = m_ipuGraph.addComputeSet("R_RenderPlayerView_CS");
for (int renderTile = 0; renderTile < IPUNUMRENDERTILES; ++renderTile) {
@@ -289,6 +290,7 @@ void IpuDoom::buildIpuGraph() {
{"frame", ipuFrameSlices[renderTile]},
{"textureCache", textureCache[renderTile]},
{"progBuf", progBuf[logicalTile]},
{"commsBuf", commsBuf[logicalTile]},
{"nonExecutableDummy", nonExecutableDummy[logicalTile]},
{"miscValues", m_miscValuesBuf},
});
@@ -302,13 +304,19 @@ void IpuDoom::buildIpuGraph() {
{"dummy", nonExecutableDummy[logicalTile]},
{"textureBuf", textureBuf[textureTile]},
{"progBuf", progBuf[logicalTile]},
{"commsBuf", commsBuf[logicalTile]},
});
m_ipuGraph.setTileMapping(vtx, logicalTile);
m_ipuGraph.setPerfEstimate(vtx, 1000);
m_ipuGraph.setTileMapping(textureBuf[textureTile], textureTile);
}
for (unsigned tile = IPUFIRSTTEXTURETILE + IPUNUMTEXTURETILES; tile < totalTiles; ++tile) {
m_ipuGraph.setTileMapping(m_ipuGraph.addVertex(R_RenderPlayerView_CS, "R_Sans_Vertex"), tile);
vtx = m_ipuGraph.addVertex(R_RenderPlayerView_CS, "R_Sans_Vertex", {
{"dummy", nonExecutableDummy[tile]},
{"progBuf", progBuf[tile]},
{"commsBuf", commsBuf[tile]},
});
m_ipuGraph.setTileMapping(vtx, tile);
}
// Cache line is used as the aggregation buffer, make sure it's big enough
assert(IPUTEXTURECACHELINESIZE >= IPUNUMRENDERTILES);
@@ -316,9 +324,7 @@ void IpuDoom::buildIpuGraph() {
poplar::program::Sequence R_RenderPlayerView_prog({
poplar::program::Copy(miscValuesStream, m_miscValuesBuf),
poplar::program::Copy(frameInStream, ipuFrame),
poplar::program::Sync(poplar::SyncType::INTERNAL),
poplar::program::Execute(R_RenderPlayerView_CS),
poplar::program::Sync(poplar::SyncType::INTERNAL),
poplar::program::Copy(ipuFrame, frameOutStream),
});
@@ -340,7 +346,8 @@ void IpuDoom::buildIpuGraph() {
// ---------------- Final prog --------------//
m_ipuEngine = std::make_unique<poplar::Engine>(std::move(poplar::Engine(
m_ipuGraph, {
m_ipuGraph,
{
IPU_MiscSetup_Prog,
G_DoLoadLevel_prog,
G_Ticker_prog,
@@ -350,7 +357,9 @@ void IpuDoom::buildIpuGraph() {
R_ExecuteSetViewSize_prog,
R_Init_prog,
IPU_Init_Prog,
})));
},
{{"opt.enableSkipSyncs", "false"}}
)));
m_ipuEngine->connectStream("miscValues-stream", m_miscValuesBuf_h);
m_ipuEngine->connectStream("lumpNum-stream", &m_lumpNum_h);