From ab0be68a0057996fa8229b86c5fda5285910ea65 Mon Sep 17 00:00:00 2001 From: jndean Date: Sun, 22 Oct 2023 18:08:25 +0000 Subject: [PATCH] Texture exchange functional --- src/ipu/ipu_interface.h | 5 +- src/ipu/ipu_texturetiles.cpp | 315 +++++++++++++++++++++-------------- src/ipu/ipu_texturetiles.h | 10 +- src/ipu/r_codelets.cpp | 41 +++-- src/ipu_host.cpp | 35 ++-- 5 files changed, 249 insertions(+), 157 deletions(-) diff --git a/src/ipu/ipu_interface.h b/src/ipu/ipu_interface.h index 2ac07ac..e761a8a 100644 --- a/src/ipu/ipu_interface.h +++ b/src/ipu/ipu_interface.h @@ -20,6 +20,7 @@ extern "C" { #define IPUMAPPEDLINEUPDATES (2) #define IPUPROGBUFSIZE (128) + #define IPUFIRSTRENDERTILE (0) #define IPUNUMRENDERTILES (32) #define IPUCOLSPERRENDERTILE (SCREENWIDTH / IPUNUMRENDERTILES) @@ -30,7 +31,9 @@ extern "C" { #define IPUNUMTEXTURETILES (IPUTEXTURETILESPERRENDERTILE * IPUNUMRENDERTILES) #define IPUNUMTEXTURECACHELINES (1) #define IPUTEXTURECACHELINESIZE (128 / sizeof(int)) -#define IPUREDUCTIONTILE (IPUFIRSTTEXTURETILE + IPUNUMTEXTURETILES) + +// #define IPUREDUCTIONPERIOD (2) +#define IPUCOMMSBUFSIZE (IPUNUMRENDERTILES) typedef struct { diff --git a/src/ipu/ipu_texturetiles.cpp b/src/ipu/ipu_texturetiles.cpp index 8dca173..0c2a7a9 100644 --- a/src/ipu/ipu_texturetiles.cpp +++ b/src/ipu/ipu_texturetiles.cpp @@ -12,16 +12,19 @@ // Remeber! Copies of these vars exits independently on each tile unsigned* tileLocalProgBuf; +unsigned* tileLocalCommsBuf; unsigned* tileLocalTextureBuf; int textureFetchCount = 0; +const int tmpRepeatCount = 4; // -------- Components for the tiles that serve textures ------------ // +extern "C" __SUPER__ void IPU_R_InitTextureTile(unsigned* progBuf, int progBufSize) { - // progBuf starts with a directory of 2 program offsets - unsigned* progHead = progBuf + 2; + // progBuf starts with a directory of 3 program offsets + unsigned* progHead = progBuf + 3; unsigned* progEnd = &progBuf[progBufSize]; // Figure out which tiles are involved @@ -51,28 +54,51 @@ void IPU_R_InitTextureTile(unsigned* progBuf, int progBufSize) { progHead = assembler.assemble(progHead, progEnd - progHead); progHead++; } + + // Third program receives the `done` + { + progBuf[2] = progHead - progBuf; + XCOMAssembler assembler; + int aggrTile = XCOM_logical2physical(IPUFIRSTRENDERTILE); + int sendCycle = XCOM_WORSTSENDDELAY; + int muxCycle = sendCycle + XCOM_TimeToMux(aggrTile, __builtin_ipu_get_tile_id()); + assembler.addRecv(0, 1, aggrTile, muxCycle); + progHead = assembler.assemble(progHead, progEnd - progHead); + progHead++; + } } +extern "C" __SUPER__ -void IPU_R_FulfilColumnRequest(unsigned* progBuf, unsigned* textureBuf) { +void IPU_R_FulfilColumnRequest(unsigned* progBuf, unsigned* textureBuf, unsigned* commsBuf) { // Start of buffer is a directory of programs auto recvProgram = &progBuf[progBuf[0]]; auto sendProgram = &progBuf[progBuf[1]]; + auto aggrProgram = &progBuf[progBuf[2]]; - for(; textureFetchCount < 6; textureFetchCount++) { + // for(; textureFetchCount < tmpRepeatCount; textureFetchCount++) { + while (1) { XCOM_Execute(recvProgram, NULL, textureBuf); + // Unpack received data + unsigned textureNum = ((IPUColRequest_t*) textureBuf)->texture; + unsigned colNum = ((IPUColRequest_t*) textureBuf)->column; + + byte c1 = (textureNum * 9) % 256; + byte c2 = (c1 + 1) % 256; + byte c3 = (c1 + 2) % 256; + byte c4 = (c1 + 1) % 256; + unsigned colour = c1 | (c2 << 8) | (c3 << 16) | (c4 << 24); for (int i = 0; i < IPUTEXTURECACHELINESIZE; i++) { - textureBuf[i] = 0x20202020; + textureBuf[i] = colour; } XCOM_Execute(sendProgram, textureBuf, NULL); - // TMP - asm volatile(R"( - sans 1 - sync 0x1 - )"); + XCOM_Execute(aggrProgram, NULL, commsBuf); + // if (tileID == 35) printf ("Texture flag: %d\n", commsBuf[0]); + if (commsBuf[0]) + return; } } @@ -80,28 +106,12 @@ void IPU_R_FulfilColumnRequest(unsigned* progBuf, unsigned* textureBuf) { // -------- Components for the tiles that request textures ------------ // -#define NUMCACHECOLS (20) -#define CACHECOLSIZE (128) -static byte columnCache[NUMCACHECOLS][CACHECOLSIZE]; - static int textureTileLUT[IPUTEXTURETILESPERRENDERTILE]; static int muxInstructionOffset; extern "C" __SUPER__ void IPU_R_InitColumnRequester(unsigned* progBuf, int progBufSize) { - // TMP colours - for (int i = 0; i < NUMCACHECOLS; ++i) { - unsigned* col = (unsigned*) columnCache[i]; - byte c1 = (i * 8) % 256; - byte c2 = (c1 + 1) % 256; - byte c3 = (c1 + 2) % 256; - byte c4 = (c1 + 1) % 256; - unsigned packedColour = c1 | (c2 << 8) | (c3 << 16) | (c4 << 24); - for (int j = 0; j < CACHECOLSIZE / 4; j++) { - col[j] = packedColour; - } - } // Figure out which tiles to talk to int firstTextureTile = IPUFIRSTTEXTURETILE + (IPUTEXTURETILESPERRENDERTILE * (tileID - IPUFIRSTRENDERTILE)); @@ -112,115 +122,151 @@ void IPU_R_InitColumnRequester(unsigned* progBuf, int progBufSize) { // ProgBuf starts with directory of programs unsigned* progHead = progBuf + 3; unsigned* progEnd = &progBuf[progBufSize]; + int aggrTile = XCOM_logical2physical(IPUFIRSTRENDERTILE); XCOMAssembler assembler; - // First Program: performs the request and receives the response + // First Program: performs the column request and sends flags to aggrTile progBuf[0] = progHead - progBuf; int messageSize = sizeof(IPUColRequest_t) / sizeof(int); assembler.addSend(0, messageSize, XCOM_BROADCAST, XCOM_WORSTSENDDELAY); + // Do the first step of the `done` flag aggregation + if (__builtin_ipu_get_tile_id() == aggrTile) { + for (int slot = 1; slot < IPUNUMRENDERTILES; ++slot) { + int senderID = XCOM_logical2physical(IPUFIRSTRENDERTILE + slot); + bool clearMux = slot == IPUNUMRENDERTILES - 1; + int recvCycle = XCOM_WORSTRECVDELAY + XCOM_WORSTRECVDELAY + slot - 1; + unsigned* recvAddr = (unsigned*)(slot * sizeof(unsigned)); + assembler.addRecv(recvAddr, 1, senderID, recvCycle, clearMux); + } + } else { + int myTimeSlot = tileID - (IPUFIRSTRENDERTILE + 1); + int direction = XCOM_EastOrWest(__builtin_ipu_get_tile_id(), aggrTile); + int muxCycle = XCOM_WORSTRECVDELAY + XCOM_WORSTRECVDELAY + myTimeSlot; + int sendCycle = muxCycle - XCOM_TimeToMux(__builtin_ipu_get_tile_id(), aggrTile); + unsigned* addr = (unsigned*) sizeof(IPUColRequest_t); + assembler.addSend(addr, 1, direction, sendCycle); + } progHead = assembler.assemble(progHead, progEnd - progHead); + progHead++; assembler.reset(); + + // Second Proggram: Receives the request response + progBuf[1] = progHead - progBuf; assembler.addRecv(0, IPUTEXTURECACHELINESIZE, 0, XCOM_WORSTRECVDELAY); unsigned* newProgHead = assembler.assemble(progHead, progEnd - progHead); - // Record the mux location for later live patching + // Record the mux-setting instruction location for later live patching for (unsigned* inst = progHead; inst < newProgHead; ++inst) { if ((*inst & 0xfc003fffu) == 0x64000000u) { - muxInstructionOffset = (inst - &progBuf[progBuf[0]]); + muxInstructionOffset = (inst - &progBuf[progBuf[1]]); break; } } progHead = newProgHead + 1; // +1 => don't overwrite the return statement assembler.reset(); - // Second Program: aggregates the 'done' flag across render tiles - progBuf[1] = progHead - progBuf; - int aggrTile = XCOM_logical2physical(IPUFIRSTRENDERTILE); + // Third Program: distributes the `done` flag + progBuf[2] = progHead - progBuf; + int sendCycle = XCOM_WORSTSENDDELAY; if (__builtin_ipu_get_tile_id() == aggrTile) { - for (int slot = 0; slot < IPUNUMRENDERTILES - 1; ++slot) { - int senderID = XCOM_logical2physical(IPUFIRSTRENDERTILE + 1 + slot); - assembler.addRecv((unsigned*)(4*(slot + 1)), 1, senderID, XCOM_WORSTRECVDELAY + slot, - slot == IPUNUMRENDERTILES - 2); - } - progHead = assembler.assemble(progHead, progEnd - progHead); - progHead++; - - // Third Program: aggregation tile distributes the answer - progBuf[2] = progHead - progBuf; - assembler.reset(); - assembler.addSend(0, 1, XCOM_BROADCAST, XCOM_WORSTSENDDELAY); - progHead = assembler.assemble(progHead, progEnd - progHead); - progHead++; - + assembler.addSend(0, 1, XCOM_BROADCAST, sendCycle); } else { - int myTimeSlot = tileID - (IPUFIRSTRENDERTILE + 1); - int direction = XCOM_EastOrWest(__builtin_ipu_get_tile_id(), aggrTile); - int muxCycle = XCOM_WORSTRECVDELAY + myTimeSlot; - int sendCycle = muxCycle - XCOM_TimeToMux(__builtin_ipu_get_tile_id(), aggrTile); - assembler.addSend(0, 1, direction, sendCycle); - progHead = assembler.assemble(progHead, progEnd - progHead); - - assembler.reset(); - sendCycle = XCOM_WORSTSENDDELAY; - muxCycle = sendCycle + XCOM_TimeToMux(aggrTile, __builtin_ipu_get_tile_id()); + int muxCycle = sendCycle + XCOM_TimeToMux(aggrTile, __builtin_ipu_get_tile_id()); assembler.addRecv(0, 1, aggrTile, muxCycle); - progHead = assembler.assemble(progHead, progEnd - progHead); - progHead++; } - - // assembler.reset(); + progHead = assembler.assemble(progHead, progEnd - progHead); + progHead++; + assembler.reset(); } - -__SUPER__ -static bool checkRenderingFinished() { - auto aggregateProgA = &tileLocalProgBuf[tileLocalProgBuf[1]]; - auto aggregateProgB = &tileLocalProgBuf[tileLocalProgBuf[2]]; - - tileLocalTextureBuf[0] = tileID + 100; - XCOM_Execute( - aggregateProgA, - tileLocalTextureBuf, - tileLocalTextureBuf - ); - if (tileID == IPUFIRSTRENDERTILE) { - for (int i = 1; i < IPUNUMRENDERTILES; i++) { - tileLocalTextureBuf[0] += tileLocalTextureBuf[i]; - } - XCOM_Execute( - aggregateProgB, - tileLocalTextureBuf, - NULL - ); - } - printf("Aggregation result: %d\n", tileLocalTextureBuf[0]); - - return tileLocalTextureBuf[0]; -} - extern "C" __SUPER__ byte* IPU_R_RequestColumn(int texture, int column) { // progBuff starts with a program directory auto requestProg = &tileLocalProgBuf[tileLocalProgBuf[0]]; - auto aggregateProgA = &tileLocalProgBuf[tileLocalProgBuf[1]]; - auto aggregateProgB = &tileLocalProgBuf[tileLocalProgBuf[2]]; + auto receiveProg = &tileLocalProgBuf[tileLocalProgBuf[1]]; + auto aggregateProg = &tileLocalProgBuf[tileLocalProgBuf[2]]; - if (textureFetchCount++ < 6) { - int sourceTile = 0; - XCOM_PatchMuxAndExecute( - requestProg, // Prog - tileLocalTextureBuf, // Read offset - tileLocalTextureBuf, // Write offset - muxInstructionOffset, // Patch offset - textureTileLUT[sourceTile] // Mux value + // Populate buffer with data to be exchanged + IPUColRequest_t *request = (IPUColRequest_t*) tileLocalTextureBuf; + request->texture = texture; + request->column = column; + *((unsigned*) &request[1]) = 0; // The Done Flag + + XCOM_Execute( + requestProg, // Prog + tileLocalTextureBuf, // Read offset + tileLocalCommsBuf // Write offset + ); + + // aggrTile aggregates + if (tileID == IPUFIRSTRENDERTILE) { + tileLocalCommsBuf[0] = 0; + } + + int textureSourceTile = 0; + XCOM_PatchMuxAndExecute( + receiveProg, // Prog + NULL, // Read offset + tileLocalTextureBuf, // Write offset + muxInstructionOffset, // Patch offset + textureTileLUT[textureSourceTile] // Mux value + ); + + XCOM_Execute( + aggregateProg, // Prog + tileLocalCommsBuf, // Read offset + tileLocalCommsBuf // Write offset + ); + + return (byte*) tileLocalTextureBuf; +} + +extern "C" +__SUPER__ +void IPU_R_RenderTileDone() { + // progBuff starts with a program directory + auto requestProg = &tileLocalProgBuf[tileLocalProgBuf[0]]; + auto aggregateProg = &tileLocalProgBuf[tileLocalProgBuf[2]]; + + while (1) { + + // Populate buffer with data to be exchanged + IPUColRequest_t *request = (IPUColRequest_t*) tileLocalTextureBuf; + request->texture = 0xffffffff; + request->column = 0xffffffff; + *((unsigned*) &request[1]) = 1; // The `done` Flag + + XCOM_Execute( + requestProg, // Prog + tileLocalTextureBuf, // Read offset + tileLocalCommsBuf // Write offset ); - checkRenderingFinished(); + // aggrTile aggregates `done` flags + unsigned aggr = true; + if (tileID == IPUFIRSTRENDERTILE) { + for (int i = 1; i < IPUNUMRENDERTILES; i++) { + aggr &= tileLocalCommsBuf[i]; + } + } + tileLocalCommsBuf[0] = aggr; - return (byte*) tileLocalTextureBuf; + // Don't bother receiving the response + asm volatile(R"( + sans 0 + sync 0x1 + )"); + + XCOM_Execute( + aggregateProg, // Prog + tileLocalCommsBuf, // Read offset + tileLocalCommsBuf // Write offset + ); + + if (tileLocalCommsBuf[0]) // Done flag + return; } - return columnCache[texture % NUMCACHECOLS]; } @@ -228,36 +274,59 @@ byte* IPU_R_RequestColumn(int texture, int column) { // -------- Components for the sans tiles ------------ // -extern "C" -__SUPER__ -void IPU_R_InitSansTile(unsigned* progBuf, int progBufSize) { - // TODO - XCOMAssembler assembler; - int srcTile = 0; - int sendCycle = XCOM_WORSTSENDDELAY; - int muxCycle = sendCycle + XCOM_TimeToMux(srcTile, __builtin_ipu_get_tile_id()); - int messageSize = sizeof(IPUColRequest_t) / sizeof(int); - assembler.addRecv(0, messageSize, 0, muxCycle); - assembler.assemble(progBuf, progBufSize); -} - extern "C" __SUPER__ -void IPU_R_Sans(unsigned* progBuf, int progBufSize) { - (void) progBuf; - (void) progBufSize; +void IPU_R_Sans(unsigned* progBuf, unsigned* commsBuf) { + // Start of buffer is a directory of programs + auto recvProgram = &progBuf[progBuf[0]]; + auto sendProgram = &progBuf[progBuf[1]]; + auto aggrProgram = &progBuf[progBuf[2]]; - for(; textureFetchCount < 6; textureFetchCount++) { + while (1) { asm volatile(R"( sans 1 sync 0x1 )"); - // TMP - asm volatile(R"( - sans 1 - sync 0x1 - )"); + XCOM_Execute(aggrProgram, commsBuf, commsBuf); + + if (commsBuf[0]) + return; } } + + +/* + THE PLAN + +Prog 0 { + Render tiles send request from textureBuf[0:1], then + - aggrTile receives flags into comms buf + - the rest send done flag from textureBuf[2] + Texture tiles receive request into texture buf + Sans tiles sans x 2 +} + +Render tiles patch mux address + aggrTile aggregates flags into commsBuf[0] +Texture tiles fetch coluns into textureBuf +Sans tiles do nothing + +Prog 1 { + Render tiles receive cols into textureBuf + TextureTiles send cols from texture buf + Sans tiles do nothing (no sans) +} + + +Prog 2 { + Render tiles receive doneFlag + AggrTile sends done flag + TextureTiles receive done flag + Sans tiles receive done flag +} + + + +*/ \ No newline at end of file diff --git a/src/ipu/ipu_texturetiles.h b/src/ipu/ipu_texturetiles.h index 91fd9fe..8b582dc 100644 --- a/src/ipu/ipu_texturetiles.h +++ b/src/ipu/ipu_texturetiles.h @@ -17,19 +17,17 @@ typedef struct { extern unsigned* tileLocalProgBuf; +extern unsigned* tileLocalCommsBuf; extern unsigned* tileLocalTextureBuf; __SUPER__ void IPU_R_InitTextureTile(unsigned* progBuf, int progBufSize); - __SUPER__ void IPU_R_InitColumnRequester(unsigned* progBuf, int progBufSize); __SUPER__ byte* IPU_R_RequestColumn(int texture, int column); - -__SUPER__ void IPU_R_FulfilColumnRequest(unsigned* progBuf, unsigned* textureBuf); - -__SUPER__ void IPU_R_Sans(unsigned* progBuf, int progBufSize); - +__SUPER__ void IPU_R_FulfilColumnRequest(unsigned* progBuf, unsigned* textureBuf, unsigned* commsBuf); +__SUPER__ void IPU_R_Sans(unsigned* progBuf, unsigned* commsBuf); +__SUPER__ void IPU_R_RenderTileDone(void); #ifdef __cplusplus diff --git a/src/ipu/r_codelets.cpp b/src/ipu/r_codelets.cpp index 070dcc5..964593d 100644 --- a/src/ipu/r_codelets.cpp +++ b/src/ipu/r_codelets.cpp @@ -49,6 +49,8 @@ struct [[ poplar::constraint("region(*nonExecutableDummy) != region(*progBuf)"), poplar::constraint("elem(*textureCache) != elem(*progBuf)"), +poplar::constraint("elem(*textureCache) != elem(*commsBuf)"), +poplar::constraint("elem(*progBuf) != elem(*commsBuf)"), ]] R_RenderPlayerView_Vertex : public poplar::SupervisorVertex { poplar::Input> miscValues; @@ -56,22 +58,21 @@ R_RenderPlayerView_Vertex : public poplar::SupervisorVertex { poplar::InOut> nonExecutableDummy; poplar::InOut> progBuf; + poplar::InOut> commsBuf; poplar::InOut> textureCache; __SUPER__ void compute() { assert(&frame[0] == I_VideoBuffer); tileLocalProgBuf = &progBuf[0]; + tileLocalCommsBuf = &commsBuf[0]; tileLocalTextureBuf = &textureCache[0]; - // TMP - textureCache[0] = -1; - textureCache[1] = 1701; - IPU_R_RenderPlayerView_UnpackMiscValues( (R_RenderPlayerView_MiscValues_t*) &miscValues[0] ); R_RenderPlayerView(&players[displayplayer]); + IPU_R_RenderTileDone(); return; } }; @@ -92,31 +93,43 @@ class R_ExecuteSetViewSize_Vertex : public poplar::SupervisorVertex { struct [[ -poplar::constraint("region(*dummy) != region(*progBuf)"), +poplar::constraint("region(*dummy) != region(*progBuf)"), poplar::constraint("elem(*textureBuf) != elem(*progBuf)"), +poplar::constraint("elem(*textureBuf) != elem(*commsBuf)"), +poplar::constraint("elem(*progBuf) != elem(*commsBuf)"), ]] R_FulfilColumnRequests_Vertex : public poplar::SupervisorVertex { poplar::InOut> dummy; poplar::InOut> progBuf; + poplar::InOut> commsBuf; poplar::Output> textureBuf; __SUPER__ void compute() { - IPU_R_FulfilColumnRequest(&progBuf[0], &textureBuf[0]); + IPU_R_FulfilColumnRequest(&progBuf[0], &textureBuf[0], &commsBuf[0]); } }; -struct R_InitTextureTile_Vertex : public poplar::SupervisorVertex { +struct +[[ +poplar::constraint("region(*dummy) != region(*progBuf)"), +poplar::constraint("elem(*progBuf) != elem(*commsBuf)"), +]] +R_Sans_Vertex : public poplar::SupervisorVertex { + poplar::InOut> dummy; + poplar::InOut> progBuf; + poplar::InOut> commsBuf; + + __SUPER__ void compute() { + IPU_R_Sans(&progBuf[0], &commsBuf[0]); + } +}; + +struct R_InitTextureOrSans_Vertex : public poplar::SupervisorVertex { poplar::Output> progBuf; __SUPER__ void compute() { IPU_R_InitTextureTile(&progBuf[0], progBuf.size()); } -}; - - -struct R_Sans_Vertex : public poplar::SupervisorVertex { - __SUPER__ void compute() { - IPU_R_Sans(NULL, NULL); - } }; \ No newline at end of file diff --git a/src/ipu_host.cpp b/src/ipu_host.cpp index ff29ccf..8c270de 100644 --- a/src/ipu_host.cpp +++ b/src/ipu_host.cpp @@ -96,9 +96,11 @@ void IpuDoom::buildIpuGraph() { // Stuff for exchange programs poplar::Tensor nonExecutableDummy = m_ipuGraph.addVariable(poplar::INT, {totalTiles, 1}, "nonExecutableDummy"); poplar::Tensor progBuf = m_ipuGraph.addVariable(poplar::UNSIGNED_INT, {totalTiles, IPUPROGBUFSIZE}, "progBuf"); + poplar::Tensor commsBuf = m_ipuGraph.addVariable(poplar::UNSIGNED_INT, {totalTiles, IPUCOMMSBUFSIZE}, "commsBuf"); for (unsigned t = 0; t < totalTiles; ++t) { m_ipuGraph.setTileMapping(nonExecutableDummy[t], t); m_ipuGraph.setTileMapping(progBuf[t], t); + m_ipuGraph.setTileMapping(commsBuf[t], t); } @@ -169,13 +171,12 @@ void IpuDoom::buildIpuGraph() { m_ipuGraph.setTileMapping(vtx, logicalTile); m_ipuGraph.setPerfEstimate(vtx, 100); } - poplar::ComputeSet R_InitTextureTile_CS = m_ipuGraph.addComputeSet("R_InitTextureTile_CS"); - for (int textureTile = 0; textureTile < IPUNUMTEXTURETILES; ++textureTile) { - int logicalTile = IPUFIRSTTEXTURETILE + textureTile; - vtx = m_ipuGraph.addVertex(R_InitTextureTile_CS, "R_InitTextureTile_Vertex", { - {"progBuf", progBuf[logicalTile]}, + poplar::ComputeSet R_InitTextureOrSans_CS = m_ipuGraph.addComputeSet("R_InitTextureOrSans_CS"); + for (unsigned tile = IPUFIRSTTEXTURETILE; tile < totalTiles; ++tile) { + vtx = m_ipuGraph.addVertex(R_InitTextureOrSans_CS, "R_InitTextureOrSans_Vertex", { + {"progBuf", progBuf[tile]}, }); - m_ipuGraph.setTileMapping(vtx, logicalTile); + m_ipuGraph.setTileMapping(vtx, tile); m_ipuGraph.setPerfEstimate(vtx, 2000); } @@ -186,7 +187,7 @@ void IpuDoom::buildIpuGraph() { ); poplar::program::Sequence R_Init_prog({ - poplar::program::Execute(R_InitTextureTile_CS), + poplar::program::Execute(R_InitTextureOrSans_CS), poplar::program::Copy(miscValuesStream, m_miscValuesBuf), poplar::program::Repeat(2, poplar::program::Sequence({ // <- number of R_Init_CS steps poplar::program::Execute(R_Init_CS), @@ -280,7 +281,7 @@ void IpuDoom::buildIpuGraph() { poplar::UNSIGNED_INT, { IPUNUMRENDERTILES, IPUNUMTEXTURECACHELINES * IPUTEXTURECACHELINESIZE }, - "textureBuf"); + "textureCache"); poplar::ComputeSet R_RenderPlayerView_CS = m_ipuGraph.addComputeSet("R_RenderPlayerView_CS"); for (int renderTile = 0; renderTile < IPUNUMRENDERTILES; ++renderTile) { @@ -289,6 +290,7 @@ void IpuDoom::buildIpuGraph() { {"frame", ipuFrameSlices[renderTile]}, {"textureCache", textureCache[renderTile]}, {"progBuf", progBuf[logicalTile]}, + {"commsBuf", commsBuf[logicalTile]}, {"nonExecutableDummy", nonExecutableDummy[logicalTile]}, {"miscValues", m_miscValuesBuf}, }); @@ -302,13 +304,19 @@ void IpuDoom::buildIpuGraph() { {"dummy", nonExecutableDummy[logicalTile]}, {"textureBuf", textureBuf[textureTile]}, {"progBuf", progBuf[logicalTile]}, + {"commsBuf", commsBuf[logicalTile]}, }); m_ipuGraph.setTileMapping(vtx, logicalTile); m_ipuGraph.setPerfEstimate(vtx, 1000); m_ipuGraph.setTileMapping(textureBuf[textureTile], textureTile); } for (unsigned tile = IPUFIRSTTEXTURETILE + IPUNUMTEXTURETILES; tile < totalTiles; ++tile) { - m_ipuGraph.setTileMapping(m_ipuGraph.addVertex(R_RenderPlayerView_CS, "R_Sans_Vertex"), tile); + vtx = m_ipuGraph.addVertex(R_RenderPlayerView_CS, "R_Sans_Vertex", { + {"dummy", nonExecutableDummy[tile]}, + {"progBuf", progBuf[tile]}, + {"commsBuf", commsBuf[tile]}, + }); + m_ipuGraph.setTileMapping(vtx, tile); } // Cache line is used as the aggregation buffer, make sure it's big enough assert(IPUTEXTURECACHELINESIZE >= IPUNUMRENDERTILES); @@ -316,9 +324,7 @@ void IpuDoom::buildIpuGraph() { poplar::program::Sequence R_RenderPlayerView_prog({ poplar::program::Copy(miscValuesStream, m_miscValuesBuf), poplar::program::Copy(frameInStream, ipuFrame), - poplar::program::Sync(poplar::SyncType::INTERNAL), poplar::program::Execute(R_RenderPlayerView_CS), - poplar::program::Sync(poplar::SyncType::INTERNAL), poplar::program::Copy(ipuFrame, frameOutStream), }); @@ -340,7 +346,8 @@ void IpuDoom::buildIpuGraph() { // ---------------- Final prog --------------// m_ipuEngine = std::make_unique(std::move(poplar::Engine( - m_ipuGraph, { + m_ipuGraph, + { IPU_MiscSetup_Prog, G_DoLoadLevel_prog, G_Ticker_prog, @@ -350,7 +357,9 @@ void IpuDoom::buildIpuGraph() { R_ExecuteSetViewSize_prog, R_Init_prog, IPU_Init_Prog, - }))); + }, + {{"opt.enableSkipSyncs", "false"}} + ))); m_ipuEngine->connectStream("miscValues-stream", m_miscValuesBuf_h); m_ipuEngine->connectStream("lumpNum-stream", &m_lumpNum_h);