From 65e35ef19b39f825ce77663c9d3d1cb3d7d980fc Mon Sep 17 00:00:00 2001 From: jndean Date: Wed, 18 Oct 2023 17:00:52 +0000 Subject: [PATCH] Intermediate: First few columns of first frame fetched from texture tiles --- src/d_main.c | 2 +- src/ipu/ipu_interface.h | 8 ++ src/ipu/ipu_texturetiles.cpp | 172 ++++++++++++++++++++++++++++++++++- src/ipu/ipu_texturetiles.h | 16 +++- src/ipu/r_codelets.cpp | 54 ++++++++++- src/ipu_host.cpp | 92 +++++++++++++++---- 6 files changed, 319 insertions(+), 25 deletions(-) diff --git a/src/d_main.c b/src/d_main.c index 9ba0d78..ede67f1 100644 --- a/src/d_main.c +++ b/src/d_main.c @@ -1112,7 +1112,7 @@ void D_DoomMain(void) { I_InitTimer(); printf("inited timer\n"); I_InitJoystick(); - printf("inited joystick \n"); + printf("init'd joystick \n"); I_InitSound(true); printf("initted sound\n"); I_InitMusic(); diff --git a/src/ipu/ipu_interface.h b/src/ipu/ipu_interface.h index db78f66..4531e6d 100644 --- a/src/ipu/ipu_interface.h +++ b/src/ipu/ipu_interface.h @@ -18,11 +18,19 @@ extern "C" { #define IPUMAXEVENTSPERTIC (5) #define IPUAMMARKBUFSIZE (544) #define IPUMAPPEDLINEUPDATES (2) +#define IPUPROGBUFSIZE (32) #define IPUFIRSTRENDERTILE (0) #define IPUNUMRENDERTILES (32) #define IPUCOLSPERRENDERTILE (SCREENWIDTH / IPUNUMRENDERTILES) +#define IPUTEXTURETILESPERRENDERTILE (10) +#define IPUTEXTURETILEBUFSIZE (1024) +#define IPUFIRSTTEXTURETILE (IPUFIRSTRENDERTILE + IPUNUMRENDERTILES) +#define IPUNUMTEXTURETILES (IPUTEXTURETILESPERRENDERTILE * IPUNUMRENDERTILES) +#define IPUNUMTEXTURECACHELINES (1) +#define IPUTEXTURECACHELINESIZE (128 / sizeof(int)) +#define IPUREDUCTIONTILE (IPUFIRSTTEXTURETILE + IPUNUMTEXTURETILES) typedef struct { diff --git a/src/ipu/ipu_texturetiles.cpp b/src/ipu/ipu_texturetiles.cpp index 106a88b..c795255 100644 --- a/src/ipu/ipu_texturetiles.cpp +++ b/src/ipu/ipu_texturetiles.cpp @@ -1,16 +1,89 @@ +#include +#include + #include "doomtype.h" +#include "ipu_interface.h" #include "ipu_utils.h" #include 
"ipu_texturetiles.h" +#include "../../xcom.hpp" +// Remember! Copies of these vars exist independently on each tile +unsigned* tileLocalProgBuf; +unsigned* tileLocalTextureBuf; +int textureFetchCount = 0; + + +// -------- Components for the tiles that serve textures ------------ // + +__SUPER__ +void IPU_R_InitTextureTile(unsigned* progBuf, int progBufSize) { + // progBuf starts with a directory of 2 program offsets + unsigned* progHead = progBuf + 2; + unsigned* progEnd = &progBuf[progBufSize]; + + // Figure out which tiles are involved + int renderTile = XCOM_logical2physical((tileID - IPUFIRSTTEXTURETILE) / IPUTEXTURETILESPERRENDERTILE); + + // First program receives the request + { + progBuf[0] = progHead - progBuf; + XCOMAssembler assembler; + int sendCycle = XCOM_WORSTSENDDELAY; + int muxCycle = sendCycle + XCOM_TimeToMux(renderTile, __builtin_ipu_get_tile_id()); + int messageSize = sizeof(IPUColRequest_t) / sizeof(int); + assembler.addRecv(0, messageSize, renderTile, muxCycle); + progHead = assembler.assemble(progHead, progEnd - progHead); + progHead++; // This program returns control flow, so don't overwrite the `br $m10` + } + + // Second program sends the response + { + progBuf[1] = progHead - progBuf; + XCOMAssembler assembler; + int messageSize = IPUTEXTURECACHELINESIZE; + int recvCycle = XCOM_WORSTRECVDELAY; + int sendCycle = recvCycle - XCOM_TimeToMux(__builtin_ipu_get_tile_id(), renderTile); + int direction = XCOM_EastOrWest(__builtin_ipu_get_tile_id(), renderTile); + assembler.addSend(0, messageSize, direction, sendCycle); + progHead = assembler.assemble(progHead, progEnd - progHead); + progHead++; + } +} + +__SUPER__ +void IPU_R_FulfilColumnRequest(unsigned* progBuf, unsigned* textureBuf) { + // Start of buffer is a directory of programs + auto recvProgram = &progBuf[progBuf[0]]; + auto sendProgram = &progBuf[progBuf[1]]; + + for(; textureFetchCount < 6; textureFetchCount++) { + + XCOM_Execute(recvProgram, NULL, textureBuf); + + for (int i = 0; i < 
IPUTEXTURECACHELINESIZE; i++) { + textureBuf[i] = 0x20202020; + } + XCOM_Execute(sendProgram, textureBuf, NULL); + } +} + + + +// -------- Components for the tiles that request textures ------------ // + #define NUMCACHECOLS (20) #define CACHECOLSIZE (128) static byte columnCache[NUMCACHECOLS][CACHECOLSIZE]; +static int textureTileLUT[IPUTEXTURETILESPERRENDERTILE]; +static int muxInstructionOffset; -extern "C" __SUPER__ void IPU_R_InitColumnRequester(void) { +extern "C" +__SUPER__ +void IPU_R_InitColumnRequester(unsigned* progBuf, int progBufSize) { // TMP colours for (int i = 0; i < NUMCACHECOLS; ++i) { unsigned* col = (unsigned*) columnCache[i]; @@ -23,9 +96,102 @@ extern "C" __SUPER__ void IPU_R_InitColumnRequester(void) { col[j] = packedColour; } } + + // Figure out which tiles to talk to + int firstTextureTile = IPUFIRSTTEXTURETILE + (IPUTEXTURETILESPERRENDERTILE * (tileID - IPUFIRSTRENDERTILE)); + for (int i = 0; i < IPUTEXTURETILESPERRENDERTILE; ++i) { + textureTileLUT[i] = XCOM_logical2physical(firstTextureTile + i); + } + + // Prog Buf starts with directory of programs + unsigned* progHead = progBuf + 3; + unsigned* progEnd = &progBuf[progBufSize]; + { + progBuf[0] = progHead - progBuf; + XCOMAssembler assembler; + int messageSize = sizeof(IPUColRequest_t) / sizeof(int); + assembler.addSend(0, messageSize, XCOM_BROADCAST, XCOM_WORSTSENDDELAY); + progHead = assembler.assemble(progHead, progEnd - progHead); + } + { + XCOMAssembler assembler; + int messageSize = IPUTEXTURECACHELINESIZE; + assembler.addRecv(0, messageSize, 0, XCOM_WORSTRECVDELAY); + unsigned* newProgHead = assembler.assemble(progHead, progEnd - progHead); + for (unsigned* inst = progHead; inst < newProgHead; ++inst) { + if ((*inst & 0xfc003fffu) == 0x64000000u) { + muxInstructionOffset = (inst - &progBuf[progBuf[0]]); + break; + } + } + progHead = newProgHead; + } + // Third program sends finished flag to flag reducer + // { + // progBuf[2] = progHead - progBuf; + // XCOMAssembler assembler; + 
// int messageSize = 1; + // int recvCycle = XCOM_WORSTRECVDELAY; + // int sendCycle = recvCycle - XCOM_TimeToMux(__builtin_ipu_get_tile_id(), renderTile); + // int direction = XCOM_EastOrWest(__builtin_ipu_get_tile_id(), renderTile); + // assembler.addSend(0, messageSize, direction, sendCycle); + // progHead = assembler.assemble(progHead, progEnd - progHead); + // progHead++; + // } + } -extern "C" __SUPER__ byte* IPU_R_RequestColumn(int texture, int column) { +extern "C" +__SUPER__ +byte* IPU_R_RequestColumn(int texture, int column) { + // progBuf starts with a program directory + auto requestProg = &tileLocalProgBuf[tileLocalProgBuf[0]]; + + if (textureFetchCount++ < 6) { + int sourceTile = 0; + XCOM_PatchMuxAndExecute( + requestProg, // Prog + tileLocalTextureBuf, // Read offset + tileLocalTextureBuf, // Write offset + muxInstructionOffset, // Patch offset + textureTileLUT[sourceTile] // Mux value + ); + + return (byte*) tileLocalTextureBuf; + } return columnCache[texture % NUMCACHECOLS]; -} \ No newline at end of file +} + + + +// -------- Components for the sans tiles ------------ // + + +extern "C" +__SUPER__ +void IPU_R_InitSansTile(unsigned* progBuf, int progBufSize) { + // TODO + XCOMAssembler assembler; + int srcTile = 0; + int sendCycle = XCOM_WORSTSENDDELAY; + int muxCycle = sendCycle + XCOM_TimeToMux(srcTile, __builtin_ipu_get_tile_id()); + int messageSize = sizeof(IPUColRequest_t) / sizeof(int); + assembler.addRecv(0, messageSize, 0, muxCycle); + assembler.assemble(progBuf, progBufSize); +} + +extern "C" +__SUPER__ +void IPU_R_Sans(unsigned* progBuf, int progBufSize) { + (void) progBuf; + (void) progBufSize; + + for(; textureFetchCount < 6; textureFetchCount++) { + asm volatile(R"( + sans 1 + sync 0x1 + )"); + } +} + diff --git a/src/ipu/ipu_texturetiles.h b/src/ipu/ipu_texturetiles.h index 1554824..91fd9fe 100644 --- a/src/ipu/ipu_texturetiles.h +++ b/src/ipu/ipu_texturetiles.h @@ -11,11 +11,25 @@ extern "C" { #include "ipu_utils.h" +typedef struct 
{ + int texture, column; +} IPUColRequest_t; -__SUPER__ void IPU_R_InitColumnRequester(void); + +extern unsigned* tileLocalProgBuf; +extern unsigned* tileLocalTextureBuf; + + +__SUPER__ void IPU_R_InitTextureTile(unsigned* progBuf, int progBufSize); + +__SUPER__ void IPU_R_InitColumnRequester(unsigned* progBuf, int progBufSize); __SUPER__ byte* IPU_R_RequestColumn(int texture, int column); +__SUPER__ void IPU_R_FulfilColumnRequest(unsigned* progBuf, unsigned* textureBuf); + +__SUPER__ void IPU_R_Sans(unsigned* progBuf, int progBufSize); + #ifdef __cplusplus diff --git a/src/ipu/r_codelets.cpp b/src/ipu/r_codelets.cpp index 18c4d80..070dcc5 100644 --- a/src/ipu/r_codelets.cpp +++ b/src/ipu/r_codelets.cpp @@ -20,6 +20,7 @@ extern "C" { struct R_Init_Vertex: public poplar::SupervisorVertex { + poplar::Output> progBuf; poplar::Input> miscValues; poplar::Input> lumpBuf; poplar::Output lumpNum; @@ -34,7 +35,7 @@ struct R_Init_Vertex: public poplar::SupervisorVertex { break; case 1: R_InitTextures((int*)&lumpBuf[0], (R_Init_MiscValues_t*)&miscValues[0]); - IPU_R_InitColumnRequester(); + IPU_R_InitColumnRequester(&progBuf[0], progBuf.size()); *lumpNum = 0; step = 0; @@ -44,19 +45,34 @@ struct R_Init_Vertex: public poplar::SupervisorVertex { }; -struct R_RenderPlayerView_Vertex : public poplar::SupervisorVertex { +struct +[[ +poplar::constraint("region(*nonExecutableDummy) != region(*progBuf)"), +poplar::constraint("elem(*textureCache) != elem(*progBuf)"), +]] +R_RenderPlayerView_Vertex : public poplar::SupervisorVertex { poplar::Input> miscValues; poplar::InOut> frame; + poplar::InOut> nonExecutableDummy; + poplar::InOut> progBuf; + poplar::InOut> textureCache; __SUPER__ void compute() { assert(&frame[0] == I_VideoBuffer); + tileLocalProgBuf = &progBuf[0]; + tileLocalTextureBuf = &textureCache[0]; + // TMP + textureCache[0] = -1; + textureCache[1] = 1701; + IPU_R_RenderPlayerView_UnpackMiscValues( (R_RenderPlayerView_MiscValues_t*) &miscValues[0] ); 
R_RenderPlayerView(&players[displayplayer]); - return ; + return; } }; @@ -71,4 +87,36 @@ class R_ExecuteSetViewSize_Vertex : public poplar::SupervisorVertex { ); R_ExecuteSetViewSize(); } +}; + + +struct +[[ +poplar::constraint("region(*dummy) != region(*progBuf)"), +poplar::constraint("elem(*textureBuf) != elem(*progBuf)"), +]] +R_FulfilColumnRequests_Vertex : public poplar::SupervisorVertex { + poplar::InOut> dummy; + poplar::InOut> progBuf; + poplar::Output> textureBuf; + + __SUPER__ void compute() { + IPU_R_FulfilColumnRequest(&progBuf[0], &textureBuf[0]); + } +}; + +struct R_InitTextureTile_Vertex : public poplar::SupervisorVertex { + poplar::Output> progBuf; + + __SUPER__ void compute() { + IPU_R_InitTextureTile(&progBuf[0], progBuf.size()); + } +}; + + +struct R_Sans_Vertex : public poplar::SupervisorVertex { + __SUPER__ void compute() { + IPU_R_Sans(NULL, NULL); + } }; \ No newline at end of file diff --git a/src/ipu_host.cpp b/src/ipu_host.cpp index a311858..fd7df21 100644 --- a/src/ipu_host.cpp +++ b/src/ipu_host.cpp @@ -11,6 +11,7 @@ #include #include #include +#include #include "i_video.h" #include "ipu/ipu_interface.h" @@ -74,6 +75,7 @@ IpuDoom::~IpuDoom(){}; void IpuDoom::buildIpuGraph() { m_ipuGraph.addCodelets("build/ipu_rt.gp"); + const size_t totalTiles = m_ipuDevice.getTarget().getNumTiles(); // ---- The main frame buffer ---- // poplar::Tensor ipuFrame = @@ -91,6 +93,15 @@ void IpuDoom::buildIpuGraph() { auto frameOutStream = m_ipuGraph.addDeviceToHostFIFO("frame-outstream", poplar::UNSIGNED_CHAR, SCREENWIDTH * SCREENHEIGHT); + // Stuff for exchange programs + poplar::Tensor nonExecutableDummy = m_ipuGraph.addVariable(poplar::INT, {totalTiles, 1}, "nonExecutableDummy"); + poplar::Tensor progBuf = m_ipuGraph.addVariable(poplar::UNSIGNED_INT, {totalTiles, IPUPROGBUFSIZE}, "progBuf"); + for (unsigned t = 0; t < totalTiles; ++t) { + m_ipuGraph.setTileMapping(nonExecutableDummy[t], t); + m_ipuGraph.setTileMapping(progBuf[t], t); + } + + // -------- 
AM_Drawer_CS ------ // poplar::ComputeSet AM_Drawer_CS = m_ipuGraph.addComputeSet("AM_Drawer_CS"); @@ -148,11 +159,25 @@ void IpuDoom::buildIpuGraph() { poplar::ComputeSet R_Init_CS = m_ipuGraph.addComputeSet("R_Init_CS"); for (int renderTile = 0; renderTile < IPUNUMRENDERTILES; ++renderTile) { + int logicalTile = IPUFIRSTRENDERTILE + renderTile; vtx = m_ipuGraph.addVertex(R_Init_CS, "R_Init_Vertex", { - {"lumpNum", m_lumpNum[renderTile]}, {"lumpBuf", lumpBuf}, {"miscValues", m_miscValuesBuf}}); - m_ipuGraph.setTileMapping(vtx, renderTile + IPUFIRSTRENDERTILE); + {"lumpNum", m_lumpNum[renderTile]}, + {"lumpBuf", lumpBuf}, + {"miscValues", m_miscValuesBuf}, + {"progBuf", progBuf[logicalTile]} + }); + m_ipuGraph.setTileMapping(vtx, logicalTile); m_ipuGraph.setPerfEstimate(vtx, 100); } + poplar::ComputeSet R_InitTextureTile_CS = m_ipuGraph.addComputeSet("R_InitTextureTile_CS"); + for (int textureTile = 0; textureTile < IPUNUMTEXTURETILES; ++textureTile) { + int logicalTile = IPUFIRSTTEXTURETILE + textureTile; + vtx = m_ipuGraph.addVertex(R_InitTextureTile_CS, "R_InitTextureTile_Vertex", { + {"progBuf", progBuf[logicalTile]}, + }); + m_ipuGraph.setTileMapping(vtx, logicalTile); + m_ipuGraph.setPerfEstimate(vtx, 2000); + } poplar::HostFunction requestLumpFromHost = m_ipuGraph.addHostFunction( "requestLumpFromHost", @@ -161,14 +186,12 @@ void IpuDoom::buildIpuGraph() { ); poplar::program::Sequence R_Init_prog({ - poplar::program::Copy(miscValuesStream, m_miscValuesBuf), - poplar::program::Repeat(2, poplar::program::Sequence({ // <- number of R_Init_CS steps - poplar::program::Execute(R_Init_CS), - // poplar::program::Copy(m_lumpNum[0], lumpNumStream), // Only listen to first tile's requests - // poplar::program::Sync(poplar::SyncType::GLOBAL), // lumpnum must arrive before lump is loaded - // poplar::program::Copy(lumpBufStream, lumpBuf), - poplar::program::Call(requestLumpFromHost, {m_lumpNum[0]}, {lumpBuf}), - })), + poplar::program::Execute(R_InitTextureTile_CS), + 
poplar::program::Copy(miscValuesStream, m_miscValuesBuf), + poplar::program::Repeat(2, poplar::program::Sequence({ // <- number of R_Init_CS steps + poplar::program::Execute(R_Init_CS), + poplar::program::Call(requestLumpFromHost, {m_lumpNum[0]}, {lumpBuf}), + })), }); // ---------------- G_Ticker --------------// @@ -203,13 +226,14 @@ void IpuDoom::buildIpuGraph() { }); - // -------------- IPU Init setup (Happens after most CPU setup) ------------// + // -------------- IPU Init setup (Happens before most CPU setup) ------------// + // Initialising vtx that runs on every tile poplar::ComputeSet IPU_Init_CS = m_ipuGraph.addComputeSet("IPU_Init_CS"); - for (int renderTile = 0; renderTile < IPUNUMRENDERTILES; ++renderTile) { + for (unsigned tile = 0; tile < totalTiles; ++tile) { vtx = m_ipuGraph.addVertex(IPU_Init_CS, "IPU_Init_Vertex"); - m_ipuGraph.setTileMapping(vtx, renderTile + IPUFIRSTRENDERTILE); - m_ipuGraph.setPerfEstimate(vtx, 10000); + m_ipuGraph.setTileMapping(vtx, tile); + m_ipuGraph.setPerfEstimate(vtx, 1000); } poplar::program::Sequence IPU_Init_Prog({ poplar::program::Execute(IPU_Init_CS), @@ -248,18 +272,51 @@ void IpuDoom::buildIpuGraph() { // -------- R_RenderPlayerView_CS ------ // + poplar::Tensor textureBuf = m_ipuGraph.addVariable( + poplar::UNSIGNED_INT, + {IPUNUMTEXTURETILES, IPUTEXTURETILEBUFSIZE}, + "textureBuf"); + poplar::Tensor textureCache = m_ipuGraph.addVariable( + poplar::UNSIGNED_INT, + { IPUNUMRENDERTILES, + IPUNUMTEXTURECACHELINES * IPUTEXTURECACHELINESIZE }, + "textureBuf"); + poplar::ComputeSet R_RenderPlayerView_CS = m_ipuGraph.addComputeSet("R_RenderPlayerView_CS"); for (int renderTile = 0; renderTile < IPUNUMRENDERTILES; ++renderTile) { - vtx = m_ipuGraph.addVertex(R_RenderPlayerView_CS, "R_RenderPlayerView_Vertex", - {{"frame", ipuFrameSlices[renderTile]}, {"miscValues", m_miscValuesBuf}}); - m_ipuGraph.setTileMapping(vtx, renderTile + IPUFIRSTRENDERTILE); + int logicalTile = IPUFIRSTRENDERTILE + renderTile; + vtx = 
m_ipuGraph.addVertex(R_RenderPlayerView_CS, "R_RenderPlayerView_Vertex", { + {"frame", ipuFrameSlices[renderTile]}, + {"textureCache", textureCache[renderTile]}, + {"progBuf", progBuf[logicalTile]}, + {"nonExecutableDummy", nonExecutableDummy[logicalTile]}, + {"miscValues", m_miscValuesBuf}, + }); + m_ipuGraph.setTileMapping(vtx, logicalTile); + m_ipuGraph.setTileMapping(textureCache[renderTile], logicalTile); m_ipuGraph.setPerfEstimate(vtx, 10000000); } + for (int textureTile = 0; textureTile < IPUNUMTEXTURETILES; ++textureTile) { + int logicalTile = IPUFIRSTTEXTURETILE + textureTile; + vtx = m_ipuGraph.addVertex(R_RenderPlayerView_CS, "R_FulfilColumnRequests_Vertex", { + {"dummy", nonExecutableDummy[logicalTile]}, + {"textureBuf", textureBuf[textureTile]}, + {"progBuf", progBuf[logicalTile]}, + }); + m_ipuGraph.setTileMapping(vtx, logicalTile); + m_ipuGraph.setPerfEstimate(vtx, 1000); + m_ipuGraph.setTileMapping(textureBuf[textureTile], textureTile); + } + for (unsigned tile = IPUFIRSTTEXTURETILE + IPUNUMTEXTURETILES; tile < totalTiles; ++tile) { + m_ipuGraph.setTileMapping(m_ipuGraph.addVertex(R_RenderPlayerView_CS, "R_Sans_Vertex"), tile); + } poplar::program::Sequence R_RenderPlayerView_prog({ poplar::program::Copy(miscValuesStream, m_miscValuesBuf), poplar::program::Copy(frameInStream, ipuFrame), + poplar::program::Sync(poplar::SyncType::INTERNAL), poplar::program::Execute(R_RenderPlayerView_CS), + poplar::program::Sync(poplar::SyncType::INTERNAL), poplar::program::Copy(ipuFrame, frameOutStream), }); @@ -277,6 +334,7 @@ void IpuDoom::buildIpuGraph() { poplar::program::Execute(R_ExecuteSetViewSize_CS), }); + // ---------------- Final prog --------------// m_ipuEngine = std::make_unique(std::move(poplar::Engine(