From 474608258310cb43f150decd9568010ed6124cb9 Mon Sep 17 00:00:00 2001 From: jndean Date: Wed, 25 Oct 2023 11:23:36 +0000 Subject: [PATCH] Texture blob moved to IPU --- src/ipu/ipu_interface.h | 2 +- src/ipu/ipu_texturetiles.cpp | 2 + src/ipu/ipu_texturetiles.h | 2 + src/ipu/r_codelets.cpp | 74 ++++++++++++++++-------------------- src/ipu_host.cpp | 47 ++++++++++++++++++----- src/ipu_transfer.h | 5 +++ src/r_data.c | 22 ++++------- src/r_main.c | 5 ++- 8 files changed, 91 insertions(+), 68 deletions(-) diff --git a/src/ipu/ipu_interface.h b/src/ipu/ipu_interface.h index 7b35a1c..3468d72 100644 --- a/src/ipu/ipu_interface.h +++ b/src/ipu/ipu_interface.h @@ -31,7 +31,7 @@ extern "C" { #define IPUNUMTEXTURETILES (IPUTEXTURETILESPERRENDERTILE * IPUNUMRENDERTILES) #define IPUNUMTEXTURECACHELINES (1) #define IPUTEXTURECACHELINESIZE (128 / sizeof(int)) - +#define IPUMAXNUMTEXTURES (130) #define IPUCOMMSBUFSIZE (IPUNUMRENDERTILES) diff --git a/src/ipu/ipu_texturetiles.cpp b/src/ipu/ipu_texturetiles.cpp index 4f5091f..40f48dd 100644 --- a/src/ipu/ipu_texturetiles.cpp +++ b/src/ipu/ipu_texturetiles.cpp @@ -14,6 +14,8 @@ unsigned* tileLocalProgBuf; unsigned* tileLocalCommsBuf; unsigned* tileLocalTextureBuf; +const int* tileLocalTextureRange; +const int* tileLocalTextureOffsets; // -------- Components for the tiles that serve textures ------------ // diff --git a/src/ipu/ipu_texturetiles.h b/src/ipu/ipu_texturetiles.h index a66507c..3cb6ee4 100644 --- a/src/ipu/ipu_texturetiles.h +++ b/src/ipu/ipu_texturetiles.h @@ -19,6 +19,8 @@ typedef struct { extern unsigned* tileLocalProgBuf; extern unsigned* tileLocalCommsBuf; extern unsigned* tileLocalTextureBuf; +extern const int* tileLocalTextureRange; +extern const int* tileLocalTextureOffsets; __SUPER__ void IPU_R_InitTextureTile(unsigned* progBuf, int progBufSize); diff --git a/src/ipu/r_codelets.cpp b/src/ipu/r_codelets.cpp index 23b70af..4116756 100644 --- a/src/ipu/r_codelets.cpp +++ b/src/ipu/r_codelets.cpp @@ -19,6 +19,19 @@ extern "C" { }; +class R_ExecuteSetViewSize_Vertex : public poplar::SupervisorVertex { + public: + poplar::Input> miscValues; + __SUPER__ + void compute() { + IPU_R_ExecuteSetViewSize_UnpackMiscValues( + (R_ExecuteSetViewSize_MiscValues_t*) &miscValues[0] + ); + R_ExecuteSetViewSize(); + } +}; + + struct R_Init_Vertex: public poplar::SupervisorVertex { poplar::Output> progBuf; @@ -46,29 +59,6 @@ struct R_Init_Vertex: public poplar::SupervisorVertex { } }; -struct R_Init_TT_Vertex: public poplar::SupervisorVertex { - - poplar::Input> miscValues; - poplar::Input> lumpBuf; - poplar::Output lumpNum; - - __SUPER__ - void compute() { - -static int step = 0; switch (step++) { case 0: - - *lumpNum = ((R_Init_MiscValues_t*)&miscValues[0])->TEXTURE1_lumpnum; - -break; case 1: - - R_InitTextures_TT((int*)&lumpBuf[0]); - - -*lumpNum = 0; step = 0; } - - } -}; - struct [[ @@ -102,20 +92,14 @@ R_RenderPlayerView_Vertex : public poplar::SupervisorVertex { } }; +struct R_InitTexture_Vertex : public poplar::SupervisorVertex { + poplar::Output> progBuf; -class R_ExecuteSetViewSize_Vertex : public poplar::SupervisorVertex { - public: - poplar::Input> miscValues; - __SUPER__ - void compute() { - IPU_R_ExecuteSetViewSize_UnpackMiscValues( - (R_ExecuteSetViewSize_MiscValues_t*) &miscValues[0] - ); - R_ExecuteSetViewSize(); + __SUPER__ void compute() { + IPU_R_InitTextureTile(&progBuf[0], progBuf.size()); } }; - struct [[ poplar::constraint("region(*dummy) != region(*progBuf)"), @@ -129,12 +113,28 @@ R_FulfilColumnRequests_Vertex : public poplar::SupervisorVertex { poplar::InOut> progBuf; poplar::InOut> commsBuf; poplar::Output> textureBuf; + // poplar::Input> textureOffsets; + // poplar::Input> textureRange; __SUPER__ void compute() { + // tileLocalTextureRange = &textureRange[0]; + // tileLocalTextureOffsets = &textureOffsets[0]; + IPU_R_FulfilColumnRequest(&progBuf[0], &textureBuf[0], &commsBuf[0]); } }; + +struct R_InitSans_Vertex : public poplar::SupervisorVertex { + poplar::Output> progBuf; + + __SUPER__ void compute() { + // Reuse IPU_R_InitTextureTile because the 'done' flag receiving program + // it compiles is perfectly valid for use by a sans tile + IPU_R_InitTextureTile(&progBuf[0], progBuf.size()); + } +}; + struct [[ poplar::constraint("region(*dummy) != region(*progBuf)"), @@ -150,11 +150,3 @@ R_Sans_Vertex : public poplar::SupervisorVertex { IPU_R_Sans(&progBuf[0], &commsBuf[0]); } }; - -struct R_InitTextureOrSans_Vertex : public poplar::SupervisorVertex { - poplar::Output> progBuf; - - __SUPER__ void compute() { - IPU_R_InitTextureTile(&progBuf[0], progBuf.size()); - } -}; \ No newline at end of file diff --git a/src/ipu_host.cpp b/src/ipu_host.cpp index 3210448..6f0b837 100644 --- a/src/ipu_host.cpp +++ b/src/ipu_host.cpp @@ -159,6 +159,15 @@ void IpuDoom::buildIpuGraph() { // -------- R_Init ------ // + poplar::Tensor textureBuf = m_ipuGraph.addVariable( + poplar::UNSIGNED_CHAR, + {IPUNUMTEXTURETILES, IPUTEXTURETILEBUFSIZE}, + "textureBuf"); + auto textureBufStream = m_ipuGraph.addHostToDeviceFIFO( + "textureBuf-stream", + poplar::UNSIGNED_CHAR, + IPUTEXTURETILESPERRENDERTILE * IPUTEXTURETILEBUFSIZE); + poplar::ComputeSet R_Init_CS = m_ipuGraph.addComputeSet("R_Init_CS"); for (int renderTile = 0; renderTile < IPUNUMRENDERTILES; ++renderTile) { int logicalTile = IPUFIRSTRENDERTILE + renderTile; @@ -171,9 +180,17 @@ void IpuDoom::buildIpuGraph() { m_ipuGraph.setTileMapping(vtx, logicalTile); m_ipuGraph.setPerfEstimate(vtx, 100); } - poplar::ComputeSet R_InitTextureOrSans_CS = m_ipuGraph.addComputeSet("R_InitTextureOrSans_CS"); - for (unsigned tile = IPUFIRSTTEXTURETILE; tile < totalTiles; ++tile) { - vtx = m_ipuGraph.addVertex(R_InitTextureOrSans_CS, "R_InitTextureOrSans_Vertex", { + poplar::ComputeSet R_InitTextureAndSans_CS = m_ipuGraph.addComputeSet("R_InitTextureAndSans_CS"); + for (unsigned textureTile = 0; textureTile < IPUNUMTEXTURETILES; ++textureTile) { + int logicalTile = IPUFIRSTTEXTURETILE + textureTile; + vtx = m_ipuGraph.addVertex(R_InitTextureAndSans_CS, "R_InitTexture_Vertex", { + {"progBuf", progBuf[logicalTile]}, + }); + m_ipuGraph.setTileMapping(vtx, logicalTile); + m_ipuGraph.setPerfEstimate(vtx, 2000); + } + for (unsigned tile = IPUFIRSTTEXTURETILE + IPUNUMTEXTURETILES; tile < totalTiles; ++tile) { + vtx = m_ipuGraph.addVertex(R_InitTextureAndSans_CS, "R_InitSans_Vertex", { {"progBuf", progBuf[tile]}, }); m_ipuGraph.setTileMapping(vtx, tile); @@ -187,8 +204,9 @@ void IpuDoom::buildIpuGraph() { ); poplar::program::Sequence R_Init_prog({ - poplar::program::Execute(R_InitTextureOrSans_CS), poplar::program::Copy(miscValuesStream, m_miscValuesBuf), + poplar::program::Copy(textureBufStream, textureBuf.slice(0, IPUTEXTURETILESPERRENDERTILE)), + poplar::program::Execute(R_InitTextureAndSans_CS), poplar::program::Repeat(2, poplar::program::Sequence({ // <- number of R_Init_CS steps poplar::program::Execute(R_Init_CS), poplar::program::Call(requestLumpFromHost, {m_lumpNum[0]}, {lumpBuf}), @@ -273,21 +291,24 @@ void IpuDoom::buildIpuGraph() { // -------- R_RenderPlayerView_CS ------ // - poplar::Tensor textureBuf = m_ipuGraph.addVariable( - poplar::UNSIGNED_CHAR, - {IPUNUMTEXTURETILES, IPUTEXTURETILEBUFSIZE}, - "textureBuf"); poplar::Tensor textureCache = m_ipuGraph.addVariable( poplar::UNSIGNED_INT, { IPUNUMRENDERTILES, IPUNUMTEXTURECACHELINES * IPUTEXTURECACHELINESIZE}, "textureCache"); + poplar::Tensor textureOffsets = m_ipuGraph.addVariable(poplar::INT, {IPUMAXNUMTEXTURES}, "textureOffsets"); + poplar::Tensor textureRanges = m_ipuGraph.addVariable(poplar::INT, {IPUTEXTURETILESPERRENDERTILE + 1}, "textureRanges"); + auto textureOffsetStream = m_ipuGraph.addHostToDeviceFIFO("textureOffsets-stream", poplar::INT, IPUMAXNUMTEXTURES); + auto textureRangeStream = m_ipuGraph.addHostToDeviceFIFO("textureRanges-stream", poplar::INT, IPUTEXTURETILESPERRENDERTILE + 1); + m_ipuGraph.setTileMapping(textureOffsets, IPUFIRSTTEXTURETILE); + m_ipuGraph.setTileMapping(textureRanges, IPUFIRSTTEXTURETILE); + poplar::ComputeSet R_RenderPlayerView_CS = m_ipuGraph.addComputeSet("R_RenderPlayerView_CS"); for (int renderTile = 0; renderTile < IPUNUMRENDERTILES; ++renderTile) { int logicalTile = IPUFIRSTRENDERTILE + renderTile; vtx = m_ipuGraph.addVertex(R_RenderPlayerView_CS, "R_RenderPlayerView_Vertex", { - {"frame", ipuFrameSlices[renderTile]}, + {"frame", ipuFrameSlices[renderTile]}, {"textureCache", textureCache[renderTile]}, {"progBuf", progBuf[logicalTile]}, {"commsBuf", commsBuf[logicalTile]}, @@ -300,11 +321,14 @@ void IpuDoom::buildIpuGraph() { } for (int textureTile = 0; textureTile < IPUNUMTEXTURETILES; ++textureTile) { int logicalTile = IPUFIRSTTEXTURETILE + textureTile; + // int textureStripeIdx = textureTile % IPUTEXTURETILESPERRENDERTILE; vtx = m_ipuGraph.addVertex(R_RenderPlayerView_CS, "R_FulfilColumnRequests_Vertex", { {"dummy", nonExecutableDummy[logicalTile]}, - {"textureBuf", textureBuf[textureTile]}, {"progBuf", progBuf[logicalTile]}, {"commsBuf", commsBuf[logicalTile]}, + {"textureBuf", textureBuf[textureTile]}, + // {"textureOffsets", textureOffsets}, + // {"textureRange", textureRanges.slice(textureStripeIdx, textureStripeIdx + 2)}, }); m_ipuGraph.setTileMapping(vtx, logicalTile); m_ipuGraph.setPerfEstimate(vtx, 1000); @@ -361,6 +385,8 @@ void IpuDoom::buildIpuGraph() { m_ipuEngine->connectStream("miscValues-stream", m_miscValuesBuf_h); m_ipuEngine->connectStream("lumpNum-stream", &m_lumpNum_h); + m_ipuEngine->connectStream("textureOffsets-stream", ipuTextureBlobOffsets); + m_ipuEngine->connectStream("textureRanges-stream", ipuTextureBlobRanges); // Connect frame-instream/outstream later in run_IPU_MiscSetup because // I_VideoBuffer is initialised quite late @@ -411,6 +437,7 @@ void IpuDoom::run_R_ExecuteSetViewSize() { m_ipuEngine->run(6); } void IpuDoom::run_R_Init() { + m_ipuEngine->connectStream("textureBuf-stream", ipuTextureBlob); IPU_R_Init_PackMiscValues(m_miscValuesBuf_h); m_ipuEngine->run(7); } diff --git a/src/ipu_transfer.h b/src/ipu_transfer.h index 65ca0e8..d0560e4 100644 --- a/src/ipu_transfer.h +++ b/src/ipu_transfer.h @@ -8,6 +8,11 @@ extern "C" { #include "doomtype.h" #include "r_defs.h" +#include "ipu/ipu_interface.h" + +extern byte* ipuTextureBlob; +extern int ipuTextureBlobOffsets[IPUMAXNUMTEXTURES]; +extern int ipuTextureBlobRanges[IPUTEXTURETILESPERRENDERTILE + 1]; void IPU_G_LoadLevel_PackMiscValues(void* buf); void IPU_G_Ticker_PackMiscValues(void* buf); diff --git a/src/r_data.c b/src/r_data.c index b6349c2..43d2dbb 100644 --- a/src/r_data.c +++ b/src/r_data.c @@ -354,18 +354,22 @@ byte *R_GetColumn_Original(int tex, int col) { // JOSEF: Renamed to `_Original` // JOSEF: We dedicate several tiles on the IPU just to storing textures, // so we can afford to preassemble all textures into a big blob byte* ipuTextureBlob; -int* ipuTextureBlobOffsets; +int ipuTextureBlobOffsets[IPUMAXNUMTEXTURES]; int ipuTextureBlobRanges[IPUTEXTURETILESPERRENDERTILE + 1]; + void GenerateIPUTextureBlob(void) { + + if (numtextures >= IPUMAXNUMTEXTURES) { + I_Error("GenerateIPUTextureBlob: numtextures >= IPUMAXNUMTEXTURES"); + } ipuTextureBlob = malloc(IPUTEXTURETILEBUFSIZE * IPUTEXTURETILESPERRENDERTILE); - ipuTextureBlobOffsets = malloc(sizeof(*ipuTextureBlobOffsets) * numtextures); ipuTextureBlobRanges[0] = 0; int tile = 0, pos = 0; + for (int t = 0; t < numtextures; ++t) { int tex_width = textures[t]->width; int tex_height = textures[t]->height; int tex_size = tex_width * tex_height; - if (pos + tex_size >= IPUTEXTURETILEBUFSIZE) { pos = 0; tile += 1; @@ -384,13 +388,13 @@ void GenerateIPUTextureBlob(void) { pos += tex_size; } ipuTextureBlobRanges[tile + 1] = numtextures; - } // // R_GetColumn : JOSEF: The version that uses the IPU texture dump // byte *R_GetColumn(int tex, int col) { + // return R_GetColumn_Original(tex, col); // For checking old behaviour int tile; col &= texturewidthmask[tex]; for (tile = 0; tile < IPUTEXTURETILESPERRENDERTILE; ++tile) { @@ -551,8 +555,6 @@ void R_InitTextures(void) { printf("\b"); } - int totaltexturesize = 0; // JOSEF TMP - for (i = 0; i < numtextures; i++, directory++) { if (!(i & 63)) printf("."); @@ -580,8 +582,6 @@ void R_InitTextures(void) { texture->height = SHORT(mtexture->height); texture->patchcount = SHORT(mtexture->patchcount); - totaltexturesize += texture->width * texture->height; // JOSEF TMP - memcpy(texture->name, mtexture->name, sizeof(texture->name)); mpatch = &mtexture->patches[0]; patch = &texture->patches[0]; @@ -609,12 +609,6 @@ void R_InitTextures(void) { totalwidth += texture->width; } - printf("\n JOSEF: Total Texture Size = %d bytes (%dKb), numtextures = %d\n", - totaltexturesize, - totaltexturesize / 1000, - numtextures - ); - Z_Free(patchlookup); W_ReleaseLumpName(("TEXTURE1")); diff --git a/src/r_main.c b/src/r_main.c index 608a07d..47121da 100644 --- a/src/r_main.c +++ b/src/r_main.c @@ -646,7 +646,6 @@ void R_ExecuteSetViewSize(void) { // void R_Init(void) { - IPU_R_Init(); R_InitData(); printf("."); @@ -666,6 +665,8 @@ void R_Init(void) { printf("."); framecount = 0; + + IPU_R_Init(); } // @@ -741,7 +742,7 @@ void R_RenderPlayerView(player_t *player) { // The head node is the last node output. R_RenderBSPNode(numnodes - 1); - // IPU_R_RenderPlayerView(); + IPU_R_RenderPlayerView(); // Check for new console commands. NetUpdate();