Intermediate: First few columns of first frame fetched from texture tiles

This commit is contained in:
jndean
2023-10-18 17:00:52 +00:00
parent 899ff800bd
commit 65e35ef19b
6 changed files with 319 additions and 25 deletions
+1 -1
View File
@@ -1112,7 +1112,7 @@ void D_DoomMain(void) {
I_InitTimer();
printf("inited timer\n");
I_InitJoystick();
printf("inited joystick \n");
printf("init'd joystick \n");
I_InitSound(true);
printf("initted sound\n");
I_InitMusic();
+8
View File
@@ -18,11 +18,19 @@ extern "C" {
#define IPUMAXEVENTSPERTIC (5)
#define IPUAMMARKBUFSIZE (544)
#define IPUMAPPEDLINEUPDATES (2)
// Number of unsigned words in each tile's exchange-program buffer (progBuf)
#define IPUPROGBUFSIZE (32)
// Logical tile layout: render tiles come first...
#define IPUFIRSTRENDERTILE (0)
#define IPUNUMRENDERTILES (32)
// Screen columns handled per render tile (SCREENWIDTH split evenly across render tiles)
#define IPUCOLSPERRENDERTILE (SCREENWIDTH / IPUNUMRENDERTILES)
// Number of texture tiles associated with each render tile
#define IPUTEXTURETILESPERRENDERTILE (10)
// Number of unsigned words in each texture tile's texture buffer
#define IPUTEXTURETILEBUFSIZE (1024)
// ...texture tiles follow directly after the render tiles...
#define IPUFIRSTTEXTURETILE (IPUFIRSTRENDERTILE + IPUNUMRENDERTILES)
#define IPUNUMTEXTURETILES (IPUTEXTURETILESPERRENDERTILE * IPUNUMRENDERTILES)
// Render-tile texture cache: number of lines, and words per line
// (128 bytes expressed in int-sized words; note the expression has type size_t)
#define IPUNUMTEXTURECACHELINES (1)
#define IPUTEXTURECACHELINESIZE (128 / sizeof(int))
// ...and the first logical tile after the last texture tile
#define IPUREDUCTIONTILE (IPUFIRSTTEXTURETILE + IPUNUMTEXTURETILES)
typedef struct {
+169 -3
View File
@@ -1,16 +1,89 @@
#include <poplar/TileConstants.hpp>
#include <poplar/Vertex.hpp>
#include "doomtype.h"
#include "ipu_interface.h"
#include "ipu_utils.h"
#include "ipu_texturetiles.h"
#include "../../xcom.hpp"
// Remember! Copies of these vars exist independently on each tile
// Start of this tile's exchange-program buffer (assigned by the render vertex before rendering)
unsigned* tileLocalProgBuf;
// Start of this tile's texture cache buffer (assigned by the render vertex before rendering)
unsigned* tileLocalTextureBuf;
// Number of column fetches performed so far on this tile; the fetch loops stop once it reaches 6
int textureFetchCount = 0;
// -------- Components for the tiles that serve textures ------------ //
// Assembles the two exchange programs used by a texture-serving tile and
// records their word offsets in the 2-entry directory at the start of progBuf:
//   progBuf[0] -> program that receives an IPUColRequest_t from this tile's render tile
//   progBuf[1] -> program that sends one texture cache line back to that render tile
//
// progBuf:     buffer that receives the directory followed by the assembled programs
// progBufSize: capacity of progBuf, in unsigned words
__SUPER__
void IPU_R_InitTextureTile(unsigned* progBuf, int progBufSize) {
  // progBuf starts with a directory of 2 program offsets
  unsigned* progHead = progBuf + 2;
  unsigned* progEnd = &progBuf[progBufSize];

  // Figure out which tiles are involved. The render tile must be given as an
  // absolute logical tile ID, so the render-tile base is added explicitly
  // (it is currently 0, but the mapping must not silently depend on that).
  auto renderTileIndex = (tileID - IPUFIRSTTEXTURETILE) / IPUTEXTURETILESPERRENDERTILE;
  int renderTile = XCOM_logical2physical(IPUFIRSTRENDERTILE + renderTileIndex);
  int thisTile = __builtin_ipu_get_tile_id();

  // First program receives the request
  {
    progBuf[0] = progHead - progBuf;
    XCOMAssembler assembler;
    int sendCycle = XCOM_WORSTSENDDELAY;
    int muxCycle = sendCycle + XCOM_TimeToMux(renderTile, thisTile);
    int messageSize = sizeof(IPUColRequest_t) / sizeof(int);
    assembler.addRecv(0, messageSize, renderTile, muxCycle);
    progHead = assembler.assemble(progHead, progEnd - progHead);
    progHead++; // This program returns control flow, so don't override the `br $m10`
  }

  // Second program sends the response
  {
    progBuf[1] = progHead - progBuf;
    XCOMAssembler assembler;
    int messageSize = IPUTEXTURECACHELINESIZE;
    int recvCycle = XCOM_WORSTRECVDELAY;
    int sendCycle = recvCycle - XCOM_TimeToMux(thisTile, renderTile);
    int direction = XCOM_EastOrWest(thisTile, renderTile);
    assembler.addSend(0, messageSize, direction, sendCycle);
    progHead = assembler.assemble(progHead, progEnd - progHead);
    progHead++; // Same as above: step past the trailing `br $m10`
  }
}
// Texture-tile service loop: until 6 fetches have been counted on this tile,
// run the receive program, fill one cache line of textureBuf with a
// placeholder pattern, then run the send program.
//
// progBuf:    directory + programs (entry 0 = receive, entry 1 = send)
// textureBuf: buffer the request is received into and the response is sent from
__SUPER__
void IPU_R_FulfilColumnRequest(unsigned* progBuf, unsigned* textureBuf) {
  // Resolve both programs through the directory at the head of the buffer
  unsigned* const recvProgram = progBuf + progBuf[0];
  unsigned* const sendProgram = progBuf + progBuf[1];

  while (textureFetchCount < 6) {
    XCOM_Execute(recvProgram, NULL, textureBuf);

    // Placeholder payload: every word of the cache line gets the same value
    unsigned* const lineEnd = textureBuf + IPUTEXTURECACHELINESIZE;
    for (unsigned* word = textureBuf; word != lineEnd; ++word) {
      *word = 0x20202020;
    }

    XCOM_Execute(sendProgram, textureBuf, NULL);
    ++textureFetchCount;
  }
}
// -------- Components for the tiles that request textures ------------ //
// Dimensions of the placeholder column cache: number of columns, bytes per column
#define NUMCACHECOLS (20)
#define CACHECOLSIZE (128)
// Placeholder columns; IPU_R_RequestColumn falls back to these (indexed by texture % NUMCACHECOLS)
static byte columnCache[NUMCACHECOLS][CACHECOLSIZE];
// Physical tile IDs of the texture tiles that serve this render tile
static int textureTileLUT[IPUTEXTURETILESPERRENDERTILE];
// Word offset of the mux instruction, measured from the start of the request program;
// handed to XCOM_PatchMuxAndExecute as the patch location
static int muxInstructionOffset;
extern "C" __SUPER__ void IPU_R_InitColumnRequester(void) {
extern "C"
__SUPER__
void IPU_R_InitColumnRequester(unsigned* progBuf, int progBufSize) {
// TMP colours
for (int i = 0; i < NUMCACHECOLS; ++i) {
unsigned* col = (unsigned*) columnCache[i];
@@ -23,9 +96,102 @@ extern "C" __SUPER__ void IPU_R_InitColumnRequester(void) {
col[j] = packedColour;
}
}
// Figure out which tiles to talk to
int firstTextureTile = IPUFIRSTTEXTURETILE + (IPUTEXTURETILESPERRENDERTILE * (tileID - IPUFIRSTRENDERTILE));
for (int i = 0; i < IPUTEXTURETILESPERRENDERTILE; ++i) {
textureTileLUT[i] = XCOM_logical2physical(firstTextureTile + i);
}
// Prog Buf starts with directory of programs
unsigned* progHead = progBuf + 3;
unsigned* progEnd = &progBuf[progBufSize];
{
progBuf[0] = progHead - progBuf;
XCOMAssembler assembler;
int messageSize = sizeof(IPUColRequest_t) / sizeof(int);
assembler.addSend(0, messageSize, XCOM_BROADCAST, XCOM_WORSTSENDDELAY);
progHead = assembler.assemble(progHead, progEnd - progHead);
}
{
XCOMAssembler assembler;
int messageSize = IPUTEXTURECACHELINESIZE;
assembler.addRecv(0, messageSize, 0, XCOM_WORSTRECVDELAY);
unsigned* newProgHead = assembler.assemble(progHead, progEnd - progHead);
for (unsigned* inst = progHead; inst < newProgHead; ++inst) {
if ((*inst & 0xfc003fffu) == 0x64000000u) {
muxInstructionOffset = (inst - &progBuf[progBuf[0]]);
break;
}
}
progHead = newProgHead;
}
// Third program sends finished flag to flag reducer
// {
// progBuf[2] = progHead - progBuf;
// XCOMAssembler assembler;
// int messageSize = 1;
// int recvCycle = XCOM_WORSTRECVDELAY;
// int sendCycle = recvCycle - XCOM_TimeToMux(__builtin_ipu_get_tile_id(), renderTile);
// int direction = XCOM_EastOrWest(__builtin_ipu_get_tile_id(), renderTile);
// assembler.addSend(0, messageSize, direction, sendCycle);
// progHead = assembler.assemble(progHead, progEnd - progHead);
// progHead++;
// }
}
extern "C" __SUPER__ byte* IPU_R_RequestColumn(int texture, int column) {
// Returns a pointer to column data for the given texture.
// For the first 6 calls on this tile the data is fetched from a texture tile
// into tileLocalTextureBuf; after that the placeholder column cache is used.
// NOTE: `column` is currently unused and the source tile is fixed to LUT entry 0.
extern "C"
__SUPER__
byte* IPU_R_RequestColumn(int texture, int column) {
  // progBuf starts with a program directory; entry 0 is the request program
  unsigned* const requestProg = tileLocalProgBuf + tileLocalProgBuf[0];

  // The counter advances on every call, whichever path is taken
  const bool fetchFromTextureTile = (textureFetchCount++ < 6);
  if (!fetchFromTextureTile) {
    return columnCache[texture % NUMCACHECOLS];
  }

  const int sourceTile = 0;
  XCOM_PatchMuxAndExecute(
    requestProg,                // Prog
    tileLocalTextureBuf,        // Read offset
    tileLocalTextureBuf,        // Write offset
    muxInstructionOffset,       // Patch offset
    textureTileLUT[sourceTile]  // Mux value
  );
  return reinterpret_cast<byte*>(tileLocalTextureBuf);
}
}
// -------- Components for the sans tiles ------------ //
// Assembles a single receive program (sized for one IPUColRequest_t) at the
// start of progBuf. Work in progress: the source tile is hard-coded to 0.
//
// progBuf:     buffer that receives the assembled program
// progBufSize: capacity of progBuf, in unsigned words
extern "C"
__SUPER__
void IPU_R_InitSansTile(unsigned* progBuf, int progBufSize) {
  // TODO
  XCOMAssembler assembler;
  const int srcTile = 0;
  const int messageSize = sizeof(IPUColRequest_t) / sizeof(int);

  // Listen at the worst-case send time plus the time for the mux to take effect
  const int sendCycle = XCOM_WORSTSENDDELAY;
  const int muxCycle = sendCycle + XCOM_TimeToMux(srcTile, __builtin_ipu_get_tile_id());

  assembler.addRecv(0, messageSize, 0, muxCycle);
  assembler.assemble(progBuf, progBufSize);
}
// Runs on tiles that take no part in texture exchange. Both parameters are
// ignored. For each fetch still outstanding (textureFetchCount < 6) it issues
// one `sans` / `sync` instruction pair, using the same limit of 6 as the
// fetch loops on the render and texture tiles.
// NOTE(review): presumably this keeps these tiles in step with the syncs done
// by the participating tiles — confirm against the XCOM implementation.
extern "C"
__SUPER__
void IPU_R_Sans(unsigned* progBuf, int progBufSize) {
// Explicitly mark both parameters as unused
(void) progBuf;
(void) progBufSize;
for(; textureFetchCount < 6; textureFetchCount++) {
asm volatile(R"(
sans 1
sync 0x1
)");
}
}
+15 -1
View File
@@ -11,11 +11,25 @@ extern "C" {
#include "ipu_utils.h"
// Column request message; its size in ints determines the length of the
// request send/receive exchange programs.
typedef struct {
  int texture;
  int column;
} IPUColRequest_t;
__SUPER__ void IPU_R_InitColumnRequester(void);
// Per-tile pointers to the exchange-program buffer and the texture cache buffer
extern unsigned* tileLocalProgBuf;
extern unsigned* tileLocalTextureBuf;
// Texture tile: assemble the receive-request and send-response programs into progBuf
__SUPER__ void IPU_R_InitTextureTile(unsigned* progBuf, int progBufSize);
// Render tile: fill the placeholder column cache and assemble the request programs into progBuf
__SUPER__ void IPU_R_InitColumnRequester(unsigned* progBuf, int progBufSize);
// Render tile: return a pointer to column data for `texture`
__SUPER__ byte* IPU_R_RequestColumn(int texture, int column);
// Texture tile: service loop that receives requests and sends back a cache line from textureBuf
__SUPER__ void IPU_R_FulfilColumnRequest(unsigned* progBuf, unsigned* textureBuf);
// Remaining tiles: issue the sans/sync pairs; both arguments are ignored
__SUPER__ void IPU_R_Sans(unsigned* progBuf, int progBufSize);
#ifdef __cplusplus
+51 -3
View File
@@ -20,6 +20,7 @@ extern "C" {
struct R_Init_Vertex: public poplar::SupervisorVertex {
poplar::Output<poplar::Vector<unsigned>> progBuf;
poplar::Input<poplar::Vector<unsigned char>> miscValues;
poplar::Input<poplar::Vector<unsigned char>> lumpBuf;
poplar::Output<int> lumpNum;
@@ -34,7 +35,7 @@ struct R_Init_Vertex: public poplar::SupervisorVertex {
break; case 1:
R_InitTextures((int*)&lumpBuf[0], (R_Init_MiscValues_t*)&miscValues[0]);
IPU_R_InitColumnRequester();
IPU_R_InitColumnRequester(&progBuf[0], progBuf.size());
*lumpNum = 0;
step = 0;
@@ -44,19 +45,34 @@ struct R_Init_Vertex: public poplar::SupervisorVertex {
};
struct R_RenderPlayerView_Vertex : public poplar::SupervisorVertex {
// Supervisor vertex that renders the player view into this tile's slice of the
// frame buffer. Before rendering it publishes the tile's exchange-program
// buffer and texture cache through the tile-local globals so that
// IPU_R_RequestColumn can use them.
//
// The constraints ask Poplar to place progBuf in a different memory region
// from nonExecutableDummy, and to keep textureCache and progBuf in different
// memory elements.
// NOTE(review): presumably this is what makes progBuf land in executable
// memory — confirm against the Poplar vertex-constraint documentation.
struct
[[
  poplar::constraint("region(*nonExecutableDummy) != region(*progBuf)"),
  poplar::constraint("elem(*textureCache) != elem(*progBuf)"),
]]
R_RenderPlayerView_Vertex : public poplar::SupervisorVertex {
  poplar::Input<poplar::Vector<unsigned char>> miscValues;
  poplar::InOut<poplar::Vector<unsigned char>> frame;
  poplar::InOut<poplar::Vector<
    int, poplar::VectorLayout::SPAN, 4, true>> nonExecutableDummy;
  poplar::InOut<poplar::Vector<unsigned>> progBuf;
  poplar::InOut<poplar::Vector<unsigned>> textureCache;

  __SUPER__ void compute() {
    // The renderer draws straight into the frame slice bound to this vertex
    assert(&frame[0] == I_VideoBuffer);

    tileLocalProgBuf = &progBuf[0];
    tileLocalTextureBuf = &textureCache[0];

    // TMP: placeholder words at the start of the buffer that the request
    // program reads from (looks like a dummy {texture, column} request)
    textureCache[0] = -1;
    textureCache[1] = 1701;

    IPU_R_RenderPlayerView_UnpackMiscValues(
      (R_RenderPlayerView_MiscValues_t*) &miscValues[0]
    );
    R_RenderPlayerView(&players[displayplayer]);
    return;
  }
};
@@ -71,4 +87,36 @@ class R_ExecuteSetViewSize_Vertex : public poplar::SupervisorVertex {
);
R_ExecuteSetViewSize();
}
};
// Supervisor vertex for a texture tile: runs the column-request service loop
// (IPU_R_FulfilColumnRequest) using this tile's program buffer and texture buffer.
// The constraints ask Poplar to place progBuf in a different memory region
// from dummy, and to keep textureBuf and progBuf in different memory elements.
// NOTE(review): presumably this is what makes progBuf land in executable
// memory — confirm against the Poplar vertex-constraint documentation.
struct
[[
poplar::constraint("region(*dummy) != region(*progBuf)"),
poplar::constraint("elem(*textureBuf) != elem(*progBuf)"),
]]
R_FulfilColumnRequests_Vertex : public poplar::SupervisorVertex {
// Only referenced by the region constraint above; never touched in compute()
poplar::InOut<poplar::Vector<
int, poplar::VectorLayout::SPAN, 4, true>> dummy;
// Directory + exchange programs for this tile
poplar::InOut<poplar::Vector<unsigned>> progBuf;
// Buffer that requests are received into and responses are sent from
poplar::Output<poplar::Vector<unsigned>> textureBuf;
__SUPER__ void compute() {
IPU_R_FulfilColumnRequest(&progBuf[0], &textureBuf[0]);
}
};
// Supervisor vertex for a texture tile: assembles that tile's exchange
// programs into progBuf by calling IPU_R_InitTextureTile.
struct R_InitTextureTile_Vertex : public poplar::SupervisorVertex {
// Receives the program directory followed by the assembled programs
poplar::Output<poplar::Vector<unsigned>> progBuf;
__SUPER__ void compute() {
IPU_R_InitTextureTile(&progBuf[0], progBuf.size());
}
};
// Supervisor vertex for tiles that take no part in the texture exchange:
// it only calls IPU_R_Sans, which ignores both of its arguments.
struct R_Sans_Vertex : public poplar::SupervisorVertex {
  __SUPER__ void compute() {
    // Pass a null pointer and a zero size explicitly; the second parameter is
    // an int, so NULL must not be used for it.
    IPU_R_Sans(nullptr, 0);
  }
};
+75 -17
View File
@@ -11,6 +11,7 @@
#include <poplar/IPUModel.hpp>
#include <poplar/Program.hpp>
#include <poplar/HostFunctionCallback.hpp>
#include <poplar/SyncType.hpp>
#include "i_video.h"
#include "ipu/ipu_interface.h"
@@ -74,6 +75,7 @@ IpuDoom::~IpuDoom(){};
void IpuDoom::buildIpuGraph() {
m_ipuGraph.addCodelets("build/ipu_rt.gp");
const size_t totalTiles = m_ipuDevice.getTarget().getNumTiles();
// ---- The main frame buffer ---- //
poplar::Tensor ipuFrame =
@@ -91,6 +93,15 @@ void IpuDoom::buildIpuGraph() {
auto frameOutStream =
m_ipuGraph.addDeviceToHostFIFO("frame-outstream", poplar::UNSIGNED_CHAR, SCREENWIDTH * SCREENHEIGHT);
// Stuff for exchange programs
poplar::Tensor nonExecutableDummy = m_ipuGraph.addVariable(poplar::INT, {totalTiles, 1}, "nonExecutableDummy");
poplar::Tensor progBuf = m_ipuGraph.addVariable(poplar::UNSIGNED_INT, {totalTiles, IPUPROGBUFSIZE}, "progBuf");
for (unsigned t = 0; t < totalTiles; ++t) {
m_ipuGraph.setTileMapping(nonExecutableDummy[t], t);
m_ipuGraph.setTileMapping(progBuf[t], t);
}
// -------- AM_Drawer_CS ------ //
poplar::ComputeSet AM_Drawer_CS = m_ipuGraph.addComputeSet("AM_Drawer_CS");
@@ -148,11 +159,25 @@ void IpuDoom::buildIpuGraph() {
poplar::ComputeSet R_Init_CS = m_ipuGraph.addComputeSet("R_Init_CS");
for (int renderTile = 0; renderTile < IPUNUMRENDERTILES; ++renderTile) {
int logicalTile = IPUFIRSTRENDERTILE + renderTile;
vtx = m_ipuGraph.addVertex(R_Init_CS, "R_Init_Vertex", {
{"lumpNum", m_lumpNum[renderTile]}, {"lumpBuf", lumpBuf}, {"miscValues", m_miscValuesBuf}});
m_ipuGraph.setTileMapping(vtx, renderTile + IPUFIRSTRENDERTILE);
{"lumpNum", m_lumpNum[renderTile]},
{"lumpBuf", lumpBuf},
{"miscValues", m_miscValuesBuf},
{"progBuf", progBuf[logicalTile]}
});
m_ipuGraph.setTileMapping(vtx, logicalTile);
m_ipuGraph.setPerfEstimate(vtx, 100);
}
poplar::ComputeSet R_InitTextureTile_CS = m_ipuGraph.addComputeSet("R_InitTextureTile_CS");
for (int textureTile = 0; textureTile < IPUNUMTEXTURETILES; ++textureTile) {
int logicalTile = IPUFIRSTTEXTURETILE + textureTile;
vtx = m_ipuGraph.addVertex(R_InitTextureTile_CS, "R_InitTextureTile_Vertex", {
{"progBuf", progBuf[logicalTile]},
});
m_ipuGraph.setTileMapping(vtx, logicalTile);
m_ipuGraph.setPerfEstimate(vtx, 2000);
}
poplar::HostFunction requestLumpFromHost = m_ipuGraph.addHostFunction(
"requestLumpFromHost",
@@ -161,14 +186,12 @@ void IpuDoom::buildIpuGraph() {
);
poplar::program::Sequence R_Init_prog({
poplar::program::Copy(miscValuesStream, m_miscValuesBuf),
poplar::program::Repeat(2, poplar::program::Sequence({ // <- number of R_Init_CS steps
poplar::program::Execute(R_Init_CS),
// poplar::program::Copy(m_lumpNum[0], lumpNumStream), // Only listen to first tile's requests
// poplar::program::Sync(poplar::SyncType::GLOBAL), // lumpnum must arrive before lump is loaded
// poplar::program::Copy(lumpBufStream, lumpBuf),
poplar::program::Call(requestLumpFromHost, {m_lumpNum[0]}, {lumpBuf}),
})),
poplar::program::Execute(R_InitTextureTile_CS),
poplar::program::Copy(miscValuesStream, m_miscValuesBuf),
poplar::program::Repeat(2, poplar::program::Sequence({ // <- number of R_Init_CS steps
poplar::program::Execute(R_Init_CS),
poplar::program::Call(requestLumpFromHost, {m_lumpNum[0]}, {lumpBuf}),
})),
});
// ---------------- G_Ticker --------------//
@@ -203,13 +226,14 @@ void IpuDoom::buildIpuGraph() {
});
// -------------- IPU Init setup (Happens after most CPU setup) ------------//
// -------------- IPU Init setup (Happens before most CPU setup) ------------//
// Initialising vtx that runs on every tile
poplar::ComputeSet IPU_Init_CS = m_ipuGraph.addComputeSet("IPU_Init_CS");
for (int renderTile = 0; renderTile < IPUNUMRENDERTILES; ++renderTile) {
for (unsigned tile = 0; tile < totalTiles; ++tile) {
vtx = m_ipuGraph.addVertex(IPU_Init_CS, "IPU_Init_Vertex");
m_ipuGraph.setTileMapping(vtx, renderTile + IPUFIRSTRENDERTILE);
m_ipuGraph.setPerfEstimate(vtx, 10000);
m_ipuGraph.setTileMapping(vtx, tile);
m_ipuGraph.setPerfEstimate(vtx, 1000);
}
poplar::program::Sequence IPU_Init_Prog({
poplar::program::Execute(IPU_Init_CS),
@@ -248,18 +272,51 @@ void IpuDoom::buildIpuGraph() {
// -------- R_RenderPlayerView_CS ------ //
poplar::Tensor textureBuf = m_ipuGraph.addVariable(
poplar::UNSIGNED_INT,
{IPUNUMTEXTURETILES, IPUTEXTURETILEBUFSIZE},
"textureBuf");
poplar::Tensor textureCache = m_ipuGraph.addVariable(
poplar::UNSIGNED_INT,
{ IPUNUMRENDERTILES,
IPUNUMTEXTURECACHELINES * IPUTEXTURECACHELINESIZE },
"textureBuf");
poplar::ComputeSet R_RenderPlayerView_CS = m_ipuGraph.addComputeSet("R_RenderPlayerView_CS");
for (int renderTile = 0; renderTile < IPUNUMRENDERTILES; ++renderTile) {
vtx = m_ipuGraph.addVertex(R_RenderPlayerView_CS, "R_RenderPlayerView_Vertex",
{{"frame", ipuFrameSlices[renderTile]}, {"miscValues", m_miscValuesBuf}});
m_ipuGraph.setTileMapping(vtx, renderTile + IPUFIRSTRENDERTILE);
int logicalTile = IPUFIRSTRENDERTILE + renderTile;
vtx = m_ipuGraph.addVertex(R_RenderPlayerView_CS, "R_RenderPlayerView_Vertex", {
{"frame", ipuFrameSlices[renderTile]},
{"textureCache", textureCache[renderTile]},
{"progBuf", progBuf[logicalTile]},
{"nonExecutableDummy", nonExecutableDummy[logicalTile]},
{"miscValues", m_miscValuesBuf},
});
m_ipuGraph.setTileMapping(vtx, logicalTile);
m_ipuGraph.setTileMapping(textureCache[renderTile], logicalTile);
m_ipuGraph.setPerfEstimate(vtx, 10000000);
}
for (int textureTile = 0; textureTile < IPUNUMTEXTURETILES; ++textureTile) {
int logicalTile = IPUFIRSTTEXTURETILE + textureTile;
vtx = m_ipuGraph.addVertex(R_RenderPlayerView_CS, "R_FulfilColumnRequests_Vertex", {
{"dummy", nonExecutableDummy[logicalTile]},
{"textureBuf", textureBuf[textureTile]},
{"progBuf", progBuf[logicalTile]},
});
m_ipuGraph.setTileMapping(vtx, logicalTile);
m_ipuGraph.setPerfEstimate(vtx, 1000);
m_ipuGraph.setTileMapping(textureBuf[textureTile], textureTile);
}
for (unsigned tile = IPUFIRSTTEXTURETILE + IPUNUMTEXTURETILES; tile < totalTiles; ++tile) {
m_ipuGraph.setTileMapping(m_ipuGraph.addVertex(R_RenderPlayerView_CS, "R_Sans_Vertex"), tile);
}
poplar::program::Sequence R_RenderPlayerView_prog({
poplar::program::Copy(miscValuesStream, m_miscValuesBuf),
poplar::program::Copy(frameInStream, ipuFrame),
poplar::program::Sync(poplar::SyncType::INTERNAL),
poplar::program::Execute(R_RenderPlayerView_CS),
poplar::program::Sync(poplar::SyncType::INTERNAL),
poplar::program::Copy(ipuFrame, frameOutStream),
});
@@ -277,6 +334,7 @@ void IpuDoom::buildIpuGraph() {
poplar::program::Execute(R_ExecuteSetViewSize_CS),
});
// ---------------- Final prog --------------//
m_ipuEngine = std::make_unique<poplar::Engine>(std::move(poplar::Engine(