mirror of
https://github.com/jndean/IPUDOOM.git
synced 2026-03-21 23:19:47 +00:00
Texture exchange functional
This commit is contained in:
@@ -20,6 +20,7 @@ extern "C" {
|
||||
#define IPUMAPPEDLINEUPDATES (2)
|
||||
#define IPUPROGBUFSIZE (128)
|
||||
|
||||
|
||||
#define IPUFIRSTRENDERTILE (0)
|
||||
#define IPUNUMRENDERTILES (32)
|
||||
#define IPUCOLSPERRENDERTILE (SCREENWIDTH / IPUNUMRENDERTILES)
|
||||
@@ -30,7 +31,9 @@ extern "C" {
|
||||
#define IPUNUMTEXTURETILES (IPUTEXTURETILESPERRENDERTILE * IPUNUMRENDERTILES)
|
||||
#define IPUNUMTEXTURECACHELINES (1)
|
||||
#define IPUTEXTURECACHELINESIZE (128 / sizeof(int))
|
||||
#define IPUREDUCTIONTILE (IPUFIRSTTEXTURETILE + IPUNUMTEXTURETILES)
|
||||
|
||||
// #define IPUREDUCTIONPERIOD (2)
|
||||
#define IPUCOMMSBUFSIZE (IPUNUMRENDERTILES)
|
||||
|
||||
|
||||
typedef struct {
|
||||
|
||||
@@ -12,16 +12,19 @@
|
||||
|
||||
// Remember! Copies of these vars exist independently on each tile
|
||||
unsigned* tileLocalProgBuf;
|
||||
unsigned* tileLocalCommsBuf;
|
||||
unsigned* tileLocalTextureBuf;
|
||||
int textureFetchCount = 0;
|
||||
|
||||
const int tmpRepeatCount = 4;
|
||||
|
||||
// -------- Components for the tiles that serve textures ------------ //
|
||||
|
||||
extern "C"
|
||||
__SUPER__
|
||||
void IPU_R_InitTextureTile(unsigned* progBuf, int progBufSize) {
|
||||
// progBuf starts with a directory of 2 program offsets
|
||||
unsigned* progHead = progBuf + 2;
|
||||
// progBuf starts with a directory of 3 program offsets
|
||||
unsigned* progHead = progBuf + 3;
|
||||
unsigned* progEnd = &progBuf[progBufSize];
|
||||
|
||||
// Figure out which tiles are involved
|
||||
@@ -51,28 +54,51 @@ void IPU_R_InitTextureTile(unsigned* progBuf, int progBufSize) {
|
||||
progHead = assembler.assemble(progHead, progEnd - progHead);
|
||||
progHead++;
|
||||
}
|
||||
|
||||
// Third program receives the `done`
|
||||
{
|
||||
progBuf[2] = progHead - progBuf;
|
||||
XCOMAssembler assembler;
|
||||
int aggrTile = XCOM_logical2physical(IPUFIRSTRENDERTILE);
|
||||
int sendCycle = XCOM_WORSTSENDDELAY;
|
||||
int muxCycle = sendCycle + XCOM_TimeToMux(aggrTile, __builtin_ipu_get_tile_id());
|
||||
assembler.addRecv(0, 1, aggrTile, muxCycle);
|
||||
progHead = assembler.assemble(progHead, progEnd - progHead);
|
||||
progHead++;
|
||||
}
|
||||
}
|
||||
|
||||
extern "C"
|
||||
__SUPER__
|
||||
void IPU_R_FulfilColumnRequest(unsigned* progBuf, unsigned* textureBuf) {
|
||||
void IPU_R_FulfilColumnRequest(unsigned* progBuf, unsigned* textureBuf, unsigned* commsBuf) {
|
||||
// Start of buffer is a directory of programs
|
||||
auto recvProgram = &progBuf[progBuf[0]];
|
||||
auto sendProgram = &progBuf[progBuf[1]];
|
||||
auto aggrProgram = &progBuf[progBuf[2]];
|
||||
|
||||
for(; textureFetchCount < 6; textureFetchCount++) {
|
||||
// for(; textureFetchCount < tmpRepeatCount; textureFetchCount++) {
|
||||
while (1) {
|
||||
|
||||
XCOM_Execute(recvProgram, NULL, textureBuf);
|
||||
|
||||
// Unpack received data
|
||||
unsigned textureNum = ((IPUColRequest_t*) textureBuf)->texture;
|
||||
unsigned colNum = ((IPUColRequest_t*) textureBuf)->column;
|
||||
|
||||
byte c1 = (textureNum * 9) % 256;
|
||||
byte c2 = (c1 + 1) % 256;
|
||||
byte c3 = (c1 + 2) % 256;
|
||||
byte c4 = (c1 + 1) % 256;
|
||||
unsigned colour = c1 | (c2 << 8) | (c3 << 16) | (c4 << 24);
|
||||
for (int i = 0; i < IPUTEXTURECACHELINESIZE; i++) {
|
||||
textureBuf[i] = 0x20202020;
|
||||
textureBuf[i] = colour;
|
||||
}
|
||||
XCOM_Execute(sendProgram, textureBuf, NULL);
|
||||
|
||||
// TMP
|
||||
asm volatile(R"(
|
||||
sans 1
|
||||
sync 0x1
|
||||
)");
|
||||
XCOM_Execute(aggrProgram, NULL, commsBuf);
|
||||
// if (tileID == 35) printf ("Texture flag: %d\n", commsBuf[0]);
|
||||
if (commsBuf[0])
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -80,28 +106,12 @@ void IPU_R_FulfilColumnRequest(unsigned* progBuf, unsigned* textureBuf) {
|
||||
|
||||
// -------- Components for the tiles that request textures ------------ //
|
||||
|
||||
#define NUMCACHECOLS (20)
|
||||
#define CACHECOLSIZE (128)
|
||||
static byte columnCache[NUMCACHECOLS][CACHECOLSIZE];
|
||||
|
||||
static int textureTileLUT[IPUTEXTURETILESPERRENDERTILE];
|
||||
static int muxInstructionOffset;
|
||||
|
||||
extern "C"
|
||||
__SUPER__
|
||||
void IPU_R_InitColumnRequester(unsigned* progBuf, int progBufSize) {
|
||||
// TMP colours
|
||||
for (int i = 0; i < NUMCACHECOLS; ++i) {
|
||||
unsigned* col = (unsigned*) columnCache[i];
|
||||
byte c1 = (i * 8) % 256;
|
||||
byte c2 = (c1 + 1) % 256;
|
||||
byte c3 = (c1 + 2) % 256;
|
||||
byte c4 = (c1 + 1) % 256;
|
||||
unsigned packedColour = c1 | (c2 << 8) | (c3 << 16) | (c4 << 24);
|
||||
for (int j = 0; j < CACHECOLSIZE / 4; j++) {
|
||||
col[j] = packedColour;
|
||||
}
|
||||
}
|
||||
|
||||
// Figure out which tiles to talk to
|
||||
int firstTextureTile = IPUFIRSTTEXTURETILE + (IPUTEXTURETILESPERRENDERTILE * (tileID - IPUFIRSTRENDERTILE));
|
||||
@@ -112,115 +122,151 @@ void IPU_R_InitColumnRequester(unsigned* progBuf, int progBufSize) {
|
||||
// ProgBuf starts with directory of programs
|
||||
unsigned* progHead = progBuf + 3;
|
||||
unsigned* progEnd = &progBuf[progBufSize];
|
||||
int aggrTile = XCOM_logical2physical(IPUFIRSTRENDERTILE);
|
||||
XCOMAssembler assembler;
|
||||
|
||||
// First Program: performs the request and receives the response
|
||||
// First Program: performs the column request and sends flags to aggrTile
|
||||
progBuf[0] = progHead - progBuf;
|
||||
int messageSize = sizeof(IPUColRequest_t) / sizeof(int);
|
||||
assembler.addSend(0, messageSize, XCOM_BROADCAST, XCOM_WORSTSENDDELAY);
|
||||
// Do the first step of the `done` flag aggregation
|
||||
if (__builtin_ipu_get_tile_id() == aggrTile) {
|
||||
for (int slot = 1; slot < IPUNUMRENDERTILES; ++slot) {
|
||||
int senderID = XCOM_logical2physical(IPUFIRSTRENDERTILE + slot);
|
||||
bool clearMux = slot == IPUNUMRENDERTILES - 1;
|
||||
int recvCycle = XCOM_WORSTRECVDELAY + XCOM_WORSTRECVDELAY + slot - 1;
|
||||
unsigned* recvAddr = (unsigned*)(slot * sizeof(unsigned));
|
||||
assembler.addRecv(recvAddr, 1, senderID, recvCycle, clearMux);
|
||||
}
|
||||
} else {
|
||||
int myTimeSlot = tileID - (IPUFIRSTRENDERTILE + 1);
|
||||
int direction = XCOM_EastOrWest(__builtin_ipu_get_tile_id(), aggrTile);
|
||||
int muxCycle = XCOM_WORSTRECVDELAY + XCOM_WORSTRECVDELAY + myTimeSlot;
|
||||
int sendCycle = muxCycle - XCOM_TimeToMux(__builtin_ipu_get_tile_id(), aggrTile);
|
||||
unsigned* addr = (unsigned*) sizeof(IPUColRequest_t);
|
||||
assembler.addSend(addr, 1, direction, sendCycle);
|
||||
}
|
||||
progHead = assembler.assemble(progHead, progEnd - progHead);
|
||||
progHead++;
|
||||
assembler.reset();
|
||||
|
||||
// Second Program: Receives the request response
|
||||
progBuf[1] = progHead - progBuf;
|
||||
assembler.addRecv(0, IPUTEXTURECACHELINESIZE, 0, XCOM_WORSTRECVDELAY);
|
||||
unsigned* newProgHead = assembler.assemble(progHead, progEnd - progHead);
|
||||
// Record the mux location for later live patching
|
||||
// Record the mux-setting instruction location for later live patching
|
||||
for (unsigned* inst = progHead; inst < newProgHead; ++inst) {
|
||||
if ((*inst & 0xfc003fffu) == 0x64000000u) {
|
||||
muxInstructionOffset = (inst - &progBuf[progBuf[0]]);
|
||||
muxInstructionOffset = (inst - &progBuf[progBuf[1]]);
|
||||
break;
|
||||
}
|
||||
}
|
||||
progHead = newProgHead + 1; // +1 => don't overwrite the return statement
|
||||
assembler.reset();
|
||||
|
||||
// Second Program: aggregates the 'done' flag across render tiles
|
||||
progBuf[1] = progHead - progBuf;
|
||||
int aggrTile = XCOM_logical2physical(IPUFIRSTRENDERTILE);
|
||||
// Third Program: distributes the `done` flag
|
||||
progBuf[2] = progHead - progBuf;
|
||||
int sendCycle = XCOM_WORSTSENDDELAY;
|
||||
if (__builtin_ipu_get_tile_id() == aggrTile) {
|
||||
for (int slot = 0; slot < IPUNUMRENDERTILES - 1; ++slot) {
|
||||
int senderID = XCOM_logical2physical(IPUFIRSTRENDERTILE + 1 + slot);
|
||||
assembler.addRecv((unsigned*)(4*(slot + 1)), 1, senderID, XCOM_WORSTRECVDELAY + slot,
|
||||
slot == IPUNUMRENDERTILES - 2);
|
||||
}
|
||||
progHead = assembler.assemble(progHead, progEnd - progHead);
|
||||
progHead++;
|
||||
|
||||
// Third Program: aggregation tile distributes the answer
|
||||
progBuf[2] = progHead - progBuf;
|
||||
assembler.reset();
|
||||
assembler.addSend(0, 1, XCOM_BROADCAST, XCOM_WORSTSENDDELAY);
|
||||
progHead = assembler.assemble(progHead, progEnd - progHead);
|
||||
progHead++;
|
||||
|
||||
assembler.addSend(0, 1, XCOM_BROADCAST, sendCycle);
|
||||
} else {
|
||||
int myTimeSlot = tileID - (IPUFIRSTRENDERTILE + 1);
|
||||
int direction = XCOM_EastOrWest(__builtin_ipu_get_tile_id(), aggrTile);
|
||||
int muxCycle = XCOM_WORSTRECVDELAY + myTimeSlot;
|
||||
int sendCycle = muxCycle - XCOM_TimeToMux(__builtin_ipu_get_tile_id(), aggrTile);
|
||||
assembler.addSend(0, 1, direction, sendCycle);
|
||||
progHead = assembler.assemble(progHead, progEnd - progHead);
|
||||
|
||||
assembler.reset();
|
||||
sendCycle = XCOM_WORSTSENDDELAY;
|
||||
muxCycle = sendCycle + XCOM_TimeToMux(aggrTile, __builtin_ipu_get_tile_id());
|
||||
int muxCycle = sendCycle + XCOM_TimeToMux(aggrTile, __builtin_ipu_get_tile_id());
|
||||
assembler.addRecv(0, 1, aggrTile, muxCycle);
|
||||
progHead = assembler.assemble(progHead, progEnd - progHead);
|
||||
progHead++;
|
||||
}
|
||||
|
||||
// assembler.reset();
|
||||
progHead = assembler.assemble(progHead, progEnd - progHead);
|
||||
progHead++;
|
||||
assembler.reset();
|
||||
|
||||
}
|
||||
|
||||
|
||||
// Runs the two-step aggregation exchange over the render tiles and returns the
// result as a bool (non-zero value in tileLocalTextureBuf[0] => true).
//
// Uses the tile-local globals tileLocalProgBuf (program directory + programs)
// and tileLocalTextureBuf (used here as the exchange scratch buffer).
// Takes no parameters; prints the aggregated value with printf as a side effect.
__SUPER__
|
||||
static bool checkRenderingFinished() {
|
||||
// Directory slots 1 and 2 of the program buffer hold the offsets of the two
// aggregation programs.
auto aggregateProgA = &tileLocalProgBuf[tileLocalProgBuf[1]];
|
||||
auto aggregateProgB = &tileLocalProgBuf[tileLocalProgBuf[2]];
|
||||
|
||||
// Value contributed by this tile to the aggregation (tile ID offset by 100).
tileLocalTextureBuf[0] = tileID + 100;
|
||||
// First exchange: the texture buffer is passed as both the second and third
// argument (presumably the read and write base of the exchange — confirm
// against XCOM_Execute).
XCOM_Execute(
|
||||
aggregateProgA,
|
||||
tileLocalTextureBuf,
|
||||
tileLocalTextureBuf
|
||||
);
|
||||
// Only the first render tile reduces the per-tile values and runs the second
// program.
if (tileID == IPUFIRSTRENDERTILE) {
|
||||
// Sum slots 1..IPUNUMRENDERTILES-1 into slot 0.
for (int i = 1; i < IPUNUMRENDERTILES; i++) {
|
||||
tileLocalTextureBuf[0] += tileLocalTextureBuf[i];
|
||||
}
|
||||
// Second exchange, run with a NULL third argument.
XCOM_Execute(
|
||||
aggregateProgB,
|
||||
tileLocalTextureBuf,
|
||||
NULL
|
||||
);
|
||||
}
|
||||
// NOTE(review): tileLocalTextureBuf is unsigned* but is printed with %d.
printf("Aggregation result: %d\n", tileLocalTextureBuf[0]);
|
||||
|
||||
// Implicit unsigned -> bool conversion: any non-zero aggregate yields true.
return tileLocalTextureBuf[0];
|
||||
}
|
||||
|
||||
extern "C"
|
||||
__SUPER__
|
||||
byte* IPU_R_RequestColumn(int texture, int column) {
|
||||
// progBuf starts with a program directory
|
||||
auto requestProg = &tileLocalProgBuf[tileLocalProgBuf[0]];
|
||||
auto aggregateProgA = &tileLocalProgBuf[tileLocalProgBuf[1]];
|
||||
auto aggregateProgB = &tileLocalProgBuf[tileLocalProgBuf[2]];
|
||||
auto receiveProg = &tileLocalProgBuf[tileLocalProgBuf[1]];
|
||||
auto aggregateProg = &tileLocalProgBuf[tileLocalProgBuf[2]];
|
||||
|
||||
if (textureFetchCount++ < 6) {
|
||||
int sourceTile = 0;
|
||||
XCOM_PatchMuxAndExecute(
|
||||
requestProg, // Prog
|
||||
tileLocalTextureBuf, // Read offset
|
||||
tileLocalTextureBuf, // Write offset
|
||||
muxInstructionOffset, // Patch offset
|
||||
textureTileLUT[sourceTile] // Mux value
|
||||
// Populate buffer with data to be exchanged
|
||||
IPUColRequest_t *request = (IPUColRequest_t*) tileLocalTextureBuf;
|
||||
request->texture = texture;
|
||||
request->column = column;
|
||||
*((unsigned*) &request[1]) = 0; // The Done Flag
|
||||
|
||||
XCOM_Execute(
|
||||
requestProg, // Prog
|
||||
tileLocalTextureBuf, // Read offset
|
||||
tileLocalCommsBuf // Write offset
|
||||
);
|
||||
|
||||
// aggrTile aggregates
|
||||
if (tileID == IPUFIRSTRENDERTILE) {
|
||||
tileLocalCommsBuf[0] = 0;
|
||||
}
|
||||
|
||||
int textureSourceTile = 0;
|
||||
XCOM_PatchMuxAndExecute(
|
||||
receiveProg, // Prog
|
||||
NULL, // Read offset
|
||||
tileLocalTextureBuf, // Write offset
|
||||
muxInstructionOffset, // Patch offset
|
||||
textureTileLUT[textureSourceTile] // Mux value
|
||||
);
|
||||
|
||||
XCOM_Execute(
|
||||
aggregateProg, // Prog
|
||||
tileLocalCommsBuf, // Read offset
|
||||
tileLocalCommsBuf // Write offset
|
||||
);
|
||||
|
||||
return (byte*) tileLocalTextureBuf;
|
||||
}
|
||||
|
||||
extern "C"
|
||||
__SUPER__
|
||||
void IPU_R_RenderTileDone() {
|
||||
// progBuf starts with a program directory
|
||||
auto requestProg = &tileLocalProgBuf[tileLocalProgBuf[0]];
|
||||
auto aggregateProg = &tileLocalProgBuf[tileLocalProgBuf[2]];
|
||||
|
||||
while (1) {
|
||||
|
||||
// Populate buffer with data to be exchanged
|
||||
IPUColRequest_t *request = (IPUColRequest_t*) tileLocalTextureBuf;
|
||||
request->texture = 0xffffffff;
|
||||
request->column = 0xffffffff;
|
||||
*((unsigned*) &request[1]) = 1; // The `done` Flag
|
||||
|
||||
XCOM_Execute(
|
||||
requestProg, // Prog
|
||||
tileLocalTextureBuf, // Read offset
|
||||
tileLocalCommsBuf // Write offset
|
||||
);
|
||||
|
||||
checkRenderingFinished();
|
||||
// aggrTile aggregates `done` flags
|
||||
unsigned aggr = true;
|
||||
if (tileID == IPUFIRSTRENDERTILE) {
|
||||
for (int i = 1; i < IPUNUMRENDERTILES; i++) {
|
||||
aggr &= tileLocalCommsBuf[i];
|
||||
}
|
||||
}
|
||||
tileLocalCommsBuf[0] = aggr;
|
||||
|
||||
return (byte*) tileLocalTextureBuf;
|
||||
// Don't bother receiving the response
|
||||
asm volatile(R"(
|
||||
sans 0
|
||||
sync 0x1
|
||||
)");
|
||||
|
||||
XCOM_Execute(
|
||||
aggregateProg, // Prog
|
||||
tileLocalCommsBuf, // Read offset
|
||||
tileLocalCommsBuf // Write offset
|
||||
);
|
||||
|
||||
if (tileLocalCommsBuf[0]) // Done flag
|
||||
return;
|
||||
}
|
||||
return columnCache[texture % NUMCACHECOLS];
|
||||
}
|
||||
|
||||
|
||||
@@ -228,36 +274,59 @@ byte* IPU_R_RequestColumn(int texture, int column) {
|
||||
// -------- Components for the sans tiles ------------ //
|
||||
|
||||
|
||||
// Assembles a single receive program into progBuf.
//
// progBuf:     destination buffer handed to the assembler.
// progBufSize: size passed to assembler.assemble() alongside progBuf.
//
// The program consists of one receive of sizeof(IPUColRequest_t)/sizeof(int)
// words. The value returned by assemble() is discarded.
extern "C"
|
||||
__SUPER__
|
||||
void IPU_R_InitSansTile(unsigned* progBuf, int progBufSize) {
|
||||
// TODO
|
||||
XCOMAssembler assembler;
|
||||
// Source tile is hard-coded to 0.
int srcTile = 0;
|
||||
int sendCycle = XCOM_WORSTSENDDELAY;
|
||||
// Mux cycle = send cycle plus XCOM_TimeToMux from srcTile to this tile.
int muxCycle = sendCycle + XCOM_TimeToMux(srcTile, __builtin_ipu_get_tile_id());
|
||||
// Message length in ints.
int messageSize = sizeof(IPUColRequest_t) / sizeof(int);
|
||||
// NOTE(review): the third argument is the literal 0 rather than srcTile; the
// two are equal today, but would diverge if srcTile changes.
assembler.addRecv(0, messageSize, 0, muxCycle);
|
||||
assembler.assemble(progBuf, progBufSize);
|
||||
}
|
||||
|
||||
extern "C"
|
||||
__SUPER__
|
||||
void IPU_R_Sans(unsigned* progBuf, int progBufSize) {
|
||||
(void) progBuf;
|
||||
(void) progBufSize;
|
||||
void IPU_R_Sans(unsigned* progBuf, unsigned* commsBuf) {
|
||||
// Start of buffer is a directory of programs
|
||||
auto recvProgram = &progBuf[progBuf[0]];
|
||||
auto sendProgram = &progBuf[progBuf[1]];
|
||||
auto aggrProgram = &progBuf[progBuf[2]];
|
||||
|
||||
for(; textureFetchCount < 6; textureFetchCount++) {
|
||||
while (1) {
|
||||
asm volatile(R"(
|
||||
sans 1
|
||||
sync 0x1
|
||||
)");
|
||||
|
||||
// TMP
|
||||
asm volatile(R"(
|
||||
sans 1
|
||||
sync 0x1
|
||||
)");
|
||||
XCOM_Execute(aggrProgram, commsBuf, commsBuf);
|
||||
|
||||
if (commsBuf[0])
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*
|
||||
THE PLAN
|
||||
|
||||
Prog 0 {
|
||||
Render tiles send request from textureBuf[0:1], then
|
||||
- aggrTile receives flags into comms buf
|
||||
- the rest send done flag from textureBuf[2]
|
||||
Texture tiles receive request into texture buf
|
||||
Sans tiles sans x 2
|
||||
}
|
||||
|
||||
Render tiles patch mux address
|
||||
aggrTile aggregates flags into commsBuf[0]
|
||||
Texture tiles fetch columns into textureBuf
|
||||
Sans tiles do nothing
|
||||
|
||||
Prog 1 {
|
||||
Render tiles receive cols into textureBuf
|
||||
TextureTiles send cols from texture buf
|
||||
Sans tiles do nothing (no sans)
|
||||
}
|
||||
|
||||
|
||||
Prog 2 {
|
||||
Render tiles receive doneFlag
|
||||
AggrTile sends done flag
|
||||
TextureTiles receive done flag
|
||||
Sans tiles receive done flag
|
||||
}
|
||||
|
||||
|
||||
|
||||
*/
|
||||
@@ -17,19 +17,17 @@ typedef struct {
|
||||
|
||||
|
||||
extern unsigned* tileLocalProgBuf;
|
||||
extern unsigned* tileLocalCommsBuf;
|
||||
extern unsigned* tileLocalTextureBuf;
|
||||
|
||||
|
||||
__SUPER__ void IPU_R_InitTextureTile(unsigned* progBuf, int progBufSize);
|
||||
|
||||
__SUPER__ void IPU_R_InitColumnRequester(unsigned* progBuf, int progBufSize);
|
||||
|
||||
__SUPER__ byte* IPU_R_RequestColumn(int texture, int column);
|
||||
|
||||
__SUPER__ void IPU_R_FulfilColumnRequest(unsigned* progBuf, unsigned* textureBuf);
|
||||
|
||||
__SUPER__ void IPU_R_Sans(unsigned* progBuf, int progBufSize);
|
||||
|
||||
__SUPER__ void IPU_R_FulfilColumnRequest(unsigned* progBuf, unsigned* textureBuf, unsigned* commsBuf);
|
||||
__SUPER__ void IPU_R_Sans(unsigned* progBuf, unsigned* commsBuf);
|
||||
__SUPER__ void IPU_R_RenderTileDone(void);
|
||||
|
||||
|
||||
#ifdef __cplusplus
|
||||
|
||||
@@ -49,6 +49,8 @@ struct
|
||||
[[
|
||||
poplar::constraint("region(*nonExecutableDummy) != region(*progBuf)"),
|
||||
poplar::constraint("elem(*textureCache) != elem(*progBuf)"),
|
||||
poplar::constraint("elem(*textureCache) != elem(*commsBuf)"),
|
||||
poplar::constraint("elem(*progBuf) != elem(*commsBuf)"),
|
||||
]]
|
||||
R_RenderPlayerView_Vertex : public poplar::SupervisorVertex {
|
||||
poplar::Input<poplar::Vector<unsigned char>> miscValues;
|
||||
@@ -56,22 +58,21 @@ R_RenderPlayerView_Vertex : public poplar::SupervisorVertex {
|
||||
poplar::InOut<poplar::Vector<
|
||||
int, poplar::VectorLayout::SPAN, 4, true>> nonExecutableDummy;
|
||||
poplar::InOut<poplar::Vector<unsigned>> progBuf;
|
||||
poplar::InOut<poplar::Vector<unsigned>> commsBuf;
|
||||
poplar::InOut<poplar::Vector<unsigned>> textureCache;
|
||||
|
||||
__SUPER__ void compute() {
|
||||
assert(&frame[0] == I_VideoBuffer);
|
||||
tileLocalProgBuf = &progBuf[0];
|
||||
tileLocalCommsBuf = &commsBuf[0];
|
||||
tileLocalTextureBuf = &textureCache[0];
|
||||
// TMP
|
||||
textureCache[0] = -1;
|
||||
textureCache[1] = 1701;
|
||||
|
||||
|
||||
IPU_R_RenderPlayerView_UnpackMiscValues(
|
||||
(R_RenderPlayerView_MiscValues_t*) &miscValues[0]
|
||||
);
|
||||
|
||||
R_RenderPlayerView(&players[displayplayer]);
|
||||
IPU_R_RenderTileDone();
|
||||
return;
|
||||
}
|
||||
};
|
||||
@@ -92,31 +93,43 @@ class R_ExecuteSetViewSize_Vertex : public poplar::SupervisorVertex {
|
||||
|
||||
struct
|
||||
[[
|
||||
poplar::constraint("region(*dummy) != region(*progBuf)"),
|
||||
poplar::constraint("region(*dummy) != region(*progBuf)"),
|
||||
poplar::constraint("elem(*textureBuf) != elem(*progBuf)"),
|
||||
poplar::constraint("elem(*textureBuf) != elem(*commsBuf)"),
|
||||
poplar::constraint("elem(*progBuf) != elem(*commsBuf)"),
|
||||
]]
|
||||
R_FulfilColumnRequests_Vertex : public poplar::SupervisorVertex {
|
||||
poplar::InOut<poplar::Vector<
|
||||
int, poplar::VectorLayout::SPAN, 4, true>> dummy;
|
||||
poplar::InOut<poplar::Vector<unsigned>> progBuf;
|
||||
poplar::InOut<poplar::Vector<unsigned>> commsBuf;
|
||||
poplar::Output<poplar::Vector<unsigned>> textureBuf;
|
||||
|
||||
__SUPER__ void compute() {
|
||||
IPU_R_FulfilColumnRequest(&progBuf[0], &textureBuf[0]);
|
||||
IPU_R_FulfilColumnRequest(&progBuf[0], &textureBuf[0], &commsBuf[0]);
|
||||
}
|
||||
};
|
||||
|
||||
struct R_InitTextureTile_Vertex : public poplar::SupervisorVertex {
|
||||
// Supervisor vertex whose compute() forwards its program and comms buffers
// to IPU_R_Sans.
//
// The attribute list carries Poplar placement constraints between the vertex
// fields (presumably keeping the named fields in different memory
// regions/elements — confirm against the Poplar vertex-constraint docs).
struct
|
||||
[[
|
||||
poplar::constraint("region(*dummy) != region(*progBuf)"),
|
||||
poplar::constraint("elem(*progBuf) != elem(*commsBuf)"),
|
||||
]]
|
||||
R_Sans_Vertex : public poplar::SupervisorVertex {
|
||||
// Not read by compute(); it is referenced only by the region constraint above.
poplar::InOut<poplar::Vector<
|
||||
int, poplar::VectorLayout::SPAN, 4, true>> dummy;
|
||||
// Program buffer, passed to IPU_R_Sans as its first argument.
poplar::InOut<poplar::Vector<unsigned>> progBuf;
|
||||
// Comms buffer, passed to IPU_R_Sans as its second argument.
poplar::InOut<poplar::Vector<unsigned>> commsBuf;
|
||||
|
||||
// Entry point: passes pointers to the first element of each buffer.
__SUPER__ void compute() {
|
||||
IPU_R_Sans(&progBuf[0], &commsBuf[0]);
|
||||
}
|
||||
};
|
||||
|
||||
// Supervisor vertex that initialises its program buffer by calling
// IPU_R_InitTextureTile.
struct R_InitTextureOrSans_Vertex : public poplar::SupervisorVertex {
|
||||
// Output buffer that IPU_R_InitTextureTile writes into.
poplar::Output<poplar::Vector<unsigned>> progBuf;
|
||||
|
||||
// Entry point: passes the buffer start and its element count.
__SUPER__ void compute() {
|
||||
IPU_R_InitTextureTile(&progBuf[0], progBuf.size());
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
// Field-less supervisor vertex whose compute() calls IPU_R_Sans with NULL for
// both arguments.
struct R_Sans_Vertex : public poplar::SupervisorVertex {
|
||||
__SUPER__ void compute() {
|
||||
// No buffers are wired to this vertex, so both arguments are NULL.
IPU_R_Sans(NULL, NULL);
|
||||
}
|
||||
};
|
||||
@@ -96,9 +96,11 @@ void IpuDoom::buildIpuGraph() {
|
||||
// Stuff for exchange programs
|
||||
poplar::Tensor nonExecutableDummy = m_ipuGraph.addVariable(poplar::INT, {totalTiles, 1}, "nonExecutableDummy");
|
||||
poplar::Tensor progBuf = m_ipuGraph.addVariable(poplar::UNSIGNED_INT, {totalTiles, IPUPROGBUFSIZE}, "progBuf");
|
||||
poplar::Tensor commsBuf = m_ipuGraph.addVariable(poplar::UNSIGNED_INT, {totalTiles, IPUCOMMSBUFSIZE}, "commsBuf");
|
||||
for (unsigned t = 0; t < totalTiles; ++t) {
|
||||
m_ipuGraph.setTileMapping(nonExecutableDummy[t], t);
|
||||
m_ipuGraph.setTileMapping(progBuf[t], t);
|
||||
m_ipuGraph.setTileMapping(commsBuf[t], t);
|
||||
}
|
||||
|
||||
|
||||
@@ -169,13 +171,12 @@ void IpuDoom::buildIpuGraph() {
|
||||
m_ipuGraph.setTileMapping(vtx, logicalTile);
|
||||
m_ipuGraph.setPerfEstimate(vtx, 100);
|
||||
}
|
||||
poplar::ComputeSet R_InitTextureTile_CS = m_ipuGraph.addComputeSet("R_InitTextureTile_CS");
|
||||
for (int textureTile = 0; textureTile < IPUNUMTEXTURETILES; ++textureTile) {
|
||||
int logicalTile = IPUFIRSTTEXTURETILE + textureTile;
|
||||
vtx = m_ipuGraph.addVertex(R_InitTextureTile_CS, "R_InitTextureTile_Vertex", {
|
||||
{"progBuf", progBuf[logicalTile]},
|
||||
poplar::ComputeSet R_InitTextureOrSans_CS = m_ipuGraph.addComputeSet("R_InitTextureOrSans_CS");
|
||||
for (unsigned tile = IPUFIRSTTEXTURETILE; tile < totalTiles; ++tile) {
|
||||
vtx = m_ipuGraph.addVertex(R_InitTextureOrSans_CS, "R_InitTextureOrSans_Vertex", {
|
||||
{"progBuf", progBuf[tile]},
|
||||
});
|
||||
m_ipuGraph.setTileMapping(vtx, logicalTile);
|
||||
m_ipuGraph.setTileMapping(vtx, tile);
|
||||
m_ipuGraph.setPerfEstimate(vtx, 2000);
|
||||
}
|
||||
|
||||
@@ -186,7 +187,7 @@ void IpuDoom::buildIpuGraph() {
|
||||
);
|
||||
|
||||
poplar::program::Sequence R_Init_prog({
|
||||
poplar::program::Execute(R_InitTextureTile_CS),
|
||||
poplar::program::Execute(R_InitTextureOrSans_CS),
|
||||
poplar::program::Copy(miscValuesStream, m_miscValuesBuf),
|
||||
poplar::program::Repeat(2, poplar::program::Sequence({ // <- number of R_Init_CS steps
|
||||
poplar::program::Execute(R_Init_CS),
|
||||
@@ -280,7 +281,7 @@ void IpuDoom::buildIpuGraph() {
|
||||
poplar::UNSIGNED_INT,
|
||||
{ IPUNUMRENDERTILES,
|
||||
IPUNUMTEXTURECACHELINES * IPUTEXTURECACHELINESIZE },
|
||||
"textureBuf");
|
||||
"textureCache");
|
||||
|
||||
poplar::ComputeSet R_RenderPlayerView_CS = m_ipuGraph.addComputeSet("R_RenderPlayerView_CS");
|
||||
for (int renderTile = 0; renderTile < IPUNUMRENDERTILES; ++renderTile) {
|
||||
@@ -289,6 +290,7 @@ void IpuDoom::buildIpuGraph() {
|
||||
{"frame", ipuFrameSlices[renderTile]},
|
||||
{"textureCache", textureCache[renderTile]},
|
||||
{"progBuf", progBuf[logicalTile]},
|
||||
{"commsBuf", commsBuf[logicalTile]},
|
||||
{"nonExecutableDummy", nonExecutableDummy[logicalTile]},
|
||||
{"miscValues", m_miscValuesBuf},
|
||||
});
|
||||
@@ -302,13 +304,19 @@ void IpuDoom::buildIpuGraph() {
|
||||
{"dummy", nonExecutableDummy[logicalTile]},
|
||||
{"textureBuf", textureBuf[textureTile]},
|
||||
{"progBuf", progBuf[logicalTile]},
|
||||
{"commsBuf", commsBuf[logicalTile]},
|
||||
});
|
||||
m_ipuGraph.setTileMapping(vtx, logicalTile);
|
||||
m_ipuGraph.setPerfEstimate(vtx, 1000);
|
||||
m_ipuGraph.setTileMapping(textureBuf[textureTile], textureTile);
|
||||
}
|
||||
for (unsigned tile = IPUFIRSTTEXTURETILE + IPUNUMTEXTURETILES; tile < totalTiles; ++tile) {
|
||||
m_ipuGraph.setTileMapping(m_ipuGraph.addVertex(R_RenderPlayerView_CS, "R_Sans_Vertex"), tile);
|
||||
vtx = m_ipuGraph.addVertex(R_RenderPlayerView_CS, "R_Sans_Vertex", {
|
||||
{"dummy", nonExecutableDummy[tile]},
|
||||
{"progBuf", progBuf[tile]},
|
||||
{"commsBuf", commsBuf[tile]},
|
||||
});
|
||||
m_ipuGraph.setTileMapping(vtx, tile);
|
||||
}
|
||||
// Cache line is used as the aggregation buffer, make sure it's big enough
|
||||
assert(IPUTEXTURECACHELINESIZE >= IPUNUMRENDERTILES);
|
||||
@@ -316,9 +324,7 @@ void IpuDoom::buildIpuGraph() {
|
||||
poplar::program::Sequence R_RenderPlayerView_prog({
|
||||
poplar::program::Copy(miscValuesStream, m_miscValuesBuf),
|
||||
poplar::program::Copy(frameInStream, ipuFrame),
|
||||
poplar::program::Sync(poplar::SyncType::INTERNAL),
|
||||
poplar::program::Execute(R_RenderPlayerView_CS),
|
||||
poplar::program::Sync(poplar::SyncType::INTERNAL),
|
||||
poplar::program::Copy(ipuFrame, frameOutStream),
|
||||
});
|
||||
|
||||
@@ -340,7 +346,8 @@ void IpuDoom::buildIpuGraph() {
|
||||
// ---------------- Final prog --------------//
|
||||
|
||||
m_ipuEngine = std::make_unique<poplar::Engine>(std::move(poplar::Engine(
|
||||
m_ipuGraph, {
|
||||
m_ipuGraph,
|
||||
{
|
||||
IPU_MiscSetup_Prog,
|
||||
G_DoLoadLevel_prog,
|
||||
G_Ticker_prog,
|
||||
@@ -350,7 +357,9 @@ void IpuDoom::buildIpuGraph() {
|
||||
R_ExecuteSetViewSize_prog,
|
||||
R_Init_prog,
|
||||
IPU_Init_Prog,
|
||||
})));
|
||||
},
|
||||
{{"opt.enableSkipSyncs", "false"}}
|
||||
)));
|
||||
|
||||
m_ipuEngine->connectStream("miscValues-stream", m_miscValuesBuf_h);
|
||||
m_ipuEngine->connectStream("lumpNum-stream", &m_lumpNum_h);
|
||||
|
||||
Reference in New Issue
Block a user