diff --git a/Makefile b/Makefile index 7355295..62a14ae 100644 --- a/Makefile +++ b/Makefile @@ -5,7 +5,6 @@ CPU_OBJ = $(addprefix build/cpu_obj/, ipu_host.o ipu_transfer.o i_system.o d_mode.o i_main.o m_argv.o m_misc.o net_common.o net_dedicated.o net_io.o net_packet.o net_query.o net_sdl.o net_server.o net_structrw.o aes_prng.o d_event.o d_iwad.o d_loop.o gusconf.o i_cdmus.o i_input.o i_joystick.o i_sdlmusic.o i_sdlsound.o i_sound.o i_timer.o i_video.o i_videohr.o midifile.o mus2mid.o m_bbox.o m_cheat.o m_config.o m_controls.o m_fixed.o net_client.o sha1.o memio.o tables.o v_video.o w_checksum.o w_main.o w_wad.o w_file.o w_file_stdc.o w_file_posix.o w_merge.o z_zone.o net_loop.o am_map.o d_items.o d_main.o d_net.o doomdef.o doomstat.o dstrings.o f_wipe.o g_game.o hu_lib.o hu_stuff.o info.o m_menu.o m_random.o p_ceilng.o p_doors.o p_enemy.o p_floor.o p_inter.o p_lights.o p_map.o p_maputl.o p_mobj.o p_plats.o p_pspr.o p_saveg.o p_setup.o p_sight.o p_spec.o p_switch.o p_telept.o p_tick.o p_user.o r_bsp.o r_data.o r_draw.o r_main.o r_plane.o r_segs.o r_sky.o r_things.o s_sound.o sounds.o st_lib.o st_stuff.o statdump.o wi_stuff.o) IPU_OBJ = $(addprefix build/ipu_obj/, \ ipu_vertices.gp \ - ipu_print.gp \ ipu_transfer.gp \ ipu_malloc.gp \ i_video.gp \ @@ -37,6 +36,7 @@ CPU_FLAGS = -I src -I /usr/local/include/SDL2 -I/usr/include/libpng16 -I/opt/pop -O2 # -m32 IPU_FLAGS = -I src/ipu \ + --target=ipu2 \ -Wall -Werror -Wno-unused-variable \ -O2 diff --git a/README.md b/README.md index c0c1059..e134361 100644 --- a/README.md +++ b/README.md @@ -13,7 +13,7 @@ make # Download shareware resource pack wget https://distro.ibiblio.org/slitaz/sources/packages/d/doom1.wad # Run -./build/doom -iwad doom1.wad -width 320 -nosound +./build/doom -iwad doom1.wad -width 320 -nosound -nomouse ``` diff --git a/src/d_main.c b/src/d_main.c index 21ecef9..a33c661 100644 --- a/src/d_main.c +++ b/src/d_main.c @@ -206,9 +206,9 @@ void D_Display(void) { if (!gametic) break; - // if (automapactive) - // AM_Drawer(); - IPU_AM_Drawer(); // JOSEF + if (automapactive) + AM_Drawer(); + // IPU_AM_Drawer(); // JOSEF. Disabled while I split render tiles if (wipe || (viewheight != SCREENHEIGHT && fullscreen)) diff --git a/src/ipu/am_map.c b/src/ipu/am_map.c index df5d2ef..d197952 100644 --- a/src/ipu/am_map.c +++ b/src/ipu/am_map.c @@ -9,7 +9,6 @@ #include "tables.h" #include "v_video.h" -#include "ipu_print.h" #include "ipu_interface.h" @@ -467,7 +466,6 @@ void AM_Start(void) { } AM_initVariables(); // AM_loadPics(); // JOSEF: pics loaded once on startup - ipuprint("Starting automap"); } // diff --git a/src/ipu/g_game.c b/src/ipu/g_game.c index 1042127..c3c18f7 100644 --- a/src/ipu/g_game.c +++ b/src/ipu/g_game.c @@ -5,8 +5,6 @@ #include "g_game.h" #include "doomstat.h" -#include "ipu_print.h" - #define SAVEGAMESIZE 0x2c000 @@ -143,8 +141,6 @@ void G_Ticker(void) { int buf; ticcmd_t *cmd; - reset_ipuprint(); - /* LATER // do player reborns if needed for (i = 0; i < MAXPLAYERS; i++) diff --git a/src/ipu/ipu_interface.h b/src/ipu/ipu_interface.h index e0138e1..39b3556 100644 --- a/src/ipu/ipu_interface.h +++ b/src/ipu/ipu_interface.h @@ -13,12 +13,16 @@ extern "C" { #include "tables.h" -#define IPUMAXLUMPBYTES 32000 -#define IPUMISCVALUESSIZE 116 -#define IPUPRINTBUFSIZE 2048 -#define IPUMAXEVENTSPERTIC 5 -#define IPUAMMARKBUFSIZE 544 -#define IPUMAPPEDLINEUPDATES 2 +#define IPUMAXLUMPBYTES (32000) +#define IPUMISCVALUESSIZE (116) +#define IPUMAXEVENTSPERTIC (5) +#define IPUAMMARKBUFSIZE (544) +#define IPUMAPPEDLINEUPDATES (2) + +#define IPUFIRSTRENDERTILE (0) +#define IPUNUMRENDERTILES (8) +#define IPUCOLSPERRENDERTILE (SCREENWIDTH / IPUNUMRENDERTILES) + typedef struct { diff --git a/src/ipu/ipu_print.c b/src/ipu/ipu_print.c deleted file mode 100644 index 772c552..0000000 --- a/src/ipu/ipu_print.c +++ /dev/null @@ -1,60 +0,0 @@ -#include "ipu_interface.h" - -static char printbuf[IPUPRINTBUFSIZE]; -static char* printbuf_head = printbuf; -static const char* printbuf_end = &printbuf[IPUPRINTBUFSIZE - 1]; -static int printed; - -void reset_ipuprint() { - printbuf_head = printbuf; - *printbuf_head = '\0'; - printed = 0; -} - -void ipuprint(const char* str) { - while (*str != '\0' && printbuf_head != printbuf_end) { - *(printbuf_head++) = *(str++); - printed += 1; - } - *printbuf_head = '\0'; -} - -void ipuprintnum(int x) { - if (x < 0) { - *(printbuf_head++) = '-'; - x = -x; - } - if (x == 0) { - *(printbuf_head++) = '0'; - } - const int MAX_DIGITS = 10; - char digits[MAX_DIGITS]; - int i; - for (i = 0; x != 0 && i < MAX_DIGITS; ++i) { - digits[i] = 0x30 + (x % 10); - x /= 10; - } - for (i -= 1; i >= 0; --i) { - *(printbuf_head++) = digits[i]; - } - *printbuf_head = '\0'; -} - -void get_ipuprint_data(char* dst, int dst_size) { - int limit = (dst_size < IPUPRINTBUFSIZE) ? dst_size : IPUPRINTBUFSIZE; - for (int i = 0; i < limit; ++i) { - dst[i] = printbuf[i]; - } - if (limit <= printed) { - dst[IPUPRINTBUFSIZE - 10] = '['; - dst[IPUPRINTBUFSIZE - 9] = 'c'; - dst[IPUPRINTBUFSIZE - 8] = 'o'; - dst[IPUPRINTBUFSIZE - 7] = 'n'; - dst[IPUPRINTBUFSIZE - 6] = 't'; - dst[IPUPRINTBUFSIZE - 5] = '.'; - dst[IPUPRINTBUFSIZE - 4] = '.'; - dst[IPUPRINTBUFSIZE - 3] = '.'; - dst[IPUPRINTBUFSIZE - 2] = ']'; - dst[IPUPRINTBUFSIZE - 1] = '\0'; - } -} diff --git a/src/ipu/ipu_print.h b/src/ipu/ipu_print.h deleted file mode 100644 index 67f38af..0000000 --- a/src/ipu/ipu_print.h +++ /dev/null @@ -1,17 +0,0 @@ -# ifndef __IPU_PRINT_H__ -#define __IPU_PRINT_H__ -#ifdef __cplusplus -extern "C" { -#endif - - -void reset_ipuprint(); -void ipuprint(const char* str); -void ipuprintnum(int x); -void get_ipuprint_data(char* dst, int dst_size); - - -#ifdef __cplusplus -} -#endif -#endif // __IPU_PRINT_H__ \ No newline at end of file diff --git a/src/ipu/ipu_vertices.cpp b/src/ipu/ipu_vertices.cpp index 17f7e52..6e6f240 100644 --- a/src/ipu/ipu_vertices.cpp +++ b/src/ipu/ipu_vertices.cpp @@ -3,8 +3,8 @@ #include #include "doomtype.h" +#include "r_main.h" #include "i_video.h" -#include "ipu_print.h" typedef uint8_t pixel_t; @@ -37,32 +37,26 @@ class AM_Drawer_Vertex : public poplar::Vertex { }; -class IPU_GetPrintBuf_Vertex : public poplar::Vertex { - public: - poplar::Output> printbuf; +struct IPU_Init_Vertex : public poplar::SupervisorVertex { + poplar::InOut> frame; + + __attribute__((target("supervisor"))) + void compute() { + + I_VideoBuffer = &frame[0]; + + // Deduce logical tile ID + int physical = __builtin_ipu_get_tile_id(); + int row = physical / 64; + int subcolumn = physical % 64; + int column = subcolumn / 4; + int subtile = subcolumn % 4; + int logical = (92 * column) + (subtile / 2); + if (subtile & 1) { + row = 45 - row; + } + logical += 2 * row; + tileID = logical; - bool compute() { - get_ipuprint_data(&printbuf[0], printbuf.size()); - reset_ipuprint(); - return true; } -}; - - -// class G_Ticker_Vertex : public poplar::Vertex { Call G_ticker in D_ProcessEvents -// public: - -// bool compute() { -// return true; -// } -// }; - -class D_ProcessEvents_Vertex : public poplar::Vertex { - public: - - bool compute() { - // Unpack and process events - return true; - } -}; - +}; \ No newline at end of file diff --git a/src/ipu/m_fixed.c b/src/ipu/m_fixed.c index 30d81f8..6d37d17 100644 --- a/src/ipu/m_fixed.c +++ b/src/ipu/m_fixed.c @@ -22,7 +22,6 @@ #include #include "m_fixed.h" -#include @@ -38,6 +37,12 @@ FixedMul fixed_t b ) { return ((int64_t) a * (int64_t) b) >> FRACBITS; + // JOSEF: What about this? fewer inst. + // return (((a >> FRACBITS) * (b >> FRACBITS)) << FRACBITS) + // | (((a * b) >> FRACBITS) & 0xffff); + // OR: + // return (((a >> FRACBITS) * b) & 0xffff0000u) + // | ((a * b) >> FRACBITS); } diff --git a/src/ipu/m_fixed.h b/src/ipu/m_fixed.h index 1f824df..3a66c16 100644 --- a/src/ipu/m_fixed.h +++ b/src/ipu/m_fixed.h @@ -31,7 +31,6 @@ typedef int fixed_t; -int abs(int); // JOSEF fixed_t FixedMul (fixed_t a, fixed_t b); fixed_t FixedDiv (fixed_t a, fixed_t b); diff --git a/src/ipu/p_setup.c b/src/ipu/p_setup.c index e58a815..b1f27fe 100644 --- a/src/ipu/p_setup.c +++ b/src/ipu/p_setup.c @@ -9,7 +9,6 @@ #include "ipu_malloc.h" #include "ipu_transfer.h" -#include "ipu_print.h" #include "print.h" @@ -469,9 +468,6 @@ void P_SetupLevel_pt0(const unsigned char unused) { */ leveltime = 0; - reset_ipuprint(); - // ipuprint("Map starts at lump "); ipuprintnum(gamelumpnum); ipuprint("\n"); - return; // note: most of this ordering is important diff --git a/src/ipu/p_setup_codelets.cpp b/src/ipu/p_setup_codelets.cpp index d7b5826..41f2173 100644 --- a/src/ipu/p_setup_codelets.cpp +++ b/src/ipu/p_setup_codelets.cpp @@ -90,17 +90,12 @@ class P_SetupLevel_Vertex : public poplar::Vertex { // ------------ IPU_Setup ------------ // -class IPU_Setup_UnpackMarknumSprites_Vertex : public poplar::Vertex { +struct IPU_Setup_UnpackMarknumSprites_Vertex : public poplar::Vertex { poplar::Input> buf; - poplar::InOut> frame; - public: - bool compute() { + void compute() { IPU_Setup_UnpackMarkNums(&buf[0]); - // Initialise I_VideoBuffer here for now :) - I_VideoBuffer = &frame[0]; - - return true; + return; } }; \ No newline at end of file diff --git a/src/ipu/r_bsp.c b/src/ipu/r_bsp.c index 96f1803..941f1ed 100644 --- a/src/ipu/r_bsp.c +++ b/src/ipu/r_bsp.c @@ -35,6 +35,7 @@ //#include "r_local.h" +// #include "ipu_interface.h" #include seg_t *curline; diff --git a/src/ipu/r_codelets.cpp b/src/ipu/r_codelets.cpp index 32da5a4..f51faad 100644 --- a/src/ipu/r_codelets.cpp +++ b/src/ipu/r_codelets.cpp @@ -27,6 +27,7 @@ struct R_RenderPlayerView_Vertex : public poplar::Vertex { IPU_R_RenderPlayerView_UnpackMiscValues( (R_RenderPlayerView_MiscValues_t*) &miscValues[0] ); + R_RenderPlayerView(&players[displayplayer]); return ; } diff --git a/src/ipu/r_draw.c b/src/ipu/r_draw.c index d80bfbf..a62c401 100644 --- a/src/ipu/r_draw.c +++ b/src/ipu/r_draw.c @@ -40,6 +40,7 @@ */ #include +#include "ipu_interface.h" // ? #define MAXWIDTH 1120 @@ -112,6 +113,10 @@ void R_DrawColumn(void) { fixed_t frac; fixed_t fracstep; + // JOSEF: Each tile only renders a portion of the frame + if ((dc_x < tileLeftClip) || (dc_x >= tileRightClip)) + return; + count = dc_yh - dc_yl; // Zero length, column does not exceed a pixel. @@ -142,7 +147,7 @@ void R_DrawColumn(void) { // using a lighting/special effects LUT. *dest = colour; // LATER: dc_colormap[dc_source[(frac >> FRACBITS) & 127]]; // LATER - dest += SCREENWIDTH; + dest += IPUCOLSPERRENDERTILE; // JOSEF: SCREENWIDTH; // frac += fracstep; // LATER } while (count--); @@ -702,7 +707,7 @@ void R_InitBuffer(int width, int height) { // Column offset. For windows. for (i = 0; i < width; i++) - columnofs[i] = viewwindowx + i; + columnofs[i] = viewwindowx + i - tileLeftClip; // Samw with base row offset. if (width == SCREENWIDTH) @@ -711,8 +716,11 @@ void R_InitBuffer(int width, int height) { viewwindowy = (SCREENHEIGHT - SBARHEIGHT - height) >> 1; // Preclaculate all row offsets. - for (i = 0; i < height; i++) - ylookup[i] = I_VideoBuffer + (i + viewwindowy) * SCREENWIDTH; + for (i = 0; i < height; i++) { + // ylookup[i] = I_VideoBuffer + (i + viewwindowy) * SCREENWIDTH; + // JOSEF + ylookup[i] = I_VideoBuffer + (i + viewwindowy) * IPUCOLSPERRENDERTILE; + } } /* diff --git a/src/ipu/r_main.c b/src/ipu/r_main.c index 1ca50d1..620025e 100644 --- a/src/ipu/r_main.c +++ b/src/ipu/r_main.c @@ -43,6 +43,7 @@ #include "r_things.h" #include "tables.h" +#include "ipu_interface.h" #include "print.h" @@ -108,17 +109,25 @@ angle_t xtoviewangle[SCREENWIDTH + 1]; // bumped light from gun blasts int extralight; -// void (*colfunc)(void); +// void (*colfunc)(void); // JOSEF -void colfunc() { +void colfunc() { // JOSEF: Don't call through pointer, explicit call if (!detailshift) R_DrawColumn(); - // else R_DrawColumnLow(); // LATER + // else R_DrawColumnLow(); // LATER } void (*basecolfunc)(void); void (*fuzzcolfunc)(void); void (*transcolfunc)(void); void (*spanfunc)(void); + +// JOSEF +int tileID; +int tileLeftClip; +int tileRightClip; + +int abs(int); + // // R_AddPointToBox // Expand a given bbox @@ -601,7 +610,7 @@ void R_ExecuteSetViewSize(void) { /* // JOSEF: Removed. Avoid calls via pointer on IPU, - // instead check detailshift on every call + // instead check detailshift on every call of R_DrawColumn etc if (!detailshift) { colfunc = basecolfunc = R_DrawColumn; fuzzcolfunc = R_DrawFuzzColumn; @@ -615,10 +624,15 @@ void R_ExecuteSetViewSize(void) { } */ + // JOSEF + tileLeftClip = tileID * IPUCOLSPERRENDERTILE; + tileRightClip = tileLeftClip + IPUCOLSPERRENDERTILE; + R_InitBuffer(scaledviewwidth, viewheight); R_InitTextureMapping(); + /* LATER // psprite scales diff --git a/src/ipu/r_main.h b/src/ipu/r_main.h index 6885e53..efd868c 100644 --- a/src/ipu/r_main.h +++ b/src/ipu/r_main.h @@ -90,6 +90,12 @@ extern void (*fuzzcolfunc)(void); // No shadow effects on floors. extern void (*spanfunc)(void); + +// JOSEF +extern int tileID; +extern int tileLeftClip; +extern int tileRightClip; + // // Utility functions. int R_PointOnSide(fixed_t x, fixed_t y, node_t *node); diff --git a/src/ipu/r_segs.c b/src/ipu/r_segs.c index a3ee85a..f434895 100644 --- a/src/ipu/r_segs.c +++ b/src/ipu/r_segs.c @@ -92,6 +92,8 @@ lighttable_t **walllights; short *maskedtexturecol; +int abs(int); // JOSEF + /* // // R_RenderMaskedSegRange diff --git a/src/ipu/v_video.c b/src/ipu/v_video.c index 12fc71f..8e48fa8 100644 --- a/src/ipu/v_video.c +++ b/src/ipu/v_video.c @@ -40,7 +40,7 @@ #include "i_video.h" #include "v_video.h" -#include "ipu_print.h" +#include "print.h" // Blending table used for fuzzpatch, etc. // Only used in Heretic/Hexen @@ -161,7 +161,7 @@ void V_DrawPatch(int x, int y, patch_t *patch) || y < 0 || y + SHORT(patch->height) > SCREENHEIGHT) { - ipuprint("Bad V_DrawPatch!\n"); + printf("Bad V_DrawPatch!\n"); } /* LATER diff --git a/src/ipu_host.cpp b/src/ipu_host.cpp index 8af46b2..c10cb91 100644 --- a/src/ipu_host.cpp +++ b/src/ipu_host.cpp @@ -71,36 +71,26 @@ IpuDoom::~IpuDoom(){}; void IpuDoom::buildIpuGraph() { m_ipuGraph.addCodelets("build/ipu_rt.gp"); - // -------- GetPrintbuf_CS (helper) ------ // - - poplar::Tensor printbuf = - m_ipuGraph.addVariable(poplar::CHAR, {(ulong)IPUPRINTBUFSIZE}, "ipuprint_buf"); - m_ipuGraph.setTileMapping(printbuf, 0); - auto printbufOutStream = - m_ipuGraph.addDeviceToHostFIFO("printbuf-stream", poplar::CHAR, IPUPRINTBUFSIZE); - - poplar::ComputeSet GetPrintbuf_CS = m_ipuGraph.addComputeSet("GetPrintbuf_CS"); - poplar::VertexRef vtx = m_ipuGraph.addVertex(GetPrintbuf_CS, "IPU_GetPrintBuf_Vertex", {{"printbuf", printbuf}}); - m_ipuGraph.setTileMapping(vtx, 0); - m_ipuGraph.setPerfEstimate(vtx, IPUPRINTBUFSIZE); - - poplar::program::Sequence GetPrintbuf_prog({ - poplar::program::Execute(GetPrintbuf_CS), - poplar::program::Copy(printbuf, printbufOutStream), - }); - - // -------- AM_Drawer_CS ------ // - + // ---- The main frame buffer ---- // poplar::Tensor ipuFrame = - m_ipuGraph.addVariable(poplar::UNSIGNED_CHAR, {(ulong)SCREENWIDTH * SCREENHEIGHT}, "frame"); - m_ipuGraph.setTileMapping(ipuFrame, 0); + m_ipuGraph.addVariable(poplar::UNSIGNED_CHAR, {(ulong)SCREENHEIGHT, (ulong)SCREENWIDTH}, "frame"); + assert(IPUCOLSPERRENDERTILE * IPUNUMRENDERTILES == SCREENWIDTH); + std::vector ipuFrameSlices; + for (int renderTile = 0; renderTile < IPUNUMRENDERTILES; ++renderTile) { + ulong start = renderTile * IPUCOLSPERRENDERTILE; + ulong end = start + IPUCOLSPERRENDERTILE; + ipuFrameSlices.push_back(ipuFrame.slice({0, start}, {SCREENHEIGHT, end}).flatten()); + m_ipuGraph.setTileMapping(ipuFrameSlices.back(), renderTile + IPUFIRSTRENDERTILE); + } auto frameInStream = m_ipuGraph.addHostToDeviceFIFO("frame-instream", poplar::UNSIGNED_CHAR, SCREENWIDTH * SCREENHEIGHT); auto frameOutStream = m_ipuGraph.addDeviceToHostFIFO("frame-outstream", poplar::UNSIGNED_CHAR, SCREENWIDTH * SCREENHEIGHT); + // -------- AM_Drawer_CS ------ // + poplar::ComputeSet AM_Drawer_CS = m_ipuGraph.addComputeSet("AM_Drawer_CS"); - vtx = m_ipuGraph.addVertex(AM_Drawer_CS, "AM_Drawer_Vertex", {{"frame", ipuFrame}}); + auto vtx = m_ipuGraph.addVertex(AM_Drawer_CS, "AM_Drawer_Vertex", {{"frame", ipuFrameSlices[0]}}); m_ipuGraph.setTileMapping(vtx, 0); m_ipuGraph.setPerfEstimate(vtx, 10000000); @@ -113,52 +103,56 @@ void IpuDoom::buildIpuGraph() { // -------- IPU_G_DoLoadLevel ------ // m_miscValuesBuf = m_ipuGraph.addVariable(poplar::UNSIGNED_CHAR, {(ulong)IPUMISCVALUESSIZE}, "miscValues"); - m_ipuGraph.setTileMapping(m_miscValuesBuf, 0); + m_ipuGraph.setTileMapping(m_miscValuesBuf, IPUFIRSTRENDERTILE); auto miscValuesStream = m_ipuGraph.addHostToDeviceFIFO("miscValues-stream", poplar::UNSIGNED_CHAR, IPUMISCVALUESSIZE); poplar::ComputeSet G_DoLoadLevel_CS = m_ipuGraph.addComputeSet("G_DoLoadLevel_CS"); - vtx = m_ipuGraph.addVertex(G_DoLoadLevel_CS, "G_DoLoadLevel_Vertex", {{"miscValues", m_miscValuesBuf}}); - m_ipuGraph.setTileMapping(vtx, 0); - m_ipuGraph.setPerfEstimate(vtx, 10000000); + for (int renderTile = 0; renderTile < IPUNUMRENDERTILES; ++renderTile) { + vtx = m_ipuGraph.addVertex(G_DoLoadLevel_CS, "G_DoLoadLevel_Vertex", {{"miscValues", m_miscValuesBuf}}); + m_ipuGraph.setTileMapping(vtx, renderTile + IPUFIRSTRENDERTILE); + m_ipuGraph.setPerfEstimate(vtx, 10000000); + } - m_lumpNum = m_ipuGraph.addVariable(poplar::INT, {}, "lumpNum"); + m_lumpNum = m_ipuGraph.addVariable(poplar::INT, {IPUNUMRENDERTILES}, "lumpNum"); // Manually overlap lumbuf with framebuf, since they're not used at the same time assert(IPUMAXLUMPBYTES <= ipuFrame.numElements()); - poplar::Tensor lumpBuf = ipuFrame.slice(0, IPUMAXLUMPBYTES); - m_ipuGraph.setTileMapping(m_lumpNum, 0); + poplar::Tensor lumpBuf = ipuFrame.flatten().slice(0, IPUMAXLUMPBYTES); // TODO: Fix after frame split over multiple tiles auto lumpBufStream = m_ipuGraph.addHostToDeviceFIFO("lumpBuf-stream", poplar::UNSIGNED_CHAR, IPUMAXLUMPBYTES); auto lumpNumStream = m_ipuGraph.addDeviceToHostFIFO("lumpNum-stream", poplar::INT, 1); poplar::ComputeSet P_SetupLevel_CS = m_ipuGraph.addComputeSet("P_SetupLevel_CS"); - vtx = m_ipuGraph.addVertex(P_SetupLevel_CS, "P_SetupLevel_Vertex", { - {"lumpNum", m_lumpNum}, {"lumpBuf", lumpBuf}}); - m_ipuGraph.setTileMapping(vtx, 0); - m_ipuGraph.setPerfEstimate(vtx, 100); + for (int renderTile = 0; renderTile < IPUNUMRENDERTILES; ++renderTile) { + vtx = m_ipuGraph.addVertex(P_SetupLevel_CS, "P_SetupLevel_Vertex", { + {"lumpNum", m_lumpNum[renderTile]}, {"lumpBuf", lumpBuf}}); + m_ipuGraph.setTileMapping(vtx, renderTile + IPUFIRSTRENDERTILE); + m_ipuGraph.setPerfEstimate(vtx, 100); + m_ipuGraph.setTileMapping(m_lumpNum[renderTile], renderTile + IPUFIRSTRENDERTILE); + } poplar::program::Sequence G_DoLoadLevel_prog({ poplar::program::Copy(miscValuesStream, m_miscValuesBuf), poplar::program::Execute(G_DoLoadLevel_CS), - poplar::program::Repeat(11, poplar::program::Sequence({ + poplar::program::Repeat(11, poplar::program::Sequence({ // <- number of P_SetupLevel_CS steps poplar::program::Execute(P_SetupLevel_CS), - poplar::program::Copy(m_lumpNum, lumpNumStream), + poplar::program::Copy(m_lumpNum[0], lumpNumStream), // Only listen to first tile's requests poplar::program::Sync(poplar::SyncType::GLOBAL), // lumpnum must arrive before lump is loaded poplar::program::Copy(lumpBufStream, lumpBuf), })), - GetPrintbuf_prog, }); // ---------------- G_Ticker --------------// poplar::ComputeSet G_Ticker_CS = m_ipuGraph.addComputeSet("G_Ticker_CS"); - vtx = m_ipuGraph.addVertex(G_Ticker_CS, "G_Ticker_Vertex", {{"miscValues", m_miscValuesBuf}}); - m_ipuGraph.setTileMapping(vtx, 0); - m_ipuGraph.setPerfEstimate(vtx, 100); + for (int renderTile = 0; renderTile < IPUNUMRENDERTILES; ++renderTile) { + vtx = m_ipuGraph.addVertex(G_Ticker_CS, "G_Ticker_Vertex", {{"miscValues", m_miscValuesBuf}}); + m_ipuGraph.setTileMapping(vtx, renderTile + IPUFIRSTRENDERTILE); + m_ipuGraph.setPerfEstimate(vtx, 100); + } poplar::program::Sequence G_Ticker_prog({ poplar::program::Copy(miscValuesStream, m_miscValuesBuf), poplar::program::Execute(G_Ticker_CS), - GetPrintbuf_prog, }); @@ -166,44 +160,58 @@ void IpuDoom::buildIpuGraph() { poplar::ComputeSet G_Responder_CS = m_ipuGraph.addComputeSet("G_Responder_CS"); - vtx = m_ipuGraph.addVertex(G_Responder_CS, "G_Responder_Vertex", {{"miscValues", m_miscValuesBuf}}); - m_ipuGraph.setTileMapping(vtx, 0); - m_ipuGraph.setPerfEstimate(vtx, 100); + for (int renderTile = 0; renderTile < IPUNUMRENDERTILES; ++renderTile) { + vtx = m_ipuGraph.addVertex(G_Responder_CS, "G_Responder_Vertex", {{"miscValues", m_miscValuesBuf}}); + m_ipuGraph.setTileMapping(vtx, renderTile + IPUFIRSTRENDERTILE); + m_ipuGraph.setPerfEstimate(vtx, 100); + } poplar::program::Sequence G_Responder_prog({ poplar::program::Copy(miscValuesStream, m_miscValuesBuf), poplar::program::Execute(G_Responder_CS), - GetPrintbuf_prog, }); // -------------- IPU state setup ------------// + + poplar::ComputeSet IPU_Init_CS = m_ipuGraph.addComputeSet("IPU_Init_CS"); + for (int renderTile = 0; renderTile < IPUNUMRENDERTILES; ++renderTile) { + vtx = m_ipuGraph.addVertex(IPU_Init_CS, "IPU_Init_Vertex", + {{"frame", ipuFrameSlices[renderTile]}}); + m_ipuGraph.setTileMapping(vtx, renderTile + IPUFIRSTRENDERTILE); + m_ipuGraph.setPerfEstimate(vtx, 10000); + } + poplar::Tensor marknumSpriteBuf = m_ipuGraph.addVariable(poplar::UNSIGNED_CHAR, {(ulong)IPUAMMARKBUFSIZE}, "marknumSpriteBuf"); m_ipuGraph.setTileMapping(marknumSpriteBuf, 0); auto marknumSpriteBufStream = m_ipuGraph.addHostToDeviceFIFO("marknumSpriteBuf-stream", poplar::UNSIGNED_CHAR, IPUAMMARKBUFSIZE); poplar::ComputeSet IPU_Setup_UnpackMarknumSprites_CS = m_ipuGraph.addComputeSet("IPU_Setup_UnpackMarknumSprites_CS"); - vtx = m_ipuGraph.addVertex(IPU_Setup_UnpackMarknumSprites_CS, "IPU_Setup_UnpackMarknumSprites_Vertex", - {{"buf", marknumSpriteBuf}, {"frame", ipuFrame}}); - m_ipuGraph.setTileMapping(vtx, 0); - m_ipuGraph.setPerfEstimate(vtx, IPUAMMARKBUFSIZE * 100); + for (int renderTile = 0; renderTile < IPUNUMRENDERTILES; ++renderTile) { + vtx = m_ipuGraph.addVertex(IPU_Setup_UnpackMarknumSprites_CS, "IPU_Setup_UnpackMarknumSprites_Vertex", + {{"buf", marknumSpriteBuf}}); + m_ipuGraph.setTileMapping(vtx, renderTile + IPUFIRSTRENDERTILE); + m_ipuGraph.setPerfEstimate(vtx, IPUAMMARKBUFSIZE * 100); + } - poplar::program::Sequence IPU_Setup_prog({ + poplar::program::Sequence IPU_Init_Prog({ + poplar::program::Execute(IPU_Init_CS), poplar::program::Copy(marknumSpriteBufStream, marknumSpriteBuf), poplar::program::Execute(IPU_Setup_UnpackMarknumSprites_CS), - GetPrintbuf_prog, }); // -------- R_RenderPlayerView_CS ------ // poplar::ComputeSet R_RenderPlayerView_CS = m_ipuGraph.addComputeSet("R_RenderPlayerView_CS"); - vtx = m_ipuGraph.addVertex(R_RenderPlayerView_CS, "R_RenderPlayerView_Vertex", - {{"frame", ipuFrame}, {"miscValues", m_miscValuesBuf}}); - m_ipuGraph.setTileMapping(vtx, 0); - m_ipuGraph.setPerfEstimate(vtx, 10000000); + for (int renderTile = 0; renderTile < IPUNUMRENDERTILES; ++renderTile) { + vtx = m_ipuGraph.addVertex(R_RenderPlayerView_CS, "R_RenderPlayerView_Vertex", + {{"frame", ipuFrameSlices[renderTile]}, {"miscValues", m_miscValuesBuf}}); + m_ipuGraph.setTileMapping(vtx, renderTile + IPUFIRSTRENDERTILE); + m_ipuGraph.setPerfEstimate(vtx, 10000000); + } poplar::program::Sequence R_RenderPlayerView_prog({ poplar::program::Copy(miscValuesStream, m_miscValuesBuf), @@ -215,14 +223,15 @@ void IpuDoom::buildIpuGraph() { // -------- R_ExecuteSetViewSize_CS ------ // poplar::ComputeSet R_ExecuteSetViewSize_CS = m_ipuGraph.addComputeSet("R_ExecuteSetViewSize_CS"); - vtx = m_ipuGraph.addVertex(R_ExecuteSetViewSize_CS, "R_ExecuteSetViewSize_Vertex", {{"miscValues", m_miscValuesBuf}}); - m_ipuGraph.setTileMapping(vtx, 0); - m_ipuGraph.setPerfEstimate(vtx, 100); + for (int renderTile = 0; renderTile < IPUNUMRENDERTILES; ++renderTile) { + vtx = m_ipuGraph.addVertex(R_ExecuteSetViewSize_CS, "R_ExecuteSetViewSize_Vertex", {{"miscValues", m_miscValuesBuf}}); + m_ipuGraph.setTileMapping(vtx, renderTile + IPUFIRSTRENDERTILE); + m_ipuGraph.setPerfEstimate(vtx, 100); + } poplar::program::Sequence R_ExecuteSetViewSize_prog({ poplar::program::Copy(miscValuesStream, m_miscValuesBuf), poplar::program::Execute(R_ExecuteSetViewSize_CS), - GetPrintbuf_prog // Remove? }); // ---------------- Final prog --------------// @@ -230,7 +239,7 @@ void IpuDoom::buildIpuGraph() { printf("Creating engine...\n"); m_ipuEngine = std::make_unique(std::move(poplar::Engine( m_ipuGraph, { - IPU_Setup_prog, + IPU_Init_Prog, G_DoLoadLevel_prog, G_Ticker_prog, G_Responder_prog, @@ -244,10 +253,6 @@ void IpuDoom::buildIpuGraph() { m_ipuEngine->connectStream("miscValues-stream", m_miscValuesBuf_h); m_ipuEngine->connectStream("lumpNum-stream", &m_lumpNum_h); - m_ipuEngine->connectStreamToCallback("printbuf-stream", [](void* p) { - if (((char*)p)[0] == '\0') return; - printf("[IPU] %.*s\n", IPUPRINTBUFSIZE, (char*)p); - }); m_ipuEngine->connectStreamToCallback("lumpBuf-stream", [this](void* p) { IPU_LoadLumpForTransfer(m_lumpNum_h, (byte*) p); }); diff --git a/src/r_data.c b/src/r_data.c index 822c1d8..e82c74d 100644 --- a/src/r_data.c +++ b/src/r_data.c @@ -467,7 +467,7 @@ void R_InitTextures(void) { Z_Malloc(numtextures * sizeof(*texturecompositesize), PU_STATIC, 0); texturewidthmask = Z_Malloc(numtextures * sizeof(*texturewidthmask), PU_STATIC, 0); - textureheight = Z_Malloc(numtextures * sizeof(*textureheight), PU_STATIC, 0); + textureheight = Z_Malloc(numtextures * sizeof(*textureheight), PU_STATIC, 0); totalwidth = 0;