diff --git a/NOTES.md b/NOTES.md index 36c1745..09636b0 100644 --- a/NOTES.md +++ b/NOTES.md @@ -16,7 +16,9 @@ ### Places could save memory - Transcendental lookups -> live calc - - Move scalelight table from render tiles to texture tiles, include light level in column request. Saves 8K + - `openings` (used for masked objects) is 40K, surely can be smaller on striped render tiles + - cut down on width of `visplane.top/bottom`, `columnofs`, etc due to smaller screen widths + - remove cachedheight, cacheddistance, cachedxstep, cachedystep? ### Things I don't support, to make my life easier: diff --git a/README.md b/README.md index 75bdbd0..32ebc8b 100644 --- a/README.md +++ b/README.md @@ -35,15 +35,19 @@ Activity Log: - [x] Split rendering across 32 render tiles. Reformat textures into a big buffer that can be striped over dedicated texture tiles, and accessed by the render tiles using JIT-patched exchange programs to enable fetches based on dynamic indices. So now IPU can texture walls. -![Gameplay with textured walls (but nothing else)](README_imgs/WallsTextured_noCPU.gif) +![Gameplay with textured walls (but nothing else)](README_imgs/WallsTextured_noCPU.gif)![Gameplay showing rendering striped across 32 tiles](README_imgs/WallsTileGrey_noCPU.gif ) - [x] Implement lighting model (add shadows to walls): texture tiles translate the colours during texture column fetch requests to save memory on the render tiles. ![Side-by-side of room with and without shadows](README_imgs/WallsLighting.PNG) +- [x] Port the visplane system to the render tiles, so IPU can render floors and ceilings, (untextured for now, instead coloured to show the visplane subdivision). Also implement skybox. + +![Gameplay with visplanes and skybox visible](README_imgs/VisplanesSkybox_noCPU.gif) + Immediate next steps: +- [ ] Port 'flats' and zlight so IPU can texture and light floors + ceilings. - [ ] Implement system to notify IPU of map state changes, so that doors open and close properly. 
-- [ ] Port visplane system to get IPU rendering floors and ceilings Longer term next steps: diff --git a/README_imgs/VisplanesSkybox_noCPU.gif b/README_imgs/VisplanesSkybox_noCPU.gif new file mode 100755 index 0000000..0ce9aff Binary files /dev/null and b/README_imgs/VisplanesSkybox_noCPU.gif differ diff --git a/README_imgs/WallsTileGrey_noCPU.gif b/README_imgs/WallsTileGrey_noCPU.gif new file mode 100755 index 0000000..7ecd250 Binary files /dev/null and b/README_imgs/WallsTileGrey_noCPU.gif differ diff --git a/src/ipu/ipu_interface.h b/src/ipu/ipu_interface.h index ee506f1..8b7f2a3 100644 --- a/src/ipu/ipu_interface.h +++ b/src/ipu/ipu_interface.h @@ -25,17 +25,18 @@ extern "C" { #define IPUNUMRENDERTILES (32) #define IPUCOLSPERRENDERTILE (SCREENWIDTH / IPUNUMRENDERTILES) -#define IPUTEXTURETILESPERRENDERTILE (4) +#define IPUTEXTURETILESPERRENDERTILE (5) #define IPUTEXTURETILEBUFSIZE (1024 * 400) #define IPUFIRSTTEXTURETILE (IPUFIRSTRENDERTILE + IPUNUMRENDERTILES) #define IPUNUMTEXTURETILES (IPUTEXTURETILESPERRENDERTILE * IPUNUMRENDERTILES) #define IPUNUMTEXTURECACHELINES (1) #define IPUTEXTURECACHELINESIZE (128 / sizeof(int)) -#define IPUMAXNUMTEXTURES (130) +#define IPUMAXNUMTEXTURES (200) #define IPUCOMMSBUFSIZE (IPUNUMRENDERTILES) #define COLOURMAPSIZE (8704) +// Flags for special cases of the lump-loading mechanism #define IPULUMPBUFFLAG_FLATPICS (-100) diff --git a/src/ipu/ipu_texturetiles.cpp b/src/ipu/ipu_texturetiles.cpp index 4331a41..7cef2a9 100644 --- a/src/ipu/ipu_texturetiles.cpp +++ b/src/ipu/ipu_texturetiles.cpp @@ -13,6 +13,9 @@ #include "../../xcom.hpp" +#define IS_SPAN (0x800000) + + // Remeber! 
Copies of these vars exits independently on each tile unsigned* tileLocalProgBuf; unsigned* tileLocalCommsBuf; @@ -43,7 +46,7 @@ void IPU_R_InitTextureTile(unsigned* progBuf, int progBufSize, const pixel_t* co XCOMAssembler assembler; int sendCycle = XCOM_WORSTSENDDELAY; int muxCycle = sendCycle + XCOM_TimeToMux(renderTile, __builtin_ipu_get_tile_id()); - int messageSize = sizeof(IPUColRequest_t) / sizeof(int); + int messageSize = sizeof(IPUTextureRequest_t) / sizeof(int); assembler.addRecv(0, messageSize, renderTile, muxCycle); progHead = assembler.assemble(progHead, progEnd - progHead); progHead++; // This program returns control flow, so don't override the `br $m10` @@ -107,10 +110,10 @@ void IPU_R_FulfilColumnRequest(unsigned* progBuf, unsigned char* textureBuf, uns XCOM_Execute(recvProgram, NULL, textureBuf); // Unpack received data - unsigned textureNum = ((IPUColRequest_t*) textureBuf)->texture; - unsigned columnOffset = ((IPUColRequest_t*) textureBuf)->columnOffset; - unsigned lightNum = ((IPUColRequest_t*) textureBuf)->lightNum; - unsigned lightScale = ((IPUColRequest_t*) textureBuf)->lightScale; + unsigned textureNum = ((IPUTextureRequest_t*) textureBuf)->texture; + unsigned lightNum = ((IPUTextureRequest_t*) textureBuf)->colRequest.lightNum; + unsigned columnOffset = ((IPUTextureRequest_t*) textureBuf)->colRequest.columnOffset; + unsigned lightScale = ((IPUTextureRequest_t*) textureBuf)->colRequest.lightScale; // TMP: need seperate send buffer for lightscaled output to be sent from, currently use start of textureBuf pixel_t* sendBuf = &textureBuf[IPUTEXTURETILEBUFSIZE - (IPUTEXTURECACHELINESIZE * sizeof(int))]; @@ -158,7 +161,7 @@ void IPU_R_InitColumnRequester(unsigned* progBuf, int progBufSize) { // First Program: performs the column request and sends flags to aggrTile progBuf[0] = progHead - progBuf; - int messageSize = sizeof(IPUColRequest_t) / sizeof(int); + int messageSize = sizeof(IPUTextureRequest_t) / sizeof(int); assembler.addSend(0, 
messageSize, XCOM_BROADCAST, XCOM_WORSTSENDDELAY); // Do the first step of the `done` flag aggregation if (__builtin_ipu_get_tile_id() == aggrTile) { @@ -174,7 +177,7 @@ void IPU_R_InitColumnRequester(unsigned* progBuf, int progBufSize) { int direction = XCOM_EastOrWest(__builtin_ipu_get_tile_id(), aggrTile); int muxCycle = XCOM_WORSTRECVDELAY + XCOM_WORSTRECVDELAY + myTimeSlot; int sendCycle = muxCycle - XCOM_TimeToMux(__builtin_ipu_get_tile_id(), aggrTile); - unsigned* addr = (unsigned*) sizeof(IPUColRequest_t); + unsigned* addr = (unsigned*) sizeof(IPUTextureRequest_t); assembler.addSend(addr, 1, direction, sendCycle); } progHead = assembler.assemble(progHead, progEnd - progHead); @@ -210,20 +213,16 @@ void IPU_R_InitColumnRequester(unsigned* progBuf, int progBufSize) { } -extern "C" +static __SUPER__ -byte* IPU_R_RequestColumn(int texture, int column) { +byte* renderTileExchange(int textureSourceTile) { // progBuff starts with a program directory auto requestProg = &tileLocalProgBuf[tileLocalProgBuf[0]]; auto receiveProg = &tileLocalProgBuf[tileLocalProgBuf[1]]; auto aggregateProg = &tileLocalProgBuf[tileLocalProgBuf[2]]; - // Populate buffer with data to be exchanged - IPUColRequest_t *request = (IPUColRequest_t*) tileLocalTextureBuf; - request->texture = texture; - request->columnOffset = (column & texturewidthmask[texture]) * (textureheight[texture] >> FRACBITS); - request->lightNum = lightnum; - request->lightScale = walllightindex; + // Request will be populated by the caller, but we follow it up by setting the 'done' flag to 0 + IPUTextureRequest_t *request = (IPUTextureRequest_t*) tileLocalTextureBuf; *((unsigned*) &request[1]) = 0; // The Done Flag XCOM_Execute( @@ -237,14 +236,6 @@ byte* IPU_R_RequestColumn(int texture, int column) { tileLocalCommsBuf[0] = 0; } - int textureSourceTile; - for (textureSourceTile = 0; textureSourceTile < IPUTEXTURETILESPERRENDERTILE; ++textureSourceTile) { - if (texture >= tileLocalTextureRange[textureSourceTile] && - 
texture < tileLocalTextureRange[textureSourceTile + 1]) { - break; - } - } - XCOM_PatchMuxAndExecute( receiveProg, // Prog NULL, // Read offset @@ -262,6 +253,54 @@ byte* IPU_R_RequestColumn(int texture, int column) { return (byte*) tileLocalTextureBuf; } + +extern "C" +__SUPER__ +byte* IPU_R_RequestColumn(int texture, int column) { + + // Populate buffer with data to be exchanged + IPUTextureRequest_t *request = (IPUTextureRequest_t*) tileLocalTextureBuf; + request->texture = texture; + request->colRequest.columnOffset = (column & texturewidthmask[texture]) * (textureheight[texture] >> FRACBITS); + request->colRequest.lightNum = lightnum; + request->colRequest.lightScale = walllightindex; + + // Figure out which texture tile is going to respond + int textureSourceTile; + for (textureSourceTile = 0; textureSourceTile < IPUTEXTURETILESPERRENDERTILE; ++textureSourceTile) { + if (texture >= tileLocalTextureRange[textureSourceTile] && + texture < tileLocalTextureRange[textureSourceTile + 1]) { + break; + } + } + + return renderTileExchange(textureSourceTile); +} + + +// extern "C" +// __SUPER__ +// byte* IPU_R_RequestSpan() { + +// // Populate buffer with data to be exchanged +// IPUTextureRequest_t *request = (IPUTextureRequest_t*) tileLocalTextureBuf; +// request->texture = texture | IS_SPAN; // Set the request type to SPAN instead of COLUMN +// // request->spanRequest. 
= (column & texturewidthmask[texture]) * (textureheight[texture] >> FRACBITS); +// // request->colRequest.lightNum = lightnum; +// // request->colRequest.lightScale = walllightindex; + +// // Figure out which texture tile is going to respond +// // int textureSourceTile; +// // for (textureSourceTile = 0; textureSourceTile < IPUTEXTURETILESPERRENDERTILE; ++textureSourceTile) { +// // if (texture >= tileLocalTextureRange[textureSourceTile] && +// // texture < tileLocalTextureRange[textureSourceTile + 1]) { +// // break; +// // } +// // } + +// return renderTileExchange(textureSourceTile); +// } + extern "C" __SUPER__ void IPU_R_RenderTileDone() { @@ -272,9 +311,8 @@ void IPU_R_RenderTileDone() { while (1) { // Populate buffer with data to be exchanged - IPUColRequest_t *request = (IPUColRequest_t*) tileLocalTextureBuf; + IPUTextureRequest_t *request = (IPUTextureRequest_t*) tileLocalTextureBuf; request->texture = 0xffffffff; - request->columnOffset = 0xffffffff; *((unsigned*) &request[1]) = 1; // The `done` Flag XCOM_Execute( diff --git a/src/ipu/ipu_texturetiles.h b/src/ipu/ipu_texturetiles.h index 67e4682..7f3fdee 100644 --- a/src/ipu/ipu_texturetiles.h +++ b/src/ipu/ipu_texturetiles.h @@ -11,9 +11,22 @@ extern "C" { #include "ipu_utils.h" +struct IPUColRequest_t { + unsigned columnOffset, lightNum, lightScale; +}; +struct IPUSpanRequest_t { + unsigned position, step; + short ds_x1, ds_x2; + unsigned char y, light; +}; + typedef struct { - unsigned texture, columnOffset, lightNum, lightScale; -} IPUColRequest_t; + unsigned texture; + union { + struct IPUColRequest_t colRequest; + struct IPUSpanRequest_t spanRequest; + }; +} IPUTextureRequest_t; extern unsigned* tileLocalProgBuf; diff --git a/src/ipu/r_draw.c b/src/ipu/r_draw.c index 2272a1c..2c635f7 100644 --- a/src/ipu/r_draw.c +++ b/src/ipu/r_draw.c @@ -526,10 +526,9 @@ int dscount; // // Draws the actual span. 
+__SUPER__ void R_DrawSpan(void) { - - /* LATER - unsigned int position, step; + // unsigned int position, step; pixel_t *dest; int count; int spot; @@ -537,25 +536,29 @@ void R_DrawSpan(void) { if (ds_x2 < ds_x1 || ds_x1 < 0 || ds_x2 >= SCREENWIDTH || (unsigned)ds_y > SCREENHEIGHT) { - I_Error("R_DrawSpan: %i to %i at %i", ds_x1, ds_x2, ds_y); + printf("ERROR: R_DrawSpan: %d to %d at %d\n", ds_x1, ds_x2, ds_y); + return; } - // dscount++; + // dscount++; // NOT JOSEF :) // Pack position and step variables into a single 32-bit integer, // with x in the top 16 bits and y in the bottom 16 bits. For // each 16-bit part, the top 6 bits are the integer part and the // bottom 10 bits are the fractional part of the pixel position. - position = ((ds_xfrac << 10) & 0xffff0000) | ((ds_yfrac >> 6) & 0x0000ffff); - step = ((ds_xstep << 10) & 0xffff0000) | ((ds_ystep >> 6) & 0x0000ffff); + // LATER + // position = ((ds_xfrac << 10) & 0xffff0000) | ((ds_yfrac >> 6) & 0x0000ffff); + // step = ((ds_xstep << 10) & 0xffff0000) | ((ds_ystep >> 6) & 0x0000ffff); dest = ylookup[ds_y] + columnofs[ds_x1]; // We do not check for zero spans here? count = ds_x2 - ds_x1; + do { // Calculate current texture index in u,v. + /* LATER ytemp = (position >> 4) & 0x0fc0; xtemp = (position >> 26); spot = xtemp | ytemp; @@ -565,9 +568,12 @@ void R_DrawSpan(void) { *dest++ = ds_colormap[ds_source[spot]]; position += step; + */ + + *dest++ = 20; } while (count--); - */ + } /* diff --git a/src/ipu/r_main.c b/src/ipu/r_main.c index 2040511..1c40f0f 100644 --- a/src/ipu/r_main.c +++ b/src/ipu/r_main.c @@ -104,7 +104,7 @@ angle_t xtoviewangle[SCREENWIDTH + 1]; // lighttable_t *scalelight[LIGHTLEVELS][MAXLIGHTSCALE];// JOSEF: This lives on texture tile instead // lighttable_t *scalelightfixed[MAXLIGHTSCALE]; -// lighttable_t *zlight[LIGHTLEVELS][MAXLIGHTZ]; // JOSEF: TODO (on texture/flats tile?) +lighttable_t *zlight[LIGHTLEVELS][MAXLIGHTZ]; // JOSEF: TODO (on texture/flats tile?) 
// bumped light from gun blasts int extralight; diff --git a/src/ipu/r_plane.c b/src/ipu/r_plane.c index cad5d31..1d7477a 100644 --- a/src/ipu/r_plane.c +++ b/src/ipu/r_plane.c @@ -61,7 +61,7 @@ visplane_t *floorplane; visplane_t *ceilingplane; // ? -#define MAXOPENINGS SCREENWIDTH * 64 +#define MAXOPENINGS SCREENWIDTH * 64 // JOSEF: Surely this can be smaller on IPU short openings[MAXOPENINGS]; short *lastopening; @@ -169,6 +169,7 @@ void R_MapPlane(int y, int x1, int x2) { // high or low detail // spanfunc(); // LATER + // R_DrawSpan(); } // @@ -399,8 +400,8 @@ void R_DrawPlanes(void) { pl->bottom[x]); // JOSEF: TMP solid colour visualisation - // pixel_t colour = (140 + (pl - visplanes) * 2) % 256; - pixel_t colour = (pl->picnum * 17 + 209) % 256; + pixel_t colour = (140 + (pl - visplanes) * 2) % 256; + // pixel_t colour = (pl->picnum * 17 + 209) % 256; for (int y = pl->top[x]; y <= pl->bottom[x]; y++) { pixel_t* dest = (I_VideoBuffer + (y + viewwindowy) * IPUCOLSPERRENDERTILE) + (viewwindowx + x - tileLeftClip); *dest = colour; diff --git a/src/r_data.c b/src/r_data.c index 4c076c9..bfad472 100644 --- a/src/r_data.c +++ b/src/r_data.c @@ -359,13 +359,14 @@ int ipuTextureBlobRanges[IPUTEXTURETILESPERRENDERTILE + 1]; void GenerateIPUTextureBlob(void) { - if (numtextures >= IPUMAXNUMTEXTURES) { - I_Error("GenerateIPUTextureBlob: numtextures >= IPUMAXNUMTEXTURES"); + if (numtextures + numflats >= IPUMAXNUMTEXTURES) { + I_Error("GenerateIPUTextureBlob: numtextures + numflats >= IPUMAXNUMTEXTURES"); } ipuTextureBlob = malloc(IPUTEXTURETILEBUFSIZE * IPUTEXTURETILESPERRENDERTILE); ipuTextureBlobRanges[0] = 0; int tile = 0, pos = 0; + // First, pack up all wall textures into the blob for (int t = 0; t < numtextures; ++t) { int tex_width = textures[t]->width; int tex_height = textures[t]->height; @@ -387,7 +388,33 @@ void GenerateIPUTextureBlob(void) { } pos += tex_size; } - ipuTextureBlobRanges[tile + 1] = numtextures; + + // Next, pack in all the flats + for (int i = 
0, t = numtextures; i < numflats; ++i, ++t) { + int lump_num = firstflat + i; + int lump_size = W_LumpLength(lump_num); + + + if (pos + lump_size >= IPUTEXTURETILEBUFSIZE) { + pos = 0; + tile += 1; + if (tile > IPUTEXTURETILESPERRENDERTILE) { + I_Error("GenerateIPUTextureDump: insufficient texture tiles"); + } + ipuTextureBlobRanges[tile] = t; + } + + ipuTextureBlobOffsets[t] = pos; + byte* flat = W_CacheLumpNum(lump_num, PU_STATIC); + byte* dst = &ipuTextureBlob[tile * IPUTEXTURETILEBUFSIZE + pos]; + memcpy(dst, flat, lump_size); + pos += lump_size; + W_ReleaseLumpNum(lump_num); + } + + // Set the upper texture index bound on the final tile + // (by setting the lower bound on a fake subsequent tile) + ipuTextureBlobRanges[tile + 1] = numtextures + numflats; } // @@ -627,8 +654,6 @@ void R_InitTextures(void) { texturetranslation[i] = i; GenerateTextureHashTable(); - - GenerateIPUTextureBlob(); } @@ -707,6 +732,8 @@ void R_InitData(void) { R_InitSpriteLumps(); printf("."); R_InitColormaps(); + + GenerateIPUTextureBlob(); // JOSEF } // @@ -810,6 +837,7 @@ void R_PrecacheLevel(void) { if (flatpresent[i]) { lump = firstflat + i; flatmemory += lumpinfo[lump]->size; + printf("Flat size = %d\n", lumpinfo[lump]->size); W_CacheLumpNum(lump, PU_CACHE); } } diff --git a/src/r_main.c b/src/r_main.c index 3538301..2cec8b2 100644 --- a/src/r_main.c +++ b/src/r_main.c @@ -747,8 +747,7 @@ void R_RenderPlayerView(player_t *player) { // Check for new console commands. NetUpdate(); - R_DrawPlanes(); - IPU_R_RenderPlayerView(); + // R_DrawPlanes(); // Check for new console commands. NetUpdate(); @@ -757,4 +756,5 @@ void R_RenderPlayerView(player_t *player) { // Check for new console commands. NetUpdate(); + IPU_R_RenderPlayerView(); } diff --git a/src/r_plane.c b/src/r_plane.c index 5c0626a..255e0a5 100644 --- a/src/r_plane.c +++ b/src/r_plane.c @@ -295,6 +295,10 @@ void R_MakeSpans(int x, int t1, int b1, int t2, int b2) { } } +// TMP !!!! 
DELETE ME +byte* R_GetColumn_LikeIPU(int tex, int col); +extern int numtextures; + // // R_DrawPlanes // At the end of each frame.