Created
October 30, 2018 10:48
-
-
Save exjam/ce2de9fcc1e98ad0e12251c12bad5234 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#pragma once

#include <cstddef>
#include <cstdint>
#include <cstring>
#include <type_traits>
namespace gpu7::tiler
{

/**
 * Surface tile modes accepted by untile().
 *
 * Only the linear modes and the two micro-tiled (1D) modes are handled by
 * this file; every other mode makes untile() return false.
 * NOTE(review): the numeric values appear to follow the addrlib ADDR_TM_*
 * ordering referenced by the original author - confirm before relying on it.
 */
enum TileMode : uint32_t
{
   LinearGeneral  = 0x0,
   LinearAligned  = 0x1,
   Tiled1DThin1   = 0x2,
   Tiled1DThick   = 0x3,
   Tiled2DThin1   = 0x4,
   Tiled2DThin2   = 0x5,
   Tiled2DThin4   = 0x6,
   Tiled2DThick   = 0x7,
   Tiled2BThin1   = 0x8,
   Tiled2BThin2   = 0x9,
   Tiled2BThin4   = 0xA,
   Tiled2BThick   = 0xB,
   Tiled3DThin1   = 0xC,
   Tiled3DThick   = 0xD,
   Tiled3BThin1   = 0xE,
   Tiled3BThick   = 0xF,
   LinearSpecial  = 0x10,
};

/**
 * Description of a tiled surface to be converted to a linear layout.
 *
 * All members are value-initialised so that a default-constructed
 * descriptor never carries indeterminate values.
 */
struct TiledSurface
{
   //! Tiled source data; only read by the untiling code.
   void *image = nullptr;

   //! Bits per element (the untiler divides this by 8 to get bytes).
   uint32_t bpp = 0;

   //! One of the TileMode values.
   uint32_t tileMode = 0;

   //! Not used by the code in this file.
   uint32_t swizzle = 0;

   //! Row pitch in elements; whole micro tiles only (pitch / 8 is used).
   uint32_t pitch = 0;

   //! Height in elements; whole micro tiles only (height / 8 is used).
   uint32_t height = 0;

   //! Not used by the code in this file.
   uint32_t depth = 0;

   //! Not used by the code in this file.
   uint32_t numSamples = 0;

   //! Selects the depth element ordering instead of the per-bpp colour one.
   bool isDepth = false;

   //! Not used by the code in this file.
   uint32_t bankSwizzle = 0;

   //! Not used by the code in this file.
   uint32_t pipeSwizzle = 0;
};

namespace detail
{

//! Width of a micro tile, in elements.
inline constexpr unsigned MicroTileWidth = 8;

//! Height of a micro tile, in elements.
inline constexpr unsigned MicroTileHeight = 8;

/**
 * Address of element `index` of an 8-element-wide tile.
 *
 * `index` is a row-major element index (0..63). The column selects the byte
 * offset inside the row and the row selects how many strides to skip; the two
 * must not be mixed, otherwise the row offset is counted twice.
 *
 * \param base        First byte of the tile.
 * \param index       Row-major element index inside the tile.
 * \param elemBytes   Size of one element in bytes.
 * \param strideBytes Distance in bytes between two consecutive rows.
 */
template<typename Byte>
Byte *
elementAt(Byte *base, unsigned index, unsigned elemBytes, unsigned strideBytes)
{
   const std::size_t column = index % MicroTileWidth;
   const std::size_t row = index / MicroTileWidth;
   return base + column * elemBytes + row * strideBytes;
}

/*
 * Each MicroTiler below converts ONE 8x8 micro tile from tiled element order
 * (src) to linear order (dst). The tables in the comments are laid out like
 * the linear tile: the entry at row r, column c is the index of the tiled
 * element that ends up at linear position (c, r).
 *
 * Common parameters of apply():
 *   src            first byte of the tiled micro tile
 *   srcStrideBytes bytes per row of 8 elements in src
 *   dst            first byte of the tile's top-left element in the linear image
 *   dstStrideBytes bytes per row of the linear image
 */

struct MicroTiler8
{
   /*
      8 bits per element:
       0:   0,  1,  2,  3,  4,  5,  6,  7,
       8:  16, 17, 18, 19, 20, 21, 22, 23,
      16:   8,  9, 10, 11, 12, 13, 14, 15,
      24:  24, 25, 26, 27, 28, 29, 30, 31,
      32:  32, 33, 34, 35, 36, 37, 38, 39,
      40:  48, 49, 50, 51, 52, 53, 54, 55,
      48:  40, 41, 42, 43, 44, 45, 46, 47,
      56:  56, 57, 58, 59, 60, 61, 62, 63,
   */
   static void
   apply(const uint8_t *src, unsigned srcStrideBytes, uint8_t *dst, unsigned dstStrideBytes)
   {
      constexpr std::size_t rowBytes = MicroTileWidth * sizeof(uint8_t);

      // Whole rows are kept intact; rows 1<->2 and 5<->6 trade places.
      static constexpr unsigned srcRowOf[MicroTileHeight] = { 0, 2, 1, 3, 4, 6, 5, 7 };

      for (unsigned row = 0; row < MicroTileHeight; ++row) {
         std::memcpy(dst + static_cast<std::size_t>(row) * dstStrideBytes,
                     src + static_cast<std::size_t>(srcRowOf[row]) * srcStrideBytes,
                     rowBytes);
      }
   }
};

struct MicroTiler16
{
   /*
      16 bits per element:
       0:   0,  1,  2,  3,  4,  5,  6,  7,
       8:   8,  9, 10, 11, 12, 13, 14, 15,
      16:  16, 17, 18, 19, 20, 21, 22, 23,
      24:  24, 25, 26, 27, 28, 29, 30, 31,
      32:  32, 33, 34, 35, 36, 37, 38, 39,
      40:  40, 41, 42, 43, 44, 45, 46, 47,
      48:  48, 49, 50, 51, 52, 53, 54, 55,
      56:  56, 57, 58, 59, 60, 61, 62, 63,
   */
   static void
   apply(const uint8_t *src, unsigned srcStrideBytes, uint8_t *dst, unsigned dstStrideBytes)
   {
      constexpr std::size_t rowBytes = MicroTileWidth * sizeof(uint16_t);

      // Element order is already linear; only the row stride changes.
      for (unsigned row = 0; row < MicroTileHeight; ++row) {
         std::memcpy(dst, src, rowBytes);
         src += srcStrideBytes;
         dst += dstStrideBytes;
      }
   }
};

struct MicroTiler32
{
   /*
      32 bits per element:
       0:   0,  1,  2,  3,  8,  9, 10, 11,
       8:   4,  5,  6,  7, 12, 13, 14, 15,
      16:  16, 17, 18, 19, 24, 25, 26, 27,
      24:  20, 21, 22, 23, 28, 29, 30, 31,
      32:  32, 33, 34, 35, 40, 41, 42, 43,
      40:  36, 37, 38, 39, 44, 45, 46, 47,
      48:  48, 49, 50, 51, 56, 57, 58, 59,
      56:  52, 53, 54, 55, 60, 61, 62, 63,
   */
   static void
   apply(const uint8_t *src, unsigned srcStrideBytes, uint8_t *dst, unsigned dstStrideBytes)
   {
      constexpr unsigned elemBytes = sizeof(uint32_t);
      constexpr std::size_t groupBytes = 4 * elemBytes;

      // Within every pair of rows, runs of 4 elements are moved as a unit.
      auto copyGroup = [&](unsigned dstIndex, unsigned srcIndex) {
         std::memcpy(elementAt(dst, dstIndex, elemBytes, dstStrideBytes),
                     elementAt(src, srcIndex, elemBytes, srcStrideBytes),
                     groupBytes);
      };

      for (unsigned y = 0; y < MicroTileHeight; y += 2) {
         const unsigned base = y * MicroTileWidth;
         copyGroup(base + 0, base + 0);
         copyGroup(base + 4, base + 8);
         copyGroup(base + 8, base + 4);
         copyGroup(base + 12, base + 12);
      }
   }
};

struct MicroTiler64
{
   /*
      64 bits per element:
       0:   0,  1,  4,  5,  8,  9, 12, 13,
       8:   2,  3,  6,  7, 10, 11, 14, 15,
      16:  16, 17, 20, 21, 24, 25, 28, 29,
      24:  18, 19, 22, 23, 26, 27, 30, 31,
      32:  32, 33, 36, 37, 40, 41, 44, 45,
      40:  34, 35, 38, 39, 42, 43, 46, 47,
      48:  48, 49, 52, 53, 56, 57, 60, 61,
      56:  50, 51, 54, 55, 58, 59, 62, 63,
   */
   static void
   apply(const uint8_t *src, unsigned srcStrideBytes, uint8_t *dst, unsigned dstStrideBytes)
   {
      constexpr unsigned elemBytes = sizeof(uint64_t);
      constexpr std::size_t groupBytes = 2 * elemBytes;

      for (unsigned y = 0; y < MicroTileHeight; y += 2) {
         for (unsigned x = 0; x < MicroTileWidth; x += 2) {
            // A linear pair at column x comes from tiled index 2 * x of the
            // same row pair; the odd row's pair follows 2 elements later.
            const unsigned dstIndex = y * MicroTileWidth + x;
            const unsigned srcIndex = y * MicroTileWidth + 2 * x;

            std::memcpy(elementAt(dst, dstIndex, elemBytes, dstStrideBytes),
                        elementAt(src, srcIndex, elemBytes, srcStrideBytes),
                        groupBytes);
            std::memcpy(elementAt(dst, dstIndex + MicroTileWidth, elemBytes, dstStrideBytes),
                        elementAt(src, srcIndex + 2, elemBytes, srcStrideBytes),
                        groupBytes);
         }
      }
   }
};

struct MicroTiler128
{
   /*
      128 bits per element:
       0:   0,  2,  4,  6,  8, 10, 12, 14,
       8:   1,  3,  5,  7,  9, 11, 13, 15,
      16:  16, 18, 20, 22, 24, 26, 28, 30,
      24:  17, 19, 21, 23, 25, 27, 29, 31,
      32:  32, 34, 36, 38, 40, 42, 44, 46,
      40:  33, 35, 37, 39, 41, 43, 45, 47,
      48:  48, 50, 52, 54, 56, 58, 60, 62,
      56:  49, 51, 53, 55, 57, 59, 61, 63,
   */
   static void
   apply(const uint8_t *src, unsigned srcStrideBytes, uint8_t *dst, unsigned dstStrideBytes)
   {
      // 128 bits = 16 bytes per element.
      constexpr unsigned elemBytes = 16;

      for (unsigned y = 0; y < MicroTileHeight; y += 2) {
         for (unsigned x = 0; x < MicroTileWidth; ++x) {
            // Even/odd tiled elements alternate between the two rows.
            const unsigned dstIndex = y * MicroTileWidth + x;
            const unsigned srcIndex = y * MicroTileWidth + 2 * x;

            std::memcpy(elementAt(dst, dstIndex, elemBytes, dstStrideBytes),
                        elementAt(src, srcIndex, elemBytes, srcStrideBytes),
                        elemBytes);
            std::memcpy(elementAt(dst, dstIndex + MicroTileWidth, elemBytes, dstStrideBytes),
                        elementAt(src, srcIndex + 1, elemBytes, srcStrideBytes),
                        elemBytes);
         }
      }
   }
};

struct MicroTilerDepth
{
   /*
      depth elements:
       0:   0,  1,  4,  5, 16, 17, 20, 21,
       8:   2,  3,  6,  7, 18, 19, 22, 23,
      16:   8,  9, 12, 13, 24, 25, 28, 29,
      24:  10, 11, 14, 15, 26, 27, 30, 31,
      32:  32, 33, 36, 37, 48, 49, 52, 53,
      40:  34, 35, 38, 39, 50, 51, 54, 55,
      48:  40, 41, 44, 45, 56, 57, 60, 61,
      56:  42, 43, 46, 47, 58, 59, 62, 63,
   */
   /**
    * \param bpp Size of one element in BYTES (it is used directly as a byte
    *            multiplier), not bits.
    */
   static void
   apply(const uint8_t *src, unsigned srcStrideBytes, uint8_t *dst, unsigned dstStrideBytes, unsigned bpp)
   {
      // Pairs of elements are moved as a unit. These are the offsets of the
      // eight pairs of one 4x4 quadrant, relative to the quadrant's base.
      static constexpr unsigned dstPairOffset[8] = { 0, 2, 8, 10, 16, 18, 24, 26 };
      static constexpr unsigned srcPairOffset[8] = { 0, 4, 2, 6, 8, 12, 10, 14 };

      const std::size_t pairBytes = 2 * static_cast<std::size_t>(bpp);

      for (unsigned y = 0; y < MicroTileHeight; y += 4) {
         for (unsigned x = 0; x < MicroTileWidth; x += 4) {
            // A quadrant holds 16 consecutive tiled elements; linearly it
            // starts at column x of row y.
            const unsigned dstBase = y * MicroTileWidth + x;
            const unsigned srcBase = y * MicroTileWidth + x * 4;

            for (unsigned pair = 0; pair < 8; ++pair) {
               std::memcpy(elementAt(dst, dstBase + dstPairOffset[pair], bpp, dstStrideBytes),
                           elementAt(src, srcBase + srcPairOffset[pair], bpp, srcStrideBytes),
                           pairBytes);
            }
         }
      }
   }
};

/**
 * Untile every micro tile of one slice with the given MicroTiler.
 *
 * Micro tiles are stored back to back in row-major tile order, starting at
 * `sliceOffset` bytes into `tiled.image`; consecutive tiles are
 * `microTileBytes` apart. Partial tiles (pitch or height not a multiple of
 * 8) are ignored.
 *
 * \param tiled          Surface description; `image` is the tiled source.
 * \param dst            Linear destination, `pitch * bpp / 8` bytes per row.
 * \param sliceOffset    Byte offset of the first micro tile in `tiled.image`.
 * \param microTileBytes Byte distance between two consecutive micro tiles.
 */
template<typename MicroTiler>
void
applyMicroTiler(const TiledSurface &tiled, void *dst, std::size_t sliceOffset, std::size_t microTileBytes)
{
   const unsigned bytesPerPixel = tiled.bpp / 8;
   const unsigned tilesPerRow = tiled.pitch / MicroTileWidth;
   const unsigned tileRows = tiled.height / MicroTileHeight;
   const unsigned srcStrideBytes = MicroTileWidth * bytesPerPixel;
   const unsigned dstStrideBytes = tiled.pitch * bytesPerPixel;

   const auto *srcTile = static_cast<const uint8_t *>(tiled.image) + sliceOffset;
   auto *dstImage = static_cast<uint8_t *>(dst);

   for (unsigned tileY = 0; tileY < tileRows; ++tileY) {
      auto *dstTileRow = dstImage + static_cast<std::size_t>(tileY) * MicroTileHeight * dstStrideBytes;

      for (unsigned tileX = 0; tileX < tilesPerRow; ++tileX) {
         // One tile is exactly srcStrideBytes wide in the linear image too.
         auto *dstTile = dstTileRow + static_cast<std::size_t>(tileX) * srcStrideBytes;

         if constexpr (std::is_same_v<MicroTiler, MicroTilerDepth>) {
            // The depth tiler is size-agnostic and needs the element size.
            MicroTilerDepth::apply(srcTile, srcStrideBytes, dstTile, dstStrideBytes, bytesPerPixel);
         } else {
            MicroTiler::apply(srcTile, srcStrideBytes, dstTile, dstStrideBytes);
         }

         srcTile += microTileBytes;
      }
   }
}

/**
 * Untile one slice of a micro-tiled (Tiled1DThin1 / Tiled1DThick) surface.
 *
 * \param tiled Surface description; `image` is the tiled source.
 * \param dst   Linear destination for the slice.
 * \param slice Slice (z) index to untile.
 * \return false when a pointer is null, `slice` is negative, bpp is below 8
 *         or (for non-depth surfaces) bpp is not one of 8/16/32/64/128;
 *         true once the slice has been written to `dst`.
 *
 * NOTE: `tiled.numSamples` is not taken into account.
 */
inline bool
untileMicroTiledSurface(const TiledSurface &tiled, void *dst, int slice)
{
   const unsigned bytesPerPixel = tiled.bpp / 8;

   if (!tiled.image || !dst || slice < 0 || bytesPerPixel == 0) {
      return false;
   }

   const unsigned thickness = (tiled.tileMode == TileMode::Tiled1DThick) ? 4u : 1u;
   const std::size_t tileSliceBytes =
      static_cast<std::size_t>(MicroTileWidth) * MicroTileHeight * bytesPerPixel;
   const std::size_t microTileBytes = tileSliceBytes * thickness;

   // Slices are grouped `thickness` at a time; each group is a full
   // pitch x height array of micro tiles.
   const auto sliceIndex = static_cast<unsigned>(slice);
   const std::size_t sliceGroupBytes =
      static_cast<std::size_t>(tiled.pitch) * tiled.height * thickness * bytesPerPixel;

   // Inside a thick micro tile the z layers follow one another as whole 8x8
   // planes, so the remaining slice bits select a plane within every tile.
   // NOTE(review): matches addrlib's pixel index (z in bits 6-7) - confirm
   // against hardware dumps.
   const std::size_t sliceOffset =
      (sliceIndex / thickness) * sliceGroupBytes + (sliceIndex % thickness) * tileSliceBytes;

   if (tiled.isDepth) {
      applyMicroTiler<MicroTilerDepth>(tiled, dst, sliceOffset, microTileBytes);
      return true;
   }

   switch (tiled.bpp) {
   case 8:
      applyMicroTiler<MicroTiler8>(tiled, dst, sliceOffset, microTileBytes);
      return true;
   case 16:
      applyMicroTiler<MicroTiler16>(tiled, dst, sliceOffset, microTileBytes);
      return true;
   case 32:
      applyMicroTiler<MicroTiler32>(tiled, dst, sliceOffset, microTileBytes);
      return true;
   case 64:
      applyMicroTiler<MicroTiler64>(tiled, dst, sliceOffset, microTileBytes);
      return true;
   case 128:
      applyMicroTiler<MicroTiler128>(tiled, dst, sliceOffset, microTileBytes);
      return true;
   default:
      return false;
   }
}

} // namespace detail

/**
 * Convert slice 0 of a tiled surface to a linear layout.
 *
 * \param tiled Surface description; `image` is the tiled source.
 * \param dst   Linear destination buffer.
 * \return true for linear modes (nothing is copied, the data is already
 *         linear) and for successfully untiled micro-tiled surfaces; false
 *         for unsupported modes or invalid input.
 *
 * TODO: the macro-tiled modes (Tiled2D*, Tiled2B*, Tiled3D*, Tiled3B*) are not
 * implemented; addrlib handles them in ComputeSurfaceAddrFromCoordMacroTiled
 * using pipeSwizzle / bankSwizzle.
 */
inline bool
untile(const TiledSurface &tiled, void *dst)
{
   switch (static_cast<TileMode>(tiled.tileMode)) {
   case TileMode::LinearGeneral:
   case TileMode::LinearAligned:
   case TileMode::LinearSpecial:
      // Already "untiled"
      return true;
   case TileMode::Tiled1DThin1:
   case TileMode::Tiled1DThick:
      return detail::untileMicroTiledSurface(tiled, dst, 0);
   default:
      // Macro-tiled and unknown modes are not supported yet.
      return false;
   }
}

} // namespace gpu7::tiler
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment