GPU: Query UnrestrictedBufferTextureCopyPitchSupported to avoid D3D12 realignment copies

This commit is contained in:
Caleb Cornett
2026-01-31 14:50:11 -05:00
committed by Ethan Lee
parent 4df13e8806
commit 742a6fd092
2 changed files with 52 additions and 22 deletions

View File

@@ -1398,11 +1398,12 @@ typedef struct SDL_GPUViewport
* SDL_DownloadFromGPUTexture are used as default values respectively and data * SDL_DownloadFromGPUTexture are used as default values respectively and data
* is considered to be tightly packed. * is considered to be tightly packed.
* *
* **WARNING**: Direct3D 12 requires texture data row pitch to be 256 byte * **WARNING**: On some older/integrated hardware, Direct3D 12 requires texture
* aligned, and offsets to be aligned to 512 bytes. If they are not, SDL will * data row pitch to be 256 byte aligned, and offsets to be aligned to 512 bytes.
* make a temporary copy of the data that is properly aligned, but this adds * If they are not, SDL will make a temporary copy of the data that is properly
* overhead to the transfer process. Apps can avoid this by aligning their * aligned, but this adds overhead to the transfer process. Apps can avoid this
* data appropriately, or using a different GPU backend than Direct3D 12. * by aligning their data appropriately, or using a different GPU backend than
* Direct3D 12.
* *
* \since This struct is available since SDL 3.2.0. * \since This struct is available since SDL 3.2.0.
* *

View File

@@ -935,6 +935,7 @@ struct D3D12Renderer
bool debug_mode; bool debug_mode;
bool GPUUploadHeapSupported; bool GPUUploadHeapSupported;
bool UnrestrictedBufferTextureCopyPitchSupported;
// FIXME: these might not be necessary since we're not using custom heaps // FIXME: these might not be necessary since we're not using custom heaps
bool UMA; bool UMA;
bool UMACacheCoherent; bool UMACacheCoherent;
@@ -5965,6 +5966,7 @@ static void D3D12_UploadToTexture(
bool cycle) bool cycle)
{ {
D3D12CommandBuffer *d3d12CommandBuffer = (D3D12CommandBuffer *)commandBuffer; D3D12CommandBuffer *d3d12CommandBuffer = (D3D12CommandBuffer *)commandBuffer;
D3D12Renderer *renderer = (D3D12Renderer *)d3d12CommandBuffer->renderer;
D3D12BufferContainer *transferBufferContainer = (D3D12BufferContainer *)source->transfer_buffer; D3D12BufferContainer *transferBufferContainer = (D3D12BufferContainer *)source->transfer_buffer;
D3D12Buffer *temporaryBuffer = NULL; D3D12Buffer *temporaryBuffer = NULL;
D3D12_TEXTURE_COPY_LOCATION sourceLocation; D3D12_TEXTURE_COPY_LOCATION sourceLocation;
@@ -5992,11 +5994,12 @@ static void D3D12_UploadToTexture(
cycle, cycle,
D3D12_RESOURCE_STATE_COPY_DEST); D3D12_RESOURCE_STATE_COPY_DEST);
/* D3D12 requires texture data row pitch to be 256 byte aligned, which is obviously insane. /* Unless the UnrestrictedBufferTextureCopyPitchSupported feature is supported, D3D12 requires
* Instead of exposing that restriction to the client, which is a huge rake to step on, * texture data row pitch to be 256 byte aligned, which is obviously insane. Instead of exposing
* and a restriction that no other backend requires, we're going to copy data to a temporary buffer, * that restriction to the client, which is a huge rake to step on, and a restriction that no
* copy THAT data to the texture, and then get rid of the temporary buffer ASAP. * other backend requires, we're going to copy data to a temporary buffer, copy THAT data to the
* If we're lucky and the row pitch and depth pitch are already aligned, we can skip all of that. * texture, and then get rid of the temporary buffer ASAP. If we're lucky and the row pitch and
* depth pitch are already aligned, we can skip all of that.
* *
* D3D12 also requires offsets to be 512 byte aligned. We'll fix that for the client and warn them as well. * D3D12 also requires offsets to be 512 byte aligned. We'll fix that for the client and warn them as well.
* *
@@ -6018,10 +6021,16 @@ static void D3D12_UploadToTexture(
bytesPerSlice = rowsPerSlice * rowPitch; bytesPerSlice = rowsPerSlice * rowPitch;
alignedRowPitch = (destination->w + (blockWidth - 1)) / blockWidth * blockSize; if (renderer->UnrestrictedBufferTextureCopyPitchSupported) {
alignedRowPitch = D3D12_INTERNAL_Align(alignedRowPitch, D3D12_TEXTURE_DATA_PITCH_ALIGNMENT); alignedRowPitch = rowPitch;
needsRealignment = rowsPerSlice != destination->h || rowPitch != alignedRowPitch; needsRealignment = false;
needsPlacementCopy = source->offset % D3D12_TEXTURE_DATA_PLACEMENT_ALIGNMENT != 0; needsPlacementCopy = false;
} else {
alignedRowPitch = (destination->w + (blockWidth - 1)) / blockWidth * blockSize;
alignedRowPitch = D3D12_INTERNAL_Align(alignedRowPitch, D3D12_TEXTURE_DATA_PITCH_ALIGNMENT);
needsRealignment = rowsPerSlice != destination->h || rowPitch != alignedRowPitch;
needsPlacementCopy = source->offset % D3D12_TEXTURE_DATA_PLACEMENT_ALIGNMENT != 0;
}
alignedBytesPerSlice = alignedRowPitch * destination->h; alignedBytesPerSlice = alignedRowPitch * destination->h;
@@ -6300,6 +6309,7 @@ static void D3D12_DownloadFromTexture(
const SDL_GPUTextureTransferInfo *destination) const SDL_GPUTextureTransferInfo *destination)
{ {
D3D12CommandBuffer *d3d12CommandBuffer = (D3D12CommandBuffer *)commandBuffer; D3D12CommandBuffer *d3d12CommandBuffer = (D3D12CommandBuffer *)commandBuffer;
D3D12Renderer *renderer = d3d12CommandBuffer->renderer;
D3D12_TEXTURE_COPY_LOCATION sourceLocation; D3D12_TEXTURE_COPY_LOCATION sourceLocation;
D3D12_TEXTURE_COPY_LOCATION destinationLocation; D3D12_TEXTURE_COPY_LOCATION destinationLocation;
Uint32 pixelsPerRow = destination->pixels_per_row; Uint32 pixelsPerRow = destination->pixels_per_row;
@@ -6317,11 +6327,12 @@ static void D3D12_DownloadFromTexture(
D3D12BufferContainer *destinationContainer = (D3D12BufferContainer *)destination->transfer_buffer; D3D12BufferContainer *destinationContainer = (D3D12BufferContainer *)destination->transfer_buffer;
D3D12Buffer *destinationBuffer = destinationContainer->activeBuffer; D3D12Buffer *destinationBuffer = destinationContainer->activeBuffer;
/* D3D12 requires texture data row pitch to be 256 byte aligned, which is obviously insane. /* Unless the UnrestrictedBufferTextureCopyPitchSupported feature is supported, D3D12 requires
* Instead of exposing that restriction to the client, which is a huge rake to step on, * texture data row pitch to be 256 byte aligned, which is obviously insane. Instead of exposing
* and a restriction that no other backend requires, we're going to copy data to a temporary buffer, * that restriction to the client, which is a huge rake to step on, and a restriction that no
* copy THAT data to the texture, and then get rid of the temporary buffer ASAP. * other backend requires, we're going to copy data to a temporary buffer, copy THAT data to the
* If we're lucky and the row pitch and depth pitch are already aligned, we can skip all of that. * texture, and then get rid of the temporary buffer ASAP. If we're lucky and the row pitch and
* depth pitch are already aligned, we can skip all of that.
* *
* D3D12 also requires offsets to be 512 byte aligned. We'll fix that for the client and warn them as well. * D3D12 also requires offsets to be 512 byte aligned. We'll fix that for the client and warn them as well.
* *
@@ -6341,9 +6352,15 @@ static void D3D12_DownloadFromTexture(
rowsPerSlice = source->h; rowsPerSlice = source->h;
} }
alignedRowPitch = D3D12_INTERNAL_Align(rowPitch, D3D12_TEXTURE_DATA_PITCH_ALIGNMENT); if (renderer->UnrestrictedBufferTextureCopyPitchSupported) {
needsRealignment = rowsPerSlice != source->h || rowPitch != alignedRowPitch; alignedRowPitch = rowPitch;
needsPlacementCopy = destination->offset % D3D12_TEXTURE_DATA_PLACEMENT_ALIGNMENT != 0; needsRealignment = false;
needsPlacementCopy = false;
} else {
alignedRowPitch = D3D12_INTERNAL_Align(rowPitch, D3D12_TEXTURE_DATA_PITCH_ALIGNMENT);
needsRealignment = rowsPerSlice != source->h || rowPitch != alignedRowPitch;
needsPlacementCopy = destination->offset % D3D12_TEXTURE_DATA_PLACEMENT_ALIGNMENT != 0;
}
sourceLocation.Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX; sourceLocation.Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX;
sourceLocation.SubresourceIndex = sourceSubresource->index; sourceLocation.SubresourceIndex = sourceSubresource->index;
@@ -9687,6 +9704,18 @@ static SDL_GPUDevice *D3D12_CreateDevice(bool debugMode, bool preferLowPower, SD
} }
#endif #endif
// Check for unrestricted texture-buffer copy pitch support
D3D12_FEATURE_DATA_D3D12_OPTIONS13 options13;
res = ID3D12Device_CheckFeatureSupport(
renderer->device,
D3D12_FEATURE_D3D12_OPTIONS13,
&options13,
sizeof(options13));
if (SUCCEEDED(res)) {
renderer->UnrestrictedBufferTextureCopyPitchSupported = options13.UnrestrictedBufferTextureCopyPitchSupported;
}
// Create command queue // Create command queue
#if (defined(SDL_PLATFORM_XBOXONE) || defined(SDL_PLATFORM_XBOXSERIES)) #if (defined(SDL_PLATFORM_XBOXONE) || defined(SDL_PLATFORM_XBOXSERIES))
if (s_CommandQueue != NULL) { if (s_CommandQueue != NULL) {