Skip to content

Commit

Permalink
gpu: Add NonBindlessOOBTexelBufferPass
Browse files Browse the repository at this point in the history
  • Loading branch information
spencer-lunarg committed Aug 22, 2024
1 parent dac5088 commit b2ab14f
Show file tree
Hide file tree
Showing 21 changed files with 907 additions and 8 deletions.
4 changes: 4 additions & 0 deletions BUILD.gn
Original file line number Diff line number Diff line change
Expand Up @@ -191,6 +191,8 @@ vvl_sources = [
"layers/gpu/spirv/bindless_descriptor_pass.h",
"layers/gpu/spirv/non_bindless_oob_buffer_pass.cpp",
"layers/gpu/spirv/non_bindless_oob_buffer_pass.h",
"layers/gpu/spirv/non_bindless_oob_texel_buffer_pass.cpp",
"layers/gpu/spirv/non_bindless_oob_texel_buffer_pass.h",
"layers/gpu/spirv/buffer_device_address_pass.cpp",
"layers/gpu/spirv/buffer_device_address_pass.h",
"layers/gpu/spirv/function_basic_block.cpp",
Expand Down Expand Up @@ -356,6 +358,8 @@ vvl_sources = [
"layers/vulkan/generated/instrumentation_bindless_descriptor_comp.h",
"layers/vulkan/generated/instrumentation_non_bindless_oob_buffer_comp.cpp",
"layers/vulkan/generated/instrumentation_non_bindless_oob_buffer_comp.h",
"layers/vulkan/generated/instrumentation_non_bindless_oob_texel_buffer_comp.cpp",
"layers/vulkan/generated/instrumentation_non_bindless_oob_texel_buffer_comp.h",
"layers/vulkan/generated/instrumentation_buffer_device_address_comp.cpp",
"layers/vulkan/generated/instrumentation_buffer_device_address_comp.h",
"layers/vulkan/generated/instrumentation_ray_query_comp.cpp",
Expand Down
1 change: 1 addition & 0 deletions layers/gpu/instrumentation/gpu_shader_instrumentor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1204,6 +1204,7 @@ bool GpuShaderInstrumentor::InstrumentShader(const vvl::span<const uint32_t> &in
if (shader_instrumentation.bindless_descriptor) {
modified |= module.RunPassBindlessDescriptor();
modified |= module.RunPassNonBindlessOOBBuffer();
modified |= module.RunPassNonBindlessOOBTexelBuffer();
}

if (shader_instrumentation.buffer_device_address) {
Expand Down
26 changes: 26 additions & 0 deletions layers/gpu/instrumentation/gpuav_instrumentation.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -364,6 +364,32 @@ bool LogMessageInstNonBindlessOOB(Validator &gpuav, const uint32_t *error_record
}
} break;

case kErrorSubCodeNonBindlessOOBTexelBufferArrays: {
const uint32_t desc_array_size = error_record[kInstNonBindlessOOBParamOffset0];
strm << " access out of bounds. The descriptor texel buffer array is " << desc_array_size
<< " large, but as accessed at index [" << desc_index << "]";
out_vuid_msg = "UNASSIGNED-Descriptor Texel Buffer index out of bounds";
} break;

case kErrorSubCodeNonBindlessOOBTexelBufferBounds: {
const auto *binding_state = descriptor_sets[set_num].state->GetBinding(binding_num);
const vvl::BufferView *buffer_view_state =
static_cast<const vvl::TexelBinding *>(binding_state)->descriptors[desc_index].GetBufferViewState();
assert(buffer_view_state);
const uint32_t byte_offset = error_record[kInstNonBindlessOOBParamOffset0];
const uint32_t resource_size = error_record[kInstNonBindlessOOBParamOffset1];

strm << " access out of bounds. The descriptor texel buffer (" << gpuav.FormatHandle(buffer_view_state->Handle())
<< ") size is " << resource_size << " texels and the highest out of bounds access was at [" << byte_offset
<< "] bytes";

if (binding_state->type == VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER) {
out_vuid_msg = uses_shader_object ? vuid.uniform_access_oob_08612 : vuid.uniform_access_oob_06935;
} else {
out_vuid_msg = uses_shader_object ? vuid.storage_access_oob_08613 : vuid.storage_access_oob_06936;
}
} break;

default:
error_found = false;
out_oob_access = false;
Expand Down
3 changes: 3 additions & 0 deletions layers/gpu/shaders/gpu_error_codes.h
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,9 @@ const int kErrorSubCodeBindlessDescriptorNullPointer = 5;
// Buffers
const int kErrorSubCodeNonBindlessOOBBufferArrays = 1;
const int kErrorSubCodeNonBindlessOOBBufferBounds = 2;
// Texel Buffers
const int kErrorSubCodeNonBindlessOOBTexelBufferArrays = 3;
const int kErrorSubCodeNonBindlessOOBTexelBufferBounds = 4;

// Buffer Device Address
//
Expand Down
138 changes: 138 additions & 0 deletions layers/gpu/shaders/instrumentation/non_bindless_oob_texel_buffer.comp
Original file line number Diff line number Diff line change
@@ -0,0 +1,138 @@
// Copyright (c) 2024 The Khronos Group Inc.
// Copyright (c) 2024 Valve Corporation
// Copyright (c) 2024 LunarG, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// NOTE: This file doesn't contain any entrypoints and should be compiled with then new --no-link option for glslang

#version 450
#extension GL_GOOGLE_include_directive : enable
#extension GL_EXT_buffer_reference : require
#extension GL_EXT_buffer_reference_uvec2 : require
#if defined(GL_ARB_gpu_shader_int64)
#extension GL_ARB_gpu_shader_int64 : require
#else
#error No extension available for 64-bit integers.
#endif

#include "gpu_error_header.h"
#include "gpu_shaders_constants.h"
#include "common_descriptor_sets.h"

layout(buffer_reference, buffer_reference_align = 8, std430) buffer DescriptorLayoutData {
// BindingLayout[0]
uint num_bindings;
uint pad; // always zero, used to keep things aligned

// BindingLayout[1] - BindingLayout[N]
// struct glsl::BindingLayout {
// x: count
// y: state_start
// }
uvec2 data[];
};

layout(buffer_reference, buffer_reference_align = 8, std430) buffer DescriptorSetInData {
// struct glsl::DescriptorState {
// x: id
// y: extra data depending on the descriptor type
// }
uvec2 data[];
};

layout(buffer_reference, buffer_reference_align = 8, std430) buffer GlobalState {
// Maps to DescriptorHeap and used to detect if descriptor is still valid on CPU
uint data[];
};

struct DescriptorSetRecord {
DescriptorLayoutData layout_data;
DescriptorSetInData in_data;
uvec2 out_data; // unused BDA pointer
};

layout(set = kInstDefaultDescriptorSet, binding = kBindingInstBindlessDescriptor, std430) buffer BindlessStateBuffer {
GlobalState global_state;
DescriptorSetRecord desc_sets[kDebugInputBindlessMaxDescSets];
} bindless_state_buffer;

// TODO - This currently the almost exact same code as inst_non_bindless_oob_buffer
// We do not want to waste time deciding which descriptor type it is inside the shader.
// While this is some code duplication, it keeps things seperated for non-bindlesss everywhere else
bool inst_non_bindless_oob_texel_buffer(const uint inst_num, const uvec4 stage_info, const uint desc_array_size,
const uint desc_set, const uint binding, const uint desc_index, const uint byte_offset) {
uint error = 0u;
uint param_0 = 0u;
uint param_1 = 0u;
do {
// For non-array this should hopefully optimized out as "if (0 > 1)"
if (desc_index > desc_array_size) {
error = kErrorSubCodeNonBindlessOOBTexelBufferArrays;
param_0 = desc_array_size;
break;
}

DescriptorLayoutData layout_data = bindless_state_buffer.desc_sets[desc_set].layout_data;
uvec2 binding_state = layout_data.data[binding];

DescriptorSetInData in_data = bindless_state_buffer.desc_sets[desc_set].in_data;

uint state_index = binding_state.y + desc_index;

// check that the offset is in bounds
uint resource_size = in_data.data[state_index].y;
if (byte_offset >= resource_size) {
error = kErrorSubCodeNonBindlessOOBTexelBufferBounds;
param_0 = byte_offset;
param_1 = resource_size;
break;
}
} while (false);

if (0u != error) {

const uint cmd_id = inst_cmd_resource_index_buffer.index[0];
const uint cmd_errors_count = atomicAdd(inst_cmd_errors_count_buffer.errors_count[cmd_id], 1);
const bool max_cmd_errors_count_reached = cmd_errors_count >= kMaxErrorsPerCmd;

if (max_cmd_errors_count_reached) return false;

uint write_pos = atomicAdd(inst_errors_buffer.written_count, kErrorRecordSize);
const bool errors_buffer_not_filled = (write_pos + kErrorRecordSize) <= uint(inst_errors_buffer.data.length());

if (errors_buffer_not_filled) {
inst_errors_buffer.data[write_pos + kHeaderErrorRecordSizeOffset] = kErrorRecordSize;
inst_errors_buffer.data[write_pos + kHeaderShaderIdOffset] = kLinkShaderId;
inst_errors_buffer.data[write_pos + kHeaderInstructionIdOffset] = inst_num;
inst_errors_buffer.data[write_pos + kHeaderStageIdOffset] = stage_info.x;
inst_errors_buffer.data[write_pos + kHeaderStageInfoOffset_0] = stage_info.y;
inst_errors_buffer.data[write_pos + kHeaderStageInfoOffset_1] = stage_info.z;
inst_errors_buffer.data[write_pos + kHeaderStageInfoOffset_2] = stage_info.w;

inst_errors_buffer.data[write_pos + kHeaderErrorGroupOffset] = kErrorGroupInstNonBindlessOOB;
inst_errors_buffer.data[write_pos + kHeaderErrorSubCodeOffset] = error;

inst_errors_buffer.data[write_pos + kHeaderActionIdOffset] = inst_action_index_buffer.index[0];
inst_errors_buffer.data[write_pos + kHeaderCommandResourceIdOffset] = inst_cmd_resource_index_buffer.index[0];

inst_errors_buffer.data[write_pos + kInstBindlessDescSetOffset] = desc_set;
inst_errors_buffer.data[write_pos + kInstBindlessDescBindingOffset] = binding;
inst_errors_buffer.data[write_pos + kInstBindlessDescIndexOffset] = desc_index;
inst_errors_buffer.data[write_pos + kInstBindlessCustomOffset_0] = param_0;
inst_errors_buffer.data[write_pos + kInstBindlessCustomOffset_1] = param_1;
}
return false;
}
return true;
}
4 changes: 4 additions & 0 deletions layers/gpu/spirv/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,8 @@ target_sources(gpu_av_spirv PRIVATE
bindless_descriptor_pass.cpp
non_bindless_oob_buffer_pass.h
non_bindless_oob_buffer_pass.cpp
non_bindless_oob_texel_buffer_pass.h
non_bindless_oob_texel_buffer_pass.cpp
buffer_device_address_pass.h
buffer_device_address_pass.cpp
ray_query_pass.h
Expand Down Expand Up @@ -50,6 +52,8 @@ target_sources(gpu_av_spirv PRIVATE
${VVL_SOURCE_DIR}/layers/${API_TYPE}/generated/instrumentation_bindless_descriptor_comp.cpp
${VVL_SOURCE_DIR}/layers/${API_TYPE}/generated/instrumentation_non_bindless_oob_buffer_comp.h
${VVL_SOURCE_DIR}/layers/${API_TYPE}/generated/instrumentation_non_bindless_oob_buffer_comp.cpp
${VVL_SOURCE_DIR}/layers/${API_TYPE}/generated/instrumentation_non_bindless_oob_texel_buffer_comp.h
${VVL_SOURCE_DIR}/layers/${API_TYPE}/generated/instrumentation_non_bindless_oob_texel_buffer_comp.cpp
${VVL_SOURCE_DIR}/layers/${API_TYPE}/generated/instrumentation_buffer_device_address_comp.h
${VVL_SOURCE_DIR}/layers/${API_TYPE}/generated/instrumentation_buffer_device_address_comp.cpp
${VVL_SOURCE_DIR}/layers/${API_TYPE}/generated/instrumentation_ray_query_comp.h
Expand Down
3 changes: 3 additions & 0 deletions layers/gpu/spirv/bindless_descriptor_pass.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -186,6 +186,9 @@ bool BindlessDescriptorPass::AnalyzeInstruction(const Function& function, const
}

} else {
// TODO - Once all non-bindless passes are added, this check can be places at top of Run()
if (!module_.has_bindless_descriptors_ && opcode == spv::OpImageFetch) return false;

// Reference is not load or store, so ifi it isn't a image-based reference, move on
const uint32_t image_word = OpcodeImageAccessPosition(opcode);
if (image_word == 0) {
Expand Down
1 change: 1 addition & 0 deletions layers/gpu/spirv/link.h
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ enum class LinkFunctions {
inst_buffer_device_address,
inst_bindless_descriptor,
inst_non_bindless_oob_buffer,
inst_non_bindless_oob_texel_buffer,
inst_ray_query,
};

Expand Down
36 changes: 30 additions & 6 deletions layers/gpu/spirv/module.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
#include "buffer_device_address_pass.h"
#include "bindless_descriptor_pass.h"
#include "non_bindless_oob_buffer_pass.h"
#include "non_bindless_oob_texel_buffer_pass.h"
#include "ray_query_pass.h"
#include "debug_printf_pass.h"

Expand Down Expand Up @@ -293,6 +294,15 @@ bool Module::RunPassNonBindlessOOBBuffer() {
return changed;
}

bool Module::RunPassNonBindlessOOBTexelBuffer() {
NonBindlessOOBTexelBufferPass pass(*this);
const bool changed = pass.Run();
if (print_debug_info_) {
pass.PrintDebugInfo();
}
return changed;
}

bool Module::RunPassBufferDeviceAddress() {
BufferDeviceAddressPass pass(*this);
const bool changed = pass.Run();
Expand Down Expand Up @@ -393,7 +403,7 @@ void Module::LinkFunction(const LinkInfo& info) {
// track the incoming SSA IDs with what they are in the module
// < old_id, new_id >
vvl::unordered_map<uint32_t, uint32_t> id_swap_map;
const uint32_t function_type_id = TakeNextId();
uint32_t function_type_id = 0;

// Track all decorations and add after when have full id_swap_map
InstructionList decorations;
Expand Down Expand Up @@ -493,17 +503,31 @@ void Module::LinkFunction(const LinkInfo& info) {
type_manager_.AddType(std::move(new_inst), spv_type);
break;
}
case SpvType::kStruct:
case SpvType::kFunction: {
case SpvType::kStruct: {
// For OpTypeStruct, we just add it regardless since low chance to find for the amount of time to search all
// struct (which there can be quite a bit of) For OpTypeFunction, we will only have one and custom function
// likely won't match anything neither
type_id = (spv_type == SpvType::kFunction) ? function_type_id : TakeNextId();
// struct (which there can be quite a bit of)
type_id = TakeNextId();
new_inst->ReplaceResultId(type_id);
new_inst->ReplaceLinkedId(id_swap_map);
type_manager_.AddType(std::move(new_inst), spv_type).Id();
break;
}
case SpvType::kFunction: {
// It is not valid to have duplicate OpTypeFunction and some linked in functions will have the same signature
new_inst->ReplaceLinkedId(id_swap_map);
// First swap out IDs so comparison will be the same
const Type* function_type = type_manager_.FindFunctionType(*new_inst.get());
if (function_type) {
// Just reuse non-unique OpTypeFunction
function_type_id = function_type->Id();
} else {
function_type_id = TakeNextId();
type_id = function_type_id;
new_inst->ReplaceResultId(type_id);
type_manager_.AddType(std::move(new_inst), spv_type).Id();
}
break;
}
default:
break;
}
Expand Down
1 change: 1 addition & 0 deletions layers/gpu/spirv/module.h
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,7 @@ class Module {
// Return true if code was instrumented
bool RunPassBindlessDescriptor();
bool RunPassNonBindlessOOBBuffer();
bool RunPassNonBindlessOOBTexelBuffer();
bool RunPassBufferDeviceAddress();
bool RunPassRayQuery();
bool RunPassDebugPrintf(uint32_t binding_slot = 0);
Expand Down
2 changes: 1 addition & 1 deletion layers/gpu/spirv/non_bindless_oob_buffer_pass.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,6 @@ void NonBindlessOOBBufferPass::Reset() {
}

bool NonBindlessOOBBufferPass::AnalyzeInstruction(const Function& function, const Instruction& inst) {
if (module_.has_bindless_descriptors_) return false;
const uint32_t opcode = inst.Opcode();

if (opcode != spv::OpLoad && opcode != spv::OpStore) {
Expand Down Expand Up @@ -175,6 +174,7 @@ void NonBindlessOOBBufferPass::PrintDebugInfo() {

// Created own Run() because need to control finding the largest offset in a given block
bool NonBindlessOOBBufferPass::Run() {
if (module_.has_bindless_descriptors_) return false;
// Can safely loop function list as there is no injecting of new Functions until linking time
for (const auto& function : module_.functions_) {
for (auto block_it = function->blocks_.begin(); block_it != function->blocks_.end(); ++block_it) {
Expand Down
Loading

0 comments on commit b2ab14f

Please sign in to comment.