Skip to content

Commit

Permalink
gpu: Defer GPL instrumentation until linking
Browse files Browse the repository at this point in the history
  • Loading branch information
spencer-lunarg committed Aug 30, 2024
1 parent c155770 commit d5dd9d1
Show file tree
Hide file tree
Showing 3 changed files with 227 additions and 4 deletions.
220 changes: 216 additions & 4 deletions layers/gpu/instrumentation/gpu_shader_instrumentor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,9 @@
*/

#include "gpu/instrumentation/gpu_shader_instrumentor.h"
#include <vulkan/vulkan_core.h>

#include "generated/layer_chassis_dispatch.h"
#include "gpu/core/gpu_state_tracker.h"
#include "gpu/spirv/module.h"
#include "chassis/chassis_modification_state.h"
Expand Down Expand Up @@ -614,16 +616,26 @@ void GpuShaderInstrumentor::PreCallRecordCreateGraphicsPipelines(VkDevice device

for (uint32_t i = 0; i < count; ++i) {
const auto &pipeline_state = pipeline_states[i];

// Need to make a deep copy so if SPIR-V is inlined, user doesn't see it after the call
auto &new_pipeline_ci = chassis_state.modified_create_infos[i];
new_pipeline_ci.initialize(&pipeline_state->GraphicsCreateInfo());

// Move all instrumentation until the final linking time
// This still needs to create a copy of the create_info (we *could* have a mix of GPL and non-GPL)
if (pipeline_state->create_flags & VK_PIPELINE_CREATE_LIBRARY_BIT_KHR) {
continue;
}

const Location create_info_loc = record_obj.location.dot(vvl::Field::pCreateInfos, i);
auto &shader_instrumentation_metadata = chassis_state.shader_instrumentations_metadata[i];

PreCallRecordPipelineCreationShaderInstrumentation(pAllocator, *pipeline_state, new_pipeline_ci, create_info_loc,
shader_instrumentation_metadata);
if (pipeline_state->linking_shaders != 0) {
PreCallRecordPipelineCreationShaderInstrumentationGPL(pAllocator, *pipeline_state, new_pipeline_ci, create_info_loc,
shader_instrumentation_metadata);
} else {
PreCallRecordPipelineCreationShaderInstrumentation(pAllocator, *pipeline_state, new_pipeline_ci, create_info_loc,
shader_instrumentation_metadata);
}
}

chassis_state.pCreateInfos = reinterpret_cast<VkGraphicsPipelineCreateInfo *>(chassis_state.modified_create_infos.data());
Expand Down Expand Up @@ -734,8 +746,17 @@ void GpuShaderInstrumentor::PostCallRecordCreateGraphicsPipelines(VkDevice devic

auto pipeline_state = Get<vvl::Pipeline>(pPipelines[i]);
ASSERT_AND_CONTINUE(pipeline_state);
if (pipeline_state->create_flags & VK_PIPELINE_CREATE_LIBRARY_BIT_KHR) {
continue; // Move all instrumentation until the final linking time
}

auto &shader_instrumentation_metadata = chassis_state.shader_instrumentations_metadata[i];
PostCallRecordPipelineCreationShaderInstrumentation(*pipeline_state, shader_instrumentation_metadata);

if (pipeline_state->linking_shaders != 0) {
PostCallRecordPipelineCreationShaderInstrumentationGPL(*pipeline_state, pAllocator, shader_instrumentation_metadata);
} else {
PostCallRecordPipelineCreationShaderInstrumentation(*pipeline_state, shader_instrumentation_metadata);
}
}
}

Expand Down Expand Up @@ -803,6 +824,12 @@ void GpuShaderInstrumentor::PreCallRecordDestroyPipeline(VkDevice device, VkPipe
for (auto shader_module : pipeline_state->instrumented_shader_module) {
DispatchDestroyShaderModule(device, shader_module, pAllocator);
}
if (pipeline_state->pre_raster_lib != VK_NULL_HANDLE) {
DispatchDestroyPipeline(device, pipeline_state->pre_raster_lib, pAllocator);
}
if (pipeline_state->frag_out_lib != VK_NULL_HANDLE) {
DispatchDestroyPipeline(device, pipeline_state->frag_out_lib, pAllocator);
}
}

BaseClass::PreCallRecordDestroyPipeline(device, pipeline, pAllocator, record_obj);
Expand Down Expand Up @@ -1052,6 +1079,152 @@ void GpuShaderInstrumentor::PreCallRecordPipelineCreationShaderInstrumentation(
}
}

// Instruments the shaders of a graphics pipeline that is being linked from pipeline libraries (GPL).
//
// Instrumentation of the libraries themselves is deferred, so at link time each library that carries shader stages is
// re-created from its own create info with instrumented SPIR-V, and the handle in the (copied) VkPipelineLibraryCreateInfoKHR
// of |new_pipeline_ci| is swapped for the freshly created library.
//
// pAllocator                      - allocation callbacks forwarded to every Vulkan object created here
// pipeline_state                  - state of the pipeline being linked; receives the handles of the new libraries
// new_pipeline_ci                 - deep copy of the application's create info; its pLibraries array is modified in place
// loc                             - location used when reporting internal errors
// shader_instrumentation_metadata - filled with one unique shader id per library stage (0 == stage not instrumented),
//                                   in the same order PostCallRecordPipelineCreationShaderInstrumentationGPL reads them
void GpuShaderInstrumentor::PreCallRecordPipelineCreationShaderInstrumentationGPL(
    const VkAllocationCallbacks *pAllocator, vvl::Pipeline &pipeline_state, vku::safe_VkGraphicsPipelineCreateInfo &new_pipeline_ci,
    const Location &loc, chassis::ShaderInstrumentationMetadata &shader_instrumentation_metadata) {
    if (pipeline_state.stage_states.empty()) return;  // will hit with GPL without shaders in them

    // Init here instead of in chassis so we don't pay cost when GPU-AV is not used
    const size_t total_stages = pipeline_state.stage_states.size();
    shader_instrumentation_metadata.passed_in_shader_stage_ci = false;
    shader_instrumentation_metadata.spirv_unique_id_map.resize(total_stages, 0);

    // The instrumentation needs its own descriptor set index. If the pipeline already uses that index, or the pipeline
    // layout already has a set layout at (or past) that index, nothing is instrumented.
    bool instrument_shader = true;
    if (pipeline_state.active_slots.find(desc_set_bind_index_) != pipeline_state.active_slots.end()) {
        instrument_shader = false;
    }
    const auto pipeline_layout = pipeline_state.PipelineLayoutState();
    if (pipeline_layout && pipeline_layout->set_layouts.size() > desc_set_bind_index_) {
        instrument_shader = false;
    }

    if (!instrument_shader) return;

    // TODO - measure and see if would be better to make a gpuav subclasses of pipeline layout and store this information once there
    // (not sure how much pipeline layout re-usage there is)
    bool has_bindless_descriptors = false;
    if (pipeline_layout) {
        for (const auto &set_layout : pipeline_layout->set_layouts) {
            if (set_layout) {
                for (uint32_t i = 0; i < set_layout->GetBindingCount(); i++) {
                    const VkDescriptorBindingFlags flags = set_layout->GetDescriptorBindingFlagsFromIndex(i);
                    if (vvl::IsBindless(flags)) {
                        has_bindless_descriptors = true;
                        break;
                    }
                }
            }
            if (has_bindless_descriptors) break;
        }
    }

    // We're modifying the copied, safe create info, which is ok to be non-const
    auto library_create_info = const_cast<VkPipelineLibraryCreateInfoKHR *>(
        vku::FindStructInPNextChain<VkPipelineLibraryCreateInfoKHR>(new_pipeline_ci.pNext));
    // Without a library list there is nothing to re-create
    if (!library_create_info) return;

    // Running index over every stage of every shader-carrying library; the post-call side walks the same sequence
    uint32_t shader_index = 0;
    for (uint32_t i = 0; i < library_create_info->libraryCount; ++i) {
        const auto lib = Get<vvl::Pipeline>(library_create_info->pLibraries[i]);
        if (!lib) continue;
        if (lib->stage_states.empty()) continue;

        vku::safe_VkGraphicsPipelineCreateInfo new_lib_pipeline_ci(lib->GraphicsCreateInfo());

        for (uint32_t k = 0; k < static_cast<uint32_t>(lib->stage_states.size()); ++k) {
            // Claim this stage's metadata slot before any early-out: the post-call function consumes exactly one slot
            // per stage, so a skipped stage must leave a 0 behind instead of shifting the ids of the following stages.
            const uint32_t metadata_slot = shader_index++;

            const auto &stage_state = lib->stage_states[k];
            auto module_state = std::const_pointer_cast<vvl::ShaderModule>(stage_state.module_state);
            ASSERT_AND_CONTINUE(module_state);

            const VkShaderStageFlagBits stage = stage_state.GetStage();

            // Find the stage create info of the library copy that matches this stage
            vku::safe_VkPipelineShaderStageCreateInfo *stage_ci = nullptr;
            for (uint32_t j = 0; j < new_lib_pipeline_ci.stageCount; ++j) {
                if (new_lib_pipeline_ci.pStages[j].stage == stage) {
                    stage_ci = &new_lib_pipeline_ci.pStages[j];
                }
            }
            // No matching stage in the copied create info, nothing we can patch
            if (!stage_ci) continue;

            // Check pNext for inlined SPIR-V
            // We're modifying the copied, safe create info, which is ok to be non-const
            auto sm_ci =
                const_cast<vku::safe_VkShaderModuleCreateInfo *>(reinterpret_cast<const vku::safe_VkShaderModuleCreateInfo *>(
                    vku::FindStructInPNextChain<VkShaderModuleCreateInfo>(stage_ci->pNext)));

            if (gpuav_settings.select_instrumented_shaders) {
                if (sm_ci && !IsSelectiveInstrumentationEnabled(sm_ci->pNext)) {
                    continue;
                } else if (selected_instrumented_shaders.find(module_state->VkHandle()) == selected_instrumented_shaders.end()) {
                    continue;
                }
            }

            uint32_t unique_shader_id = 0;
            bool cached = false;
            bool pass = false;
            std::vector<uint32_t> instrumented_spirv;
            if (gpuav_settings.cache_instrumented_shaders) {
                unique_shader_id = hash_util::ShaderHash(module_state->spirv->words_.data(),
                                                         module_state->spirv->words_.size() * sizeof(uint32_t));
                if (const auto spirv = instrumented_shaders_cache_.Get(unique_shader_id)) {
                    instrumented_spirv = *spirv;
                    cached = true;
                }
            } else {
                unique_shader_id = unique_shader_module_id_++;
            }
            if (!cached) {
                pass = InstrumentShader(module_state->spirv->words_, unique_shader_id, has_bindless_descriptors, loc,
                                        instrumented_spirv);
            }
            if (cached || pass) {
                // The map was sized from the linked pipeline's stages; never write past it
                if (metadata_slot < shader_instrumentation_metadata.spirv_unique_id_map.size()) {
                    shader_instrumentation_metadata.spirv_unique_id_map[metadata_slot] = unique_shader_id;
                }
                if (module_state->VkHandle() != VK_NULL_HANDLE) {
                    // If the user used vkCreateShaderModule, we create a new VkShaderModule to replace with the instrumented
                    // shader
                    VkShaderModule instrumented_shader_module;
                    VkShaderModuleCreateInfo create_info = vku::InitStructHelper();
                    create_info.pCode = instrumented_spirv.data();
                    create_info.codeSize = instrumented_spirv.size() * sizeof(uint32_t);
                    VkResult result = DispatchCreateShaderModule(device, &create_info, pAllocator, &instrumented_shader_module);
                    if (result == VK_SUCCESS) {
                        // The last argument selects the stage inside this library, so it is the stage index k
                        // (not the library index i)
                        SetShaderModule(new_lib_pipeline_ci, *stage_state.pipeline_create_info, instrumented_shader_module, k);
                        lib->instrumented_shader_module.emplace_back(instrumented_shader_module);
                    } else {
                        InternalError(device, loc, "Unable to replace non-instrumented shader with instrumented one.");
                    }
                } else if (sm_ci) {
                    // The user is inlining the Shader Module into the pipeline, so just need to update the spirv
                    shader_instrumentation_metadata.passed_in_shader_stage_ci = true;
                    // TODO - This makes a copy, but could save on Chassis stack instead (then remove function from VUL).
                    // The core issue is we always use std::vector<uint32_t> but Safe Struct manages its own version of the pCode
                    // memory. It would be much harder to change everything from std::vector and instead to adjust Safe Struct to
                    // not double-free the memory on us. If making any changes, we have to consider a case where the user inlines
                    // the fragment shader, but use a normal VkShaderModule in the vertex shader.
                    sm_ci->SetCode(instrumented_spirv);
                } else {
                    assert(false);
                }

                if (gpuav_settings.cache_instrumented_shaders && !cached) {
                    instrumented_shaders_cache_.Add(unique_shader_id, instrumented_spirv);
                }
            }
        }

        // Re-create the library from the patched create info. On failure keep linking against the application's
        // original library instead of handing an undefined handle to the driver.
        VkPipeline new_lib_pipeline = VK_NULL_HANDLE;
        const VkResult lib_result =
            DispatchCreateGraphicsPipelines(device, VK_NULL_HANDLE, 1, new_lib_pipeline_ci.ptr(), pAllocator, &new_lib_pipeline);
        if (lib_result != VK_SUCCESS || new_lib_pipeline == VK_NULL_HANDLE) {
            InternalError(device, loc, "Unable to create instrumented pipeline library.");
            continue;
        }

        // Remember the new library on the linked pipeline so it can be destroyed together with it
        if (lib->active_shaders & VK_SHADER_STAGE_FRAGMENT_BIT) {
            pipeline_state.frag_out_lib = new_lib_pipeline;
        } else {
            pipeline_state.pre_raster_lib = new_lib_pipeline;
        }

        const_cast<VkPipeline *>(library_create_info->pLibraries)[i] = new_lib_pipeline;
    }
}

// Now that we have created the pipeline (and have its handle) build up the shader map for each shader we instrumented
void GpuShaderInstrumentor::PostCallRecordPipelineCreationShaderInstrumentation(
vvl::Pipeline &pipeline_state, chassis::ShaderInstrumentationMetadata &shader_instrumentation_metadata) {
Expand Down Expand Up @@ -1083,6 +1256,45 @@ void GpuShaderInstrumentor::PostCallRecordPipelineCreationShaderInstrumentation(
}
}

// Now that the linked pipeline has been created, register every library shader that was instrumented at link time.
//
// Walks the libraries of |pipeline_state| and consumes one entry of spirv_unique_id_map per library stage. For each
// non-zero id an entry is stored in shader_map_, together with a copy of the original SPIR-V.
//
// pipeline_state                  - the linked pipeline whose libraries are inspected
// pAllocator                      - currently unused
// shader_instrumentation_metadata - unique shader ids recorded by the matching pre-call function (0 == not instrumented)
void GpuShaderInstrumentor::PostCallRecordPipelineCreationShaderInstrumentationGPL(
    vvl::Pipeline &pipeline_state, const VkAllocationCallbacks *pAllocator,
    chassis::ShaderInstrumentationMetadata &shader_instrumentation_metadata) {
    // Nothing to walk if the pipeline carries no library list
    if (!pipeline_state.library_create_info) return;

    const auto &unique_id_map = shader_instrumentation_metadata.spirv_unique_id_map;

    // Running index over every stage of every shader-carrying library
    uint32_t shader_index = 0;
    for (uint32_t i = 0; i < pipeline_state.library_create_info->libraryCount; ++i) {
        const auto lib = Get<vvl::Pipeline>(pipeline_state.library_create_info->pLibraries[i]);
        if (!lib) continue;
        if (lib->stage_states.empty()) continue;

        for (uint32_t k = 0; k < static_cast<uint32_t>(lib->stage_states.size()); ++k) {
            const uint32_t metadata_slot = shader_index++;
            // Every later slot would be out of range as well, so there is nothing left to record
            if (metadata_slot >= unique_id_map.size()) return;

            const uint32_t unique_shader_id = unique_id_map[metadata_slot];
            // if the shader for some reason was not instrumented, there is nothing to save
            if (unique_shader_id == 0) {
                continue;
            }

            const auto &stage_state = lib->stage_states[k];
            const auto &module_state = stage_state.module_state;
            // Without module state there is neither a handle nor SPIR-V to record
            if (!module_state) continue;

            // We currently need to store a copy of the original, non-instrumented shader so if there is debug information,
            // we can reference it by the instruction number printed out in the shader. Since the application can destroy the
            // original VkShaderModule, there is a chance this will be gone, we need to copy it now.
            // TODO - in the instrumentation, instead of printing the instruction number only, if we print out debug info, we
            // can remove this copy
            std::vector<uint32_t> code;
            if (module_state->spirv) code = module_state->spirv->words_;

            // Inlined SPIR-V has no VkShaderModule; use the placeholder handle in that case
            VkShaderModule shader_module_handle = module_state->VkHandle();
            if (shader_module_handle == VK_NULL_HANDLE && shader_instrumentation_metadata.passed_in_shader_stage_ci) {
                shader_module_handle = kPipelineStageInfoHandle;
            }

            shader_map_.insert_or_assign(unique_shader_id, lib->VkHandle(), shader_module_handle, VK_NULL_HANDLE, std::move(code));
        }
    }
}

void GpuShaderInstrumentor::PostCallRecordPipelineCreationsRT(
VkResult result, VkDeferredOperationKHR deferredOperation, const VkAllocationCallbacks *pAllocator,
std::shared_ptr<chassis::CreateRayTracingPipelinesKHR> chassis_state) {
Expand Down
7 changes: 7 additions & 0 deletions layers/gpu/instrumentation/gpu_shader_instrumentor.h
Original file line number Diff line number Diff line change
Expand Up @@ -195,8 +195,15 @@ class GpuShaderInstrumentor : public ValidationStateTracker {
void PreCallRecordPipelineCreationShaderInstrumentation(
const VkAllocationCallbacks *pAllocator, vvl::Pipeline &pipeline_state, SafeCreateInfo &new_pipeline_ci,
const Location &loc, chassis::ShaderInstrumentationMetadata &shader_instrumentation_metadata);
void PreCallRecordPipelineCreationShaderInstrumentationGPL(
const VkAllocationCallbacks *pAllocator, vvl::Pipeline &pipeline_state,
vku::safe_VkGraphicsPipelineCreateInfo &new_pipeline_ci, const Location &loc,
chassis::ShaderInstrumentationMetadata &shader_instrumentation_metadata);
void PostCallRecordPipelineCreationShaderInstrumentation(
vvl::Pipeline &pipeline_state, chassis::ShaderInstrumentationMetadata &shader_instrumentation_metadata);
void PostCallRecordPipelineCreationShaderInstrumentationGPL(
vvl::Pipeline &pipeline_state, const VkAllocationCallbacks *pAllocator,
chassis::ShaderInstrumentationMetadata &shader_instrumentation_metadata);
void PostCallRecordPipelineCreationsRT(VkResult result, VkDeferredOperationKHR deferredOperation,
const VkAllocationCallbacks *pAllocator,
std::shared_ptr<chassis::CreateRayTracingPipelinesKHR> chassis_state);
Expand Down
4 changes: 4 additions & 0 deletions layers/state_tracker/pipeline_state.h
Original file line number Diff line number Diff line change
Expand Up @@ -132,8 +132,12 @@ class Pipeline : public StateObject {

mutable bool binary_data_released = false;

// TODO - We need a gpu_tracker::Pipeline and have these there
// We create a VkShaderModule that is instrumented and need to delete before leaving the pipeline call
std::vector<VkShaderModule> instrumented_shader_module;
// When we instrument GPL at link time, we need to hold the new libraries until they are done
VkPipeline pre_raster_lib = VK_NULL_HANDLE;
VkPipeline frag_out_lib = VK_NULL_HANDLE;

// Executable or legacy pipeline
Pipeline(const ValidationStateTracker &state_data, const VkGraphicsPipelineCreateInfo *pCreateInfo,
Expand Down

0 comments on commit d5dd9d1

Please sign in to comment.