gpu: Defer GPL instrumentation until linking

KhronosGroup · Aug 30, 2024 · d5dd9d1 · d5dd9d1
1 parent c155770
commit d5dd9d1
Show file tree

Hide file tree

Showing 3 changed files with 227 additions and 4 deletions.
diff --git a/layers/gpu/instrumentation/gpu_shader_instrumentor.cpp b/layers/gpu/instrumentation/gpu_shader_instrumentor.cpp
@@ -16,7 +16,9 @@
  */
 
 #include "gpu/instrumentation/gpu_shader_instrumentor.h"
+#include <vulkan/vulkan_core.h>
 
+#include "generated/layer_chassis_dispatch.h"
 #include "gpu/core/gpu_state_tracker.h"
 #include "gpu/spirv/module.h"
 #include "chassis/chassis_modification_state.h"
@@ -614,16 +616,26 @@ void GpuShaderInstrumentor::PreCallRecordCreateGraphicsPipelines(VkDevice device
 
     for (uint32_t i = 0; i < count; ++i) {
         const auto &pipeline_state = pipeline_states[i];
-
         // Need to make a deep copy so if SPIR-V is inlined, user doesn't see it after the call
         auto &new_pipeline_ci = chassis_state.modified_create_infos[i];
         new_pipeline_ci.initialize(&pipeline_state->GraphicsCreateInfo());
 
+        // Move all instrumentation until the final linking time
+        // This still needs to create a copy of the create_info (we *could* have a mix of GPL and non-GPL)
+        if (pipeline_state->create_flags & VK_PIPELINE_CREATE_LIBRARY_BIT_KHR) {
+            continue;
+        }
+
         const Location create_info_loc = record_obj.location.dot(vvl::Field::pCreateInfos, i);
         auto &shader_instrumentation_metadata = chassis_state.shader_instrumentations_metadata[i];
 
-        PreCallRecordPipelineCreationShaderInstrumentation(pAllocator, *pipeline_state, new_pipeline_ci, create_info_loc,
-                                                           shader_instrumentation_metadata);
+        if (pipeline_state->linking_shaders != 0) {
+            PreCallRecordPipelineCreationShaderInstrumentationGPL(pAllocator, *pipeline_state, new_pipeline_ci, create_info_loc,
+                                                                  shader_instrumentation_metadata);
+        } else {
+            PreCallRecordPipelineCreationShaderInstrumentation(pAllocator, *pipeline_state, new_pipeline_ci, create_info_loc,
+                                                               shader_instrumentation_metadata);
+        }
     }
 
     chassis_state.pCreateInfos = reinterpret_cast<VkGraphicsPipelineCreateInfo *>(chassis_state.modified_create_infos.data());
@@ -734,8 +746,17 @@ void GpuShaderInstrumentor::PostCallRecordCreateGraphicsPipelines(VkDevice devic
 
         auto pipeline_state = Get<vvl::Pipeline>(pPipelines[i]);
         ASSERT_AND_CONTINUE(pipeline_state);
+        if (pipeline_state->create_flags & VK_PIPELINE_CREATE_LIBRARY_BIT_KHR) {
+            continue;  // Move all instrumentation until the final linking time
+        }
+
         auto &shader_instrumentation_metadata = chassis_state.shader_instrumentations_metadata[i];
-        PostCallRecordPipelineCreationShaderInstrumentation(*pipeline_state, shader_instrumentation_metadata);
+
+        if (pipeline_state->linking_shaders != 0) {
+            PostCallRecordPipelineCreationShaderInstrumentationGPL(*pipeline_state, pAllocator, shader_instrumentation_metadata);
+        } else {
+            PostCallRecordPipelineCreationShaderInstrumentation(*pipeline_state, shader_instrumentation_metadata);
+        }
     }
 }
 
@@ -803,6 +824,12 @@ void GpuShaderInstrumentor::PreCallRecordDestroyPipeline(VkDevice device, VkPipe
         for (auto shader_module : pipeline_state->instrumented_shader_module) {
             DispatchDestroyShaderModule(device, shader_module, pAllocator);
         }
+        if (pipeline_state->pre_raster_lib != VK_NULL_HANDLE) {
+            DispatchDestroyPipeline(device, pipeline_state->pre_raster_lib, pAllocator);
+        }
+        if (pipeline_state->frag_out_lib != VK_NULL_HANDLE) {
+            DispatchDestroyPipeline(device, pipeline_state->frag_out_lib, pAllocator);
+        }
     }
 
     BaseClass::PreCallRecordDestroyPipeline(device, pipeline, pAllocator, record_obj);
@@ -1052,6 +1079,152 @@ void GpuShaderInstrumentor::PreCallRecordPipelineCreationShaderInstrumentation(
     }
 }
 
+// Instruments the shaders of a pipeline that is being linked from graphics pipeline libraries (GPL).
+//
+// For every library listed in the VkPipelineLibraryCreateInfoKHR chained to |new_pipeline_ci| that has shader stages, a copy of
+// the library's create info is made, its shaders are swapped for instrumented ones, a replacement library pipeline is created
+// from that copy, and the replacement handle is written over the entry in pLibraries so the link consumes it.
+//
+// pAllocator                      - allocator forwarded to the shader module / pipeline creation calls
+// pipeline_state                  - state of the pipeline being linked; receives the replacement library handles
+// new_pipeline_ci                 - copy of the create info handed to the driver; its pLibraries array is modified in place
+// loc                             - location used when reporting internal errors
+// shader_instrumentation_metadata - receives the unique shader id of every instrumented stage, one slot per library stage
+void GpuShaderInstrumentor::PreCallRecordPipelineCreationShaderInstrumentationGPL(
+    const VkAllocationCallbacks *pAllocator, vvl::Pipeline &pipeline_state, vku::safe_VkGraphicsPipelineCreateInfo &new_pipeline_ci,
+    const Location &loc, chassis::ShaderInstrumentationMetadata &shader_instrumentation_metadata) {
+    if (pipeline_state.stage_states.empty()) return;  // will hit with GPL without shaders in them
+
+    // Init here instead of in chassis so we don't pay cost when GPU-AV is not used
+    const size_t total_stages = pipeline_state.stage_states.size();
+    auto &unique_id_map = shader_instrumentation_metadata.spirv_unique_id_map;
+    shader_instrumentation_metadata.passed_in_shader_stage_ci = false;
+    unique_id_map.resize(total_stages, 0);
+
+    bool instrument_shader = true;
+    // If the app requests all available sets, the pipeline layout was not modified at pipeline layout creation and the
+    // already instrumented shaders need to be replaced with uninstrumented shaders
+    if (pipeline_state.active_slots.find(desc_set_bind_index_) != pipeline_state.active_slots.end()) {
+        instrument_shader = false;
+    }
+    const auto pipeline_layout = pipeline_state.PipelineLayoutState();
+    if (pipeline_layout && pipeline_layout->set_layouts.size() > desc_set_bind_index_) {
+        instrument_shader = false;
+    }
+
+    if (!instrument_shader) return;
+
+    // TODO - measure and see if would be better to make a gpuav subclasses of pipeline layout and store this information once there
+    // (not sure how much pipeline layout re-usage there is)
+    bool has_bindless_descriptors = false;
+    if (pipeline_layout) {
+        for (const auto &set_layout : pipeline_layout->set_layouts) {
+            if (set_layout) {
+                for (uint32_t i = 0; i < set_layout->GetBindingCount(); i++) {
+                    const VkDescriptorBindingFlags flags = set_layout->GetDescriptorBindingFlagsFromIndex(i);
+                    if (vvl::IsBindless(flags)) {
+                        has_bindless_descriptors = true;
+                        break;
+                    }
+                }
+            }
+            if (has_bindless_descriptors) break;
+        }
+    }
+
+    // We're modifying the copied, safe create info, which is ok to be non-const
+    auto library_create_info = const_cast<VkPipelineLibraryCreateInfoKHR *>(
+        vku::FindStructInPNextChain<VkPipelineLibraryCreateInfoKHR>(new_pipeline_ci.pNext));
+    // Without a library list there is nothing to re-create
+    if (!library_create_info) return;
+
+    uint32_t shader_index = 0;
+    for (uint32_t i = 0; i < library_create_info->libraryCount; ++i) {
+        const auto lib = Get<vvl::Pipeline>(library_create_info->pLibraries[i]);
+        if (!lib) continue;
+        if (lib->stage_states.empty()) continue;
+
+        vku::safe_VkGraphicsPipelineCreateInfo new_lib_pipeline_ci(lib->GraphicsCreateInfo());
+
+        // Remember where this library's slots start so they can be cleared if the replacement library can't be created
+        const uint32_t first_slot_of_lib = shader_index;
+
+        for (uint32_t k = 0; k < static_cast<uint32_t>(lib->stage_states.size()); ++k) {
+            // Every library stage consumes one slot, whether or not it ends up instrumented. The PostCall GPL pass advances
+            // its index once per stage, so advancing only on success would attribute ids to the wrong stages.
+            const uint32_t unique_id_slot = shader_index++;
+
+            const auto &stage_state = lib->stage_states[k];
+            auto module_state = std::const_pointer_cast<vvl::ShaderModule>(stage_state.module_state);
+            ASSERT_AND_CONTINUE(module_state);
+
+            // NOTE(review): assumes the libraries never hold more stages in total than the linked pipeline - confirm.
+            // Skipping is safer than writing past the end of the map.
+            ASSERT_AND_CONTINUE(unique_id_slot < unique_id_map.size());
+
+            const VkShaderStageFlagBits stage = stage_state.GetStage();
+
+            // Find the stage create info (in the copy) that matches this stage state
+            vku::safe_VkPipelineShaderStageCreateInfo *stage_ci = nullptr;
+            for (uint32_t j = 0; j < new_lib_pipeline_ci.stageCount; ++j) {
+                if (new_lib_pipeline_ci.pStages[j].stage == stage) {
+                    stage_ci = &new_lib_pipeline_ci.pStages[j];
+                    break;
+                }
+            }
+            // No matching entry means there is no pNext chain to inspect and nothing to patch
+            ASSERT_AND_CONTINUE(stage_ci);
+
+            // Check pNext for inlined SPIR-V
+            // We're modifying the copied, safe create info, which is ok to be non-const
+            auto sm_ci =
+                const_cast<vku::safe_VkShaderModuleCreateInfo *>(reinterpret_cast<const vku::safe_VkShaderModuleCreateInfo *>(
+                    vku::FindStructInPNextChain<VkShaderModuleCreateInfo>(stage_ci->pNext)));
+
+            // NOTE(review): when sm_ci is set and selective instrumentation is enabled on it, control still reaches the handle
+            // lookup in the else-if below - confirm this is intended
+            if (gpuav_settings.select_instrumented_shaders) {
+                if (sm_ci && !IsSelectiveInstrumentationEnabled(sm_ci->pNext)) {
+                    continue;
+                } else if (selected_instrumented_shaders.find(module_state->VkHandle()) == selected_instrumented_shaders.end()) {
+                    continue;
+                }
+            }
+
+            uint32_t unique_shader_id = 0;
+            bool cached = false;
+            bool pass = false;
+            std::vector<uint32_t> instrumented_spirv;
+            if (gpuav_settings.cache_instrumented_shaders) {
+                unique_shader_id = hash_util::ShaderHash(module_state->spirv->words_.data(),
+                                                         module_state->spirv->words_.size() * sizeof(uint32_t));
+                if (const auto spirv = instrumented_shaders_cache_.Get(unique_shader_id)) {
+                    instrumented_spirv = *spirv;
+                    cached = true;
+                }
+            } else {
+                unique_shader_id = unique_shader_module_id_++;
+            }
+            if (!cached) {
+                pass = InstrumentShader(module_state->spirv->words_, unique_shader_id, has_bindless_descriptors, loc,
+                                        instrumented_spirv);
+            }
+            if (cached || pass) {
+                unique_id_map[unique_id_slot] = unique_shader_id;
+                if (module_state->VkHandle() != VK_NULL_HANDLE) {
+                    // If the user used vkCreateShaderModule, we create a new VkShaderModule to replace with the instrumented
+                    // shader
+                    VkShaderModule instrumented_shader_module;
+                    VkShaderModuleCreateInfo create_info = vku::InitStructHelper();
+                    create_info.pCode = instrumented_spirv.data();
+                    create_info.codeSize = instrumented_spirv.size() * sizeof(uint32_t);
+                    VkResult result = DispatchCreateShaderModule(device, &create_info, pAllocator, &instrumented_shader_module);
+                    if (result == VK_SUCCESS) {
+                        // The index is the stage within this library's create info (k), not the library index (i)
+                        SetShaderModule(new_lib_pipeline_ci, *stage_state.pipeline_create_info, instrumented_shader_module, k);
+                        lib->instrumented_shader_module.emplace_back(instrumented_shader_module);
+                    } else {
+                        InternalError(device, loc, "Unable to replace non-instrumented shader with instrumented one.");
+                    }
+                } else if (sm_ci) {
+                    // The user is inlining the Shader Module into the pipeline, so just need to update the spirv
+                    shader_instrumentation_metadata.passed_in_shader_stage_ci = true;
+                    // TODO - This makes a copy, but could save on Chassis stack instead (then remove function from VUL).
+                    // The core issue is we always use std::vector<uint32_t> but Safe Struct manages its own version of the pCode
+                    // memory. It would be much harder to change everything from std::vector and instead to adjust Safe Struct to
+                    // not double-free the memory on us. If making any changes, we have to consider a case where the user inlines
+                    // the fragment shader, but use a normal VkShaderModule in the vertex shader.
+                    sm_ci->SetCode(instrumented_spirv);
+                } else {
+                    assert(false);
+                }
+
+                if (gpuav_settings.cache_instrumented_shaders && !cached) {
+                    instrumented_shaders_cache_.Add(unique_shader_id, instrumented_spirv);
+                }
+            }
+        }
+
+        VkPipeline new_lib_pipeline = VK_NULL_HANDLE;
+        const VkResult lib_result =
+            DispatchCreateGraphicsPipelines(device, VK_NULL_HANDLE, 1, new_lib_pipeline_ci.ptr(), pAllocator, &new_lib_pipeline);
+        if (lib_result != VK_SUCCESS || new_lib_pipeline == VK_NULL_HANDLE) {
+            // Keep linking against the application's original library and forget the ids recorded for it, so the PostCall
+            // pass does not register shaders that are not actually instrumented in the linked pipeline
+            for (uint32_t slot = first_slot_of_lib; slot < shader_index && slot < unique_id_map.size(); ++slot) {
+                unique_id_map[slot] = 0;
+            }
+            InternalError(device, loc, "Unable to create instrumented graphics pipeline library.");
+            continue;
+        }
+
+        // Hold on to the replacement library in the linked pipeline's state
+        if (lib->active_shaders & VK_SHADER_STAGE_FRAGMENT_BIT) {
+            pipeline_state.frag_out_lib = new_lib_pipeline;
+        } else {
+            pipeline_state.pre_raster_lib = new_lib_pipeline;
+        }
+
+        // Link against the instrumented library instead of the one the application supplied
+        const_cast<VkPipeline *>(library_create_info->pLibraries)[i] = new_lib_pipeline;
+    }
+}
+
 // Now that we have created the pipeline (and have its handle) build up the shader map for each shader we instrumented
 void GpuShaderInstrumentor::PostCallRecordPipelineCreationShaderInstrumentation(
     vvl::Pipeline &pipeline_state, chassis::ShaderInstrumentationMetadata &shader_instrumentation_metadata) {
@@ -1083,6 +1256,45 @@ void GpuShaderInstrumentor::PostCallRecordPipelineCreationShaderInstrumentation(
     }
 }
 
+// Now that the linked pipeline has been created, walk the stages of every library it was linked from and register each
+// shader that has a non-zero unique id in |shader_instrumentation_metadata| into shader_map_.
+//
+// pipeline_state                  - state of the linked pipeline; its library_create_info lists the libraries to walk
+// pAllocator                      - currently unused
+// shader_instrumentation_metadata - unique shader ids filled in by the GPL PreCall pass, one slot per library stage
+void GpuShaderInstrumentor::PostCallRecordPipelineCreationShaderInstrumentationGPL(
+    vvl::Pipeline &pipeline_state, const VkAllocationCallbacks *pAllocator,
+    chassis::ShaderInstrumentationMetadata &shader_instrumentation_metadata) {
+    const auto &unique_id_map = shader_instrumentation_metadata.spirv_unique_id_map;
+    // The GPL PreCall pass can return before the map is sized (pipeline without stage states), nothing was instrumented then
+    if (unique_id_map.empty()) return;
+    if (!pipeline_state.library_create_info) return;
+
+    uint32_t shader_index = 0;
+    for (uint32_t i = 0; i < pipeline_state.library_create_info->libraryCount; ++i) {
+        const auto lib = Get<vvl::Pipeline>(pipeline_state.library_create_info->pLibraries[i]);
+        if (!lib) continue;
+        if (lib->stage_states.empty()) continue;
+
+        for (uint32_t k = 0; k < static_cast<uint32_t>(lib->stage_states.size()); ++k) {
+            // Never read past the slots that were reserved in the PreCall pass
+            if (shader_index >= unique_id_map.size()) return;
+
+            const uint32_t unique_shader_id = unique_id_map[shader_index++];
+            // if the shader for some reason was not instrumented, there is nothing to save
+            if (unique_shader_id == 0) {
+                continue;
+            }
+
+            const auto &stage_state = lib->stage_states[k];
+            const auto &module_state = stage_state.module_state;
+            // The handle is read from the module state below, so it has to exist
+            ASSERT_AND_CONTINUE(module_state);
+
+            // We currently need to store a copy of the original, non-instrumented shader so if there is debug information,
+            // we can reference it by the instruction number printed out in the shader. Since the application can destroy the
+            // original VkShaderModule, there is a chance this will be gone, we need to copy it now.
+            // TODO - in the instrumentation, instead of printing the instruction number only, if we print out debug info, we
+            // can remove this copy
+            std::vector<uint32_t> code;
+            if (module_state->spirv) code = module_state->spirv->words_;
+
+            // Inlined SPIR-V has no VkShaderModule handle, use the placeholder handle instead
+            VkShaderModule shader_module_handle = module_state->VkHandle();
+            if (shader_module_handle == VK_NULL_HANDLE && shader_instrumentation_metadata.passed_in_shader_stage_ci) {
+                shader_module_handle = kPipelineStageInfoHandle;
+            }
+
+            shader_map_.insert_or_assign(unique_shader_id, lib->VkHandle(), shader_module_handle, VK_NULL_HANDLE, std::move(code));
+        }
+    }
+}
+
 void GpuShaderInstrumentor::PostCallRecordPipelineCreationsRT(
     VkResult result, VkDeferredOperationKHR deferredOperation, const VkAllocationCallbacks *pAllocator,
     std::shared_ptr<chassis::CreateRayTracingPipelinesKHR> chassis_state) {

diff --git a/layers/gpu/instrumentation/gpu_shader_instrumentor.h b/layers/gpu/instrumentation/gpu_shader_instrumentor.h
@@ -195,8 +195,15 @@ class GpuShaderInstrumentor : public ValidationStateTracker {
     void PreCallRecordPipelineCreationShaderInstrumentation(
         const VkAllocationCallbacks *pAllocator, vvl::Pipeline &pipeline_state, SafeCreateInfo &new_pipeline_ci,
         const Location &loc, chassis::ShaderInstrumentationMetadata &shader_instrumentation_metadata);
+    void PreCallRecordPipelineCreationShaderInstrumentationGPL(
+        const VkAllocationCallbacks *pAllocator, vvl::Pipeline &pipeline_state,
+        vku::safe_VkGraphicsPipelineCreateInfo &new_pipeline_ci, const Location &loc,
+        chassis::ShaderInstrumentationMetadata &shader_instrumentation_metadata);
     void PostCallRecordPipelineCreationShaderInstrumentation(
         vvl::Pipeline &pipeline_state, chassis::ShaderInstrumentationMetadata &shader_instrumentation_metadata);
+    void PostCallRecordPipelineCreationShaderInstrumentationGPL(
+        vvl::Pipeline &pipeline_state, const VkAllocationCallbacks *pAllocator,
+        chassis::ShaderInstrumentationMetadata &shader_instrumentation_metadata);
     void PostCallRecordPipelineCreationsRT(VkResult result, VkDeferredOperationKHR deferredOperation,
                                            const VkAllocationCallbacks *pAllocator,
                                            std::shared_ptr<chassis::CreateRayTracingPipelinesKHR> chassis_state);

diff --git a/layers/state_tracker/pipeline_state.h b/layers/state_tracker/pipeline_state.h
@@ -132,8 +132,12 @@ class Pipeline : public StateObject {
 
     mutable bool binary_data_released = false;
 
+    // TODO - We need a gpu_tracker::Pipeline and have these there
     // We create a VkShaderModule that is instrumented and need to delete before leaving the pipeline call
     std::vector<VkShaderModule> instrumented_shader_module;
+    // When we instrument GPL at link time, we need to hold the new libraries until they are done
+    VkPipeline pre_raster_lib = VK_NULL_HANDLE;
+    VkPipeline frag_out_lib = VK_NULL_HANDLE;
 
     // Executable or legacy pipeline
     Pipeline(const ValidationStateTracker &state_data, const VkGraphicsPipelineCreateInfo *pCreateInfo,