Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

sync: Cleanup signals that are never/rarely waited on #8613

Merged
merged 3 commits into from
Sep 27, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
62 changes: 33 additions & 29 deletions layers/sync/sync_validation.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,31 @@ ResourceUsageRange SyncValidator::ReserveGlobalTagRange(size_t tag_count) const
return reserve;
}

void SyncValidator::EnsureTimelineSignalsLimit(uint32_t signals_per_queue_limit, QueueId queue) {
for (auto &[_, signals] : timeline_signals_) {
const size_t initial_signal_count = signals.size();
std::unordered_map<QueueId, uint32_t> signals_per_queue;
for (const SignalInfo &signal : signals) {
++signals_per_queue[signal.first_scope.queue];
}
const bool filter_queue = queue != kQueueIdInvalid;
for (auto it = signals.begin(); it != signals.end();) {
if (filter_queue && it->first_scope.queue != queue) {
++it;
continue;
}
auto &counter = signals_per_queue[it->first_scope.queue];
if (counter > signals_per_queue_limit) {
it = signals.erase(it);
--counter;
} else {
++it;
}
}
stats.RemoveTimelineSignals(uint32_t(initial_signal_count - signals.size()));
}
}

void SyncValidator::ApplySignalsUpdate(SignalsUpdate &update, const QueueBatchContext::Ptr &last_batch) {
// NOTE: All conserved QueueBatchContexts need to have their access logs reset to use the global
// logger and the only conserved QBCs are those referenced by unwaited signals and the last batch.
Expand Down Expand Up @@ -110,6 +135,11 @@ void SyncValidator::ApplySignalsUpdate(SignalsUpdate &update, const QueueBatchCo
++it;
}
}

// Enforce max signals limit in case timeline is signaled multiple times and never/rarely is waited on.
// This does not introduce errors/false-positives (check EnsureTimelineSignalsLimit documentation)
const uint32_t kMaxTimelineSignalsPerQueue = 100;
EnsureTimelineSignalsLimit(kMaxTimelineSignalsPerQueue);
}

void SyncValidator::ApplyTaggedWait(QueueId queue_id, ResourceUsageTag tag) {
Expand Down Expand Up @@ -2959,19 +2989,9 @@ void SyncValidator::PostCallRecordQueueWaitIdle(VkQueue queue, const RecordObjec
QueueId waited_queue = queue_state->GetQueueId();
ApplyTaggedWait(waited_queue, ResourceUsageRecord::kMaxIndex);

// For each timeline, remove all signals signaled on this queue except the last one.
// For each timeline, remove all signals signaled on the waited queue, except the last one.
// The last signal is needed to represent the current timeline state.
for (auto &[_, signals] : timeline_signals_) {
const size_t initial_signal_count = signals.size();
auto queue_pred = [waited_queue](const auto &signal) { return signal.first_scope.queue == waited_queue; };
auto last_signal_reverse_it = std::find_if(signals.rbegin(), signals.rend(), queue_pred);
if (last_signal_reverse_it != signals.rend()) {
auto last_signal_it = last_signal_reverse_it.base() - 1; // convert to forward iterator
// removes all queue signals excepts the last one
signals.erase(std::remove_if(signals.begin(), last_signal_it, queue_pred), last_signal_it);
}
stats.RemoveTimelineSignals(uint32_t(initial_signal_count - signals.size()));
}
EnsureTimelineSignalsLimit(1, waited_queue);

// Eliminate host waitable objects from the current queue.
vvl::EraseIf(waitable_fences_, [waited_queue](const auto &sf) { return sf.second.queue_id == waited_queue; });
Expand All @@ -2989,23 +3009,7 @@ void SyncValidator::PostCallRecordDeviceWaitIdle(VkDevice device, const RecordOb

// For each timeline keep only the last signal per queue.
// The last signal is needed to represent the current timeline state.
for (auto &[_, signals] : timeline_signals_) {
const size_t initial_signal_count = signals.size();
std::unordered_map<QueueId, uint32_t> signals_per_queue;
for (const SignalInfo &signal : signals) {
++signals_per_queue[signal.first_scope.queue];
}
for (auto it = signals.begin(); it != signals.end();) {
auto &counter = signals_per_queue[it->first_scope.queue];
if (counter > 1) {
it = signals.erase(it);
--counter;
} else {
++it;
}
}
stats.RemoveTimelineSignals(uint32_t(initial_signal_count - signals.size()));
}
EnsureTimelineSignalsLimit(1);

// As we we've waited for everything on device, any waits are mooted. (except for acquires)
vvl::EraseIf(waitable_fences_, [](const auto &waitable) { return waitable.second.acquired.Invalid(); });
Expand Down
11 changes: 11 additions & 0 deletions layers/sync/sync_validation.h
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,17 @@ class SyncValidator : public ValidationStateTracker, public SyncStageAccess {
uint32_t debug_reset_count = 1;
std::string debug_cmdbuf_pattern;

// Ensures that the number of signals per timeline per queue does not exceed the specified limit.
// If `queue` parameter is specified, then only that queue is checked (used by vkQueueWaitIdle).
// If the number of signals exceeds the limit, then signals with the smallest values are removed.
//
// Note, removing registered signals can't introduce errors/false-positives assuming at least
// a single signal per timeline is left. That's because if there are more than one matching signal
// to resolve a wait then the specification defines that only one signal is selected, which one is
// unspecified. In the current implementation we keep multiple signals per timeline to have additional
// options of validation, but, for example, keeping only the last signal is sufficient.
void EnsureTimelineSignalsLimit(uint32_t signals_per_queue_limit, QueueId queue = kQueueIdInvalid);

// Applies information from update object to binary_signals_/timeline_signals_.
// The update object is mutable to be able to std::move SignalInfo from it.
void ApplySignalsUpdate(SignalsUpdate &update, const QueueBatchContext::Ptr &last_batch);
Expand Down
55 changes: 53 additions & 2 deletions tests/unit/sync_val_semaphore_positive.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -655,7 +655,7 @@ TEST_F(PositiveSyncValTimelineSemaphore, ExternalSemaphoreWaitBeforeSignal) {
}
#endif // VK_USE_PLATFORM_WIN32_KHR

TEST_F(PositiveSyncValTimelineSemaphore, QueueWaitIdleRemovesAllSignals) {
TEST_F(PositiveSyncValTimelineSemaphore, QueueWaitIdleRemovesSignals) {
TEST_DESCRIPTION("Test for manual inspection of registered signals (VK_SYNCVAL_SHOW_STATS can be used)");
RETURN_IF_SKIP(InitTimelineSemaphore());

Expand All @@ -671,7 +671,7 @@ TEST_F(PositiveSyncValTimelineSemaphore, QueueWaitIdleRemovesAllSignals) {
m_device->Wait();
}

TEST_F(PositiveSyncValTimelineSemaphore, DeviceWaitIdleRemovesAllSignals) {
TEST_F(PositiveSyncValTimelineSemaphore, DeviceWaitIdleRemovesignals) {
TEST_DESCRIPTION("Test for manual inspection of registered signals (VK_SYNCVAL_SHOW_STATS can be used)");
RETURN_IF_SKIP(InitTimelineSemaphore());

Expand All @@ -686,3 +686,54 @@ TEST_F(PositiveSyncValTimelineSemaphore, DeviceWaitIdleRemovesAllSignals) {
}
m_device->Wait();
}

TEST_F(PositiveSyncValTimelineSemaphore, ManyOrphanedSignals) {
TEST_DESCRIPTION("Test limit of maximum number of registered signals per timeline");
RETURN_IF_SKIP(InitTimelineSemaphore());

vkt::Semaphore semaphore(*m_device, VK_SEMAPHORE_TYPE_TIMELINE);
vkt::Semaphore binary_semaphore(*m_device);

vkt::Buffer buffer_a(*m_device, 256, VK_BUFFER_USAGE_TRANSFER_SRC_BIT);
vkt::Buffer buffer_b(*m_device, 256, VK_BUFFER_USAGE_TRANSFER_DST_BIT);
m_command_buffer.begin(VK_COMMAND_BUFFER_USAGE_SIMULTANEOUS_USE_BIT);
m_command_buffer.Copy(buffer_a, buffer_b);
m_command_buffer.end();

VkCommandBufferSubmitInfo cbuf_info = vku::InitStructHelper();
cbuf_info.commandBuffer = m_command_buffer;

VkSemaphoreSubmitInfo wait = vku::InitStructHelper();
wait.semaphore = binary_semaphore;
wait.stageMask = VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT;

VkSemaphoreSubmitInfo signals[2];
// binary signal
signals[0] = vku::InitStructHelper();
signals[0].semaphore = binary_semaphore;
signals[0].stageMask = VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT;
// timeline signal (value is set for each loop iteration)
signals[1] = vku::InitStructHelper();
signals[1].semaphore = semaphore;
signals[1].stageMask = VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT;

const uint32_t N = 1'000;
for (uint32_t i = 1; i <= N; i++) {
// signal timeline on each iteration but never wait for it
signals[1].value = i;

VkSubmitInfo2 submit = vku::InitStructHelper();
if (i > 1) {
// wait binary signal from previous iteration
submit.waitSemaphoreInfoCount = 1;
submit.pWaitSemaphoreInfos = &wait;
}
submit.commandBufferInfoCount = 1;
submit.pCommandBufferInfos = &cbuf_info;
submit.signalSemaphoreInfoCount = 2;
submit.pSignalSemaphoreInfos = signals;

vk::QueueSubmit2(m_default_queue->handle(), 1, &submit, VK_NULL_HANDLE);
}
m_device->Wait();
}
Loading