mirror of
https://github.com/NVIDIA/open-gpu-kernel-modules.git
synced 2025-01-07 13:46:05 +01:00
684 lines
31 KiB
C
684 lines
31 KiB
C
|
/*******************************************************************************
|
||
|
Copyright (c) 2017-2021 NVIDIA Corporation
|
||
|
|
||
|
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||
|
of this software and associated documentation files (the "Software"), to
|
||
|
deal in the Software without restriction, including without limitation the
|
||
|
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
|
||
|
sell copies of the Software, and to permit persons to whom the Software is
|
||
|
furnished to do so, subject to the following conditions:
|
||
|
|
||
|
The above copyright notice and this permission notice shall be
|
||
|
included in all copies or substantial portions of the Software.
|
||
|
|
||
|
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||
|
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||
|
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||
|
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||
|
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||
|
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||
|
DEALINGS IN THE SOFTWARE.
|
||
|
*******************************************************************************/
|
||
|
|
||
|
#include "nv_uvm_interface.h"
|
||
|
#include "uvm_common.h"
|
||
|
#include "uvm_api.h"
|
||
|
#include "uvm_gpu_non_replayable_faults.h"
|
||
|
#include "uvm_gpu.h"
|
||
|
#include "uvm_hal.h"
|
||
|
#include "uvm_lock.h"
|
||
|
#include "uvm_tools.h"
|
||
|
#include "uvm_user_channel.h"
|
||
|
#include "uvm_va_space_mm.h"
|
||
|
#include "uvm_va_block.h"
|
||
|
#include "uvm_va_range.h"
|
||
|
#include "uvm_kvmalloc.h"
|
||
|
#include "uvm_ats_faults.h"
|
||
|
|
||
|
// In the context of a CUDA application using Unified Memory, it is sometimes
|
||
|
// assumed that there is a single type of fault, originated by a memory
|
||
|
// load/store in a SM (Graphics Engine), which itself can be traced back to a
|
||
|
// memory access in a CUDA kernel written by a developer. In reality, faults can
|
||
|
// also be triggered by other parts of the GPU i.e. by other engines, as the
|
||
|
// result of developer-facing APIs, or operations initiated by a user-mode
|
||
|
// driver. The Graphics Engine faults are called replayable faults, while the
|
||
|
// rest are called non-replayable. The differences between the two types of
|
||
|
// faults go well beyond the engine originating the fault.
|
||
|
//
|
||
|
// A non-replayable fault originates in an engine other than Graphics. UVM
|
||
|
// services non-replayable faults from the Copy and PBDMA (Host/ESCHED) Engines.
|
||
|
// Non-replayable faults originated in other engines are considered fatal, and
|
||
|
// do not reach the UVM driver. While UVM can distinguish between faults
|
||
|
// originated in the Copy Engine and faults originated in the PBDMA Engine, in
|
||
|
// practice they are all processed in the same way. Replayable fault support in
|
||
|
// Graphics was introduced in Pascal, and non-replayable fault support in CE and
|
||
|
// PBDMA Engines was introduced in Volta; all non-replayable faults were fatal
|
||
|
// before Volta.
|
||
|
//
|
||
|
// An example of a Copy Engine non-replayable fault is a memory copy between two
|
||
|
// virtual addresses on a GPU, in which either the source or destination
|
||
|
// pointers are not currently mapped to a physical address in the page tables of
|
||
|
// the GPU. An example of a PBDMA non-replayable fault is a semaphore acquire in
|
||
|
// which the semaphore virtual address passed as argument is currently not
|
||
|
// mapped to any physical address.
|
||
|
//
|
||
|
// Non-replayable faults originated in the CE and PBDMA Engines result in HW
|
||
|
// preempting the channel associated with the fault, a mechanism called "fault
|
||
|
// and switch". More precisely, the switching out affects not only the channel
|
||
|
// that caused the fault, but all the channels in the same Time Slice Group
|
||
|
// (TSG). SW intervention is required so all the channels in the TSG can be
|
||
|
// scheduled again, but channels in other TSGs can be scheduled and resume their
|
||
|
// normal execution. In the case of the non-replayable faults serviced by UVM,
|
||
|
// the driver clears a channel's faulted bit upon successful servicing, but it
|
||
|
// is only when the servicing has completed for all the channels in the TSG that
|
||
|
// they are all allowed to be switched in. Non-replayable faults originated in
|
||
|
// engines other than CE and PBDMA are fatal because these other units lack
|
||
|
// hardware support for the "fault and switch" and restart mechanisms just
|
||
|
// described.
|
||
|
// On the other hand, replayable faults block preemption of the channel until
|
||
|
// software (UVM) services the fault. This is sometimes known as "fault and
|
||
|
// stall". Note that replayable faults prevent the execution of other channels,
|
||
|
// which are stalled until the fault is serviced.
|
||
|
//
|
||
|
// The "non-replayable" naming alludes to the fact that, historically, these
|
||
|
// faults indicated a fatal condition so there was no recovery ("replay")
|
||
|
// process, and SW could not ignore or drop the fault. As discussed before, this
|
||
|
// is no longer the case and while at times the hardware documentation uses the
|
||
|
// "fault and replay" expression for CE and PBDMA faults, we reserve that
|
||
|
// expression for Graphics faults and favor the term "fault and reschedule"
|
||
|
// instead. Replaying a fault does not necessarily imply that UVM has serviced
|
||
|
// it. For example, the UVM driver may choose to ignore the replayable faults
|
||
|
// associated with a GPU for some period of time if it detects that there is
|
||
|
// thrashing going on, and the GPU needs to be throttled. The fault entries
|
||
|
// corresponding to the ignored faults are never saved by UVM, but new entries
|
||
|
// (and new interrupts) will be generated by hardware each time after UVM issues
|
||
|
// a replay.
|
||
|
//
|
||
|
// While replayable faults are always the responsibility of UVM, the servicing
|
||
|
// of non-replayable faults is split between RM and UVM. In the case of
|
||
|
// replayable faults, UVM has sole SW ownership of the hardware buffer
|
||
|
// containing the faults, and it is responsible for updating the GET pointer to
|
||
|
// signal the hardware that a number of faults have been read. UVM also reads
|
||
|
// the PUT pointer value written by hardware. But in the case of non-replayable
|
||
|
// faults, UVM reads the fault entries out of a regular CPU buffer, shared with
|
||
|
// RM, called "shadow buffer". RM is responsible for accessing the actual
|
||
|
// non-replayable hardware buffer, reading the PUT pointer, updating the GET
|
||
|
// pointer, and moving CE and PBDMA faults from the hardware buffer to the
|
||
|
// shadow buffer. Because the Resource Manager owns the HW buffer, UVM needs to
|
||
|
// call RM when servicing a non-replayable fault, first to figure out if there
|
||
|
// is a pending fault, and then to read entries from the shadow buffer.
|
||
|
//
|
||
|
// Once UVM has parsed a non-replayable fault entry corresponding to managed
|
||
|
// memory, and identified the VA block associated with it, the servicing logic
|
||
|
// for that block is identical to that of a replayable fault, see
|
||
|
// uvm_va_block_service_locked. Another similarity between the two types of
|
||
|
// faults is that they use the same entry format, uvm_fault_buffer_entry_t.
|
||
|
|
||
|
|
||
|
// There is no error handling in this function. The caller is in charge of
|
||
|
// calling uvm_gpu_fault_buffer_deinit_non_replayable_faults on failure.
|
||
|
NV_STATUS uvm_gpu_fault_buffer_init_non_replayable_faults(uvm_parent_gpu_t *parent_gpu)
|
||
|
{
|
||
|
uvm_non_replayable_fault_buffer_info_t *non_replayable_faults = &parent_gpu->fault_buffer_info.non_replayable;
|
||
|
|
||
|
UVM_ASSERT(parent_gpu->non_replayable_faults_supported);
|
||
|
|
||
|
non_replayable_faults->shadow_buffer_copy = NULL;
|
||
|
non_replayable_faults->fault_cache = NULL;
|
||
|
|
||
|
non_replayable_faults->max_faults = parent_gpu->fault_buffer_info.rm_info.nonReplayable.bufferSize /
|
||
|
parent_gpu->fault_buffer_hal->entry_size(parent_gpu);
|
||
|
|
||
|
non_replayable_faults->shadow_buffer_copy =
|
||
|
uvm_kvmalloc_zero(parent_gpu->fault_buffer_info.rm_info.nonReplayable.bufferSize);
|
||
|
if (!non_replayable_faults->shadow_buffer_copy)
|
||
|
return NV_ERR_NO_MEMORY;
|
||
|
|
||
|
non_replayable_faults->fault_cache = uvm_kvmalloc_zero(non_replayable_faults->max_faults *
|
||
|
sizeof(*non_replayable_faults->fault_cache));
|
||
|
if (!non_replayable_faults->fault_cache)
|
||
|
return NV_ERR_NO_MEMORY;
|
||
|
|
||
|
uvm_tracker_init(&non_replayable_faults->clear_faulted_tracker);
|
||
|
uvm_tracker_init(&non_replayable_faults->fault_service_tracker);
|
||
|
|
||
|
return NV_OK;
|
||
|
}
|
||
|
|
||
|
// Release the state created by uvm_gpu_fault_buffer_init_non_replayable_faults.
// Handles partially-initialized state: both buffer pointers are reset to NULL
// on exit.
void uvm_gpu_fault_buffer_deinit_non_replayable_faults(uvm_parent_gpu_t *parent_gpu)
{
    uvm_non_replayable_fault_buffer_info_t *buffer_info = &parent_gpu->fault_buffer_info.non_replayable;

    // The init function sets up the trackers only after the fault cache has
    // been allocated, so a NULL cache means there are no trackers to tear down.
    if (buffer_info->fault_cache != NULL) {
        UVM_ASSERT(uvm_tracker_is_empty(&buffer_info->clear_faulted_tracker));
        uvm_tracker_deinit(&buffer_info->clear_faulted_tracker);

        UVM_ASSERT(uvm_tracker_is_empty(&buffer_info->fault_service_tracker));
        uvm_tracker_deinit(&buffer_info->fault_service_tracker);
    }

    uvm_kvfree(buffer_info->shadow_buffer_copy);
    buffer_info->shadow_buffer_copy = NULL;

    uvm_kvfree(buffer_info->fault_cache);
    buffer_info->fault_cache = NULL;
}
|
||
|
|
||
|
// Ask RM whether there are non-replayable faults pending for this GPU.
//
// Returns true if nvUvmInterfaceHasPendingNonReplayableFaults reports pending
// faults, false otherwise (including when the RM call does not write its
// output).
bool uvm_gpu_non_replayable_faults_pending(uvm_parent_gpu_t *parent_gpu)
{
    NV_STATUS status;

    // Default to "nothing pending" so the return value is well defined even if
    // the RM call fails without writing its output and the assert below does
    // not stop execution.
    NvBool has_pending_faults = NV_FALSE;

    UVM_ASSERT(parent_gpu->isr.non_replayable_faults.handling);

    status = nvUvmInterfaceHasPendingNonReplayableFaults(&parent_gpu->fault_buffer_info.rm_info,
                                                         &has_pending_faults);
    UVM_ASSERT(status == NV_OK);

    return has_pending_faults == NV_TRUE;
}
|
||
|
|
||
|
// Fetch the pending non-replayable faults from RM into the shadow buffer copy
// and parse each of them into the corresponding fault_cache slot.
//
// Returns the number of entries parsed into fault_cache. Returns 0 when RM
// reports no faults or when the RM call fails.
//
// The non-replayable faults service_lock must be held by the caller.
static NvU32 fetch_non_replayable_fault_buffer_entries(uvm_gpu_t *gpu)
{
    NV_STATUS status;
    NvU32 i;
    NvU32 cached_faults = 0;
    NvU32 entry_size = gpu->parent->fault_buffer_hal->entry_size(gpu->parent);
    uvm_non_replayable_fault_buffer_info_t *non_replayable_faults = &gpu->parent->fault_buffer_info.non_replayable;
    uvm_fault_buffer_entry_t *fault_cache = non_replayable_faults->fault_cache;
    char *current_hw_entry = (char *)non_replayable_faults->shadow_buffer_copy;

    UVM_ASSERT(uvm_sem_is_locked(&gpu->parent->isr.non_replayable_faults.service_lock));
    UVM_ASSERT(gpu->parent->non_replayable_faults_supported);

    status = nvUvmInterfaceGetNonReplayableFaults(&gpu->parent->fault_buffer_info.rm_info,
                                                  non_replayable_faults->shadow_buffer_copy,
                                                  &cached_faults);
    UVM_ASSERT(status == NV_OK);

    // Do not parse the shadow buffer copy if RM failed to fill it: report that
    // there is nothing to service instead.
    if (status != NV_OK) {
        UVM_ERR_PRINT("Error fetching non-replayable faults: %s, GPU %s\n",
                      nvstatusToString(status),
                      uvm_gpu_name(gpu));
        return 0;
    }

    // fault_cache was sized for max_faults entries at init time
    UVM_ASSERT(cached_faults <= non_replayable_faults->max_faults);

    // Parse all faults
    for (i = 0; i < cached_faults; ++i) {
        uvm_fault_buffer_entry_t *fault_entry = &fault_cache[i];

        gpu->parent->fault_buffer_hal->parse_non_replayable_entry(gpu->parent, current_hw_entry, fault_entry);

        // The GPU aligns the fault addresses to 4k, but all of our tracking is
        // done in PAGE_SIZE chunks which might be larger.
        fault_entry->fault_address = UVM_PAGE_ALIGN_DOWN(fault_entry->fault_address);

        // Make sure that all fields in the entry are properly initialized
        fault_entry->va_space = NULL;
        fault_entry->is_fatal = (fault_entry->fault_type >= UVM_FAULT_TYPE_FATAL);
        fault_entry->filtered = false;

        fault_entry->num_instances = 1;
        fault_entry->access_type_mask = uvm_fault_access_type_mask_bit(fault_entry->fault_access_type);
        INIT_LIST_HEAD(&fault_entry->merged_instances_list);

        // Remember the position of the raw packet within the shadow buffer
        // copy; schedule_kill_channel uses it to locate the packet.
        fault_entry->non_replayable.buffer_index = i;

        if (fault_entry->is_fatal) {
            // Record the fatal fault event later as we need the va_space locked
            fault_entry->fatal_reason = UvmEventFatalReasonInvalidFaultType;
        }
        else {
            fault_entry->fatal_reason = UvmEventFatalReasonInvalid;
        }

        current_hw_entry += entry_size;
    }

    return cached_faults;
}
|
||
|
|
||
|
// In SRIOV, the UVM (guest) driver does not have access to the privileged
|
||
|
// registers used to clear the faulted bit. Instead, UVM requests host RM to do
|
||
|
// the clearing on its behalf, using a SW method.
|
||
|
static bool use_clear_faulted_channel_sw_method(uvm_gpu_t *gpu)
|
||
|
{
|
||
|
if (uvm_gpu_is_virt_mode_sriov(gpu)) {
|
||
|
UVM_ASSERT(gpu->parent->has_clear_faulted_channel_sw_method);
|
||
|
return true;
|
||
|
}
|
||
|
|
||
|
return false;
|
||
|
}
|
||
|
|
||
|
static NV_STATUS clear_faulted_method_on_gpu(uvm_gpu_t *gpu,
|
||
|
uvm_user_channel_t *user_channel,
|
||
|
const uvm_fault_buffer_entry_t *fault_entry,
|
||
|
NvU32 batch_id,
|
||
|
uvm_tracker_t *tracker)
|
||
|
{
|
||
|
NV_STATUS status;
|
||
|
uvm_push_t push;
|
||
|
uvm_non_replayable_fault_buffer_info_t *non_replayable_faults = &gpu->parent->fault_buffer_info.non_replayable;
|
||
|
|
||
|
UVM_ASSERT(!fault_entry->is_fatal);
|
||
|
|
||
|
status = uvm_push_begin_acquire(gpu->channel_manager,
|
||
|
UVM_CHANNEL_TYPE_MEMOPS,
|
||
|
tracker,
|
||
|
&push,
|
||
|
"Clearing set bit for address 0x%llx",
|
||
|
fault_entry->fault_address);
|
||
|
if (status != NV_OK) {
|
||
|
UVM_ERR_PRINT("Error acquiring tracker before clearing faulted: %s, GPU %s\n",
|
||
|
nvstatusToString(status),
|
||
|
uvm_gpu_name(gpu));
|
||
|
return status;
|
||
|
}
|
||
|
|
||
|
if (use_clear_faulted_channel_sw_method(gpu))
|
||
|
gpu->parent->host_hal->clear_faulted_channel_sw_method(&push, user_channel, fault_entry);
|
||
|
else
|
||
|
gpu->parent->host_hal->clear_faulted_channel_method(&push, user_channel, fault_entry);
|
||
|
|
||
|
uvm_tools_broadcast_replay(gpu, &push, batch_id, fault_entry->fault_source.client_type);
|
||
|
|
||
|
uvm_push_end(&push);
|
||
|
|
||
|
// Add this push to the GPU's clear_faulted_tracker so GPU removal can wait
|
||
|
// on it.
|
||
|
status = uvm_tracker_add_push_safe(&non_replayable_faults->clear_faulted_tracker, &push);
|
||
|
|
||
|
// Add this push to the channel's clear_faulted_tracker so user channel
|
||
|
// removal can wait on it instead of using the per-GPU tracker, which would
|
||
|
// require a lock.
|
||
|
if (status == NV_OK)
|
||
|
status = uvm_tracker_add_push_safe(&user_channel->clear_faulted_tracker, &push);
|
||
|
|
||
|
return status;
|
||
|
}
|
||
|
|
||
|
static NV_STATUS clear_faulted_register_on_gpu(uvm_gpu_t *gpu,
|
||
|
uvm_user_channel_t *user_channel,
|
||
|
const uvm_fault_buffer_entry_t *fault_entry,
|
||
|
NvU32 batch_id,
|
||
|
uvm_tracker_t *tracker)
|
||
|
{
|
||
|
NV_STATUS status;
|
||
|
|
||
|
UVM_ASSERT(!gpu->parent->has_clear_faulted_channel_method);
|
||
|
|
||
|
// We need to wait for all pending work before writing to the channel
|
||
|
// register
|
||
|
status = uvm_tracker_wait(tracker);
|
||
|
if (status != NV_OK)
|
||
|
return status;
|
||
|
|
||
|
gpu->parent->host_hal->clear_faulted_channel_register(user_channel, fault_entry);
|
||
|
|
||
|
uvm_tools_broadcast_replay_sync(gpu, batch_id, fault_entry->fault_source.client_type);
|
||
|
|
||
|
return NV_OK;
|
||
|
}
|
||
|
|
||
|
static NV_STATUS clear_faulted_on_gpu(uvm_gpu_t *gpu,
|
||
|
uvm_user_channel_t *user_channel,
|
||
|
const uvm_fault_buffer_entry_t *fault_entry,
|
||
|
NvU32 batch_id,
|
||
|
uvm_tracker_t *tracker)
|
||
|
{
|
||
|
if (gpu->parent->has_clear_faulted_channel_method || use_clear_faulted_channel_sw_method(gpu))
|
||
|
return clear_faulted_method_on_gpu(gpu, user_channel, fault_entry, batch_id, tracker);
|
||
|
|
||
|
return clear_faulted_register_on_gpu(gpu, user_channel, fault_entry, batch_id, tracker);
|
||
|
}
|
||
|
|
||
|
static NV_STATUS service_managed_fault_in_block_locked(uvm_gpu_t *gpu,
|
||
|
uvm_va_block_t *va_block,
|
||
|
uvm_va_block_retry_t *va_block_retry,
|
||
|
uvm_fault_buffer_entry_t *fault_entry,
|
||
|
uvm_service_block_context_t *service_context)
|
||
|
{
|
||
|
NV_STATUS status = NV_OK;
|
||
|
uvm_page_index_t page_index;
|
||
|
uvm_perf_thrashing_hint_t thrashing_hint;
|
||
|
uvm_processor_id_t new_residency;
|
||
|
bool read_duplicate;
|
||
|
uvm_va_space_t *va_space = uvm_va_block_get_va_space(va_block);
|
||
|
uvm_va_range_t *va_range = va_block->va_range;
|
||
|
uvm_non_replayable_fault_buffer_info_t *non_replayable_faults = &gpu->parent->fault_buffer_info.non_replayable;
|
||
|
|
||
|
UVM_ASSERT(!fault_entry->is_fatal);
|
||
|
|
||
|
uvm_assert_rwsem_locked(&va_space->lock);
|
||
|
|
||
|
UVM_ASSERT(fault_entry->va_space == va_space);
|
||
|
UVM_ASSERT(fault_entry->fault_address >= va_block->start);
|
||
|
UVM_ASSERT(fault_entry->fault_address <= va_block->end);
|
||
|
|
||
|
service_context->block_context.policy = uvm_va_policy_get(va_block, fault_entry->fault_address);
|
||
|
|
||
|
if (service_context->num_retries == 0) {
|
||
|
// notify event to tools/performance heuristics. For now we use a
|
||
|
// unique batch id per fault, since we clear the faulted channel for
|
||
|
// each fault.
|
||
|
uvm_perf_event_notify_gpu_fault(&va_space->perf_events,
|
||
|
va_block,
|
||
|
gpu->id,
|
||
|
service_context->block_context.policy->preferred_location,
|
||
|
fault_entry,
|
||
|
++non_replayable_faults->batch_id,
|
||
|
false);
|
||
|
}
|
||
|
|
||
|
// Check logical permissions
|
||
|
status = uvm_va_range_check_logical_permissions(va_range,
|
||
|
gpu->id,
|
||
|
fault_entry->fault_access_type,
|
||
|
uvm_range_group_address_migratable(va_space,
|
||
|
fault_entry->fault_address));
|
||
|
if (status != NV_OK) {
|
||
|
fault_entry->is_fatal = true;
|
||
|
fault_entry->fatal_reason = uvm_tools_status_to_fatal_fault_reason(status);
|
||
|
return NV_OK;
|
||
|
}
|
||
|
|
||
|
// TODO: Bug 1880194: Revisit thrashing detection
|
||
|
thrashing_hint.type = UVM_PERF_THRASHING_HINT_TYPE_NONE;
|
||
|
|
||
|
service_context->read_duplicate_count = 0;
|
||
|
service_context->thrashing_pin_count = 0;
|
||
|
|
||
|
page_index = uvm_va_block_cpu_page_index(va_block, fault_entry->fault_address);
|
||
|
|
||
|
// Compute new residency and update the masks
|
||
|
new_residency = uvm_va_block_select_residency(va_block,
|
||
|
page_index,
|
||
|
gpu->id,
|
||
|
fault_entry->access_type_mask,
|
||
|
service_context->block_context.policy,
|
||
|
&thrashing_hint,
|
||
|
UVM_SERVICE_OPERATION_NON_REPLAYABLE_FAULTS,
|
||
|
&read_duplicate);
|
||
|
|
||
|
// Initialize the minimum necessary state in the fault service context
|
||
|
uvm_processor_mask_zero(&service_context->resident_processors);
|
||
|
|
||
|
// Set new residency and update the masks
|
||
|
uvm_processor_mask_set(&service_context->resident_processors, new_residency);
|
||
|
|
||
|
// The masks need to be fully zeroed as the fault region may grow due to prefetching
|
||
|
uvm_page_mask_zero(&service_context->per_processor_masks[uvm_id_value(new_residency)].new_residency);
|
||
|
uvm_page_mask_set(&service_context->per_processor_masks[uvm_id_value(new_residency)].new_residency, page_index);
|
||
|
|
||
|
if (read_duplicate) {
|
||
|
uvm_page_mask_zero(&service_context->read_duplicate_mask);
|
||
|
uvm_page_mask_set(&service_context->read_duplicate_mask, page_index);
|
||
|
service_context->read_duplicate_count = 1;
|
||
|
}
|
||
|
|
||
|
service_context->access_type[page_index] = fault_entry->fault_access_type;
|
||
|
|
||
|
service_context->region = uvm_va_block_region_for_page(page_index);
|
||
|
|
||
|
status = uvm_va_block_service_locked(gpu->id, va_block, va_block_retry, service_context);
|
||
|
|
||
|
++service_context->num_retries;
|
||
|
|
||
|
return status;
|
||
|
}
|
||
|
|
||
|
static NV_STATUS service_managed_fault_in_block(uvm_gpu_t *gpu,
|
||
|
struct mm_struct *mm,
|
||
|
uvm_va_block_t *va_block,
|
||
|
uvm_fault_buffer_entry_t *fault_entry)
|
||
|
{
|
||
|
NV_STATUS status, tracker_status;
|
||
|
uvm_va_block_retry_t va_block_retry;
|
||
|
uvm_service_block_context_t *service_context = &gpu->parent->fault_buffer_info.non_replayable.block_service_context;
|
||
|
|
||
|
service_context->operation = UVM_SERVICE_OPERATION_NON_REPLAYABLE_FAULTS;
|
||
|
service_context->num_retries = 0;
|
||
|
service_context->block_context.mm = mm;
|
||
|
|
||
|
uvm_mutex_lock(&va_block->lock);
|
||
|
|
||
|
status = UVM_VA_BLOCK_RETRY_LOCKED(va_block, &va_block_retry,
|
||
|
service_managed_fault_in_block_locked(gpu,
|
||
|
va_block,
|
||
|
&va_block_retry,
|
||
|
fault_entry,
|
||
|
service_context));
|
||
|
|
||
|
tracker_status = uvm_tracker_add_tracker_safe(&gpu->parent->fault_buffer_info.non_replayable.fault_service_tracker,
|
||
|
&va_block->tracker);
|
||
|
|
||
|
uvm_mutex_unlock(&va_block->lock);
|
||
|
|
||
|
return status == NV_OK? tracker_status: status;
|
||
|
}
|
||
|
|
||
|
// See uvm_unregister_channel for comments on the the channel destruction
|
||
|
// sequence.
|
||
|
static void kill_channel_delayed(void *_user_channel)
|
||
|
{
|
||
|
uvm_user_channel_t *user_channel = (uvm_user_channel_t *)_user_channel;
|
||
|
uvm_va_space_t *va_space = user_channel->kill_channel.va_space;
|
||
|
|
||
|
UVM_ASSERT(uvm_va_space_initialized(va_space) == NV_OK);
|
||
|
|
||
|
uvm_va_space_down_read_rm(va_space);
|
||
|
if (user_channel->gpu_va_space) {
|
||
|
// RM handles the fault, which will do the correct fault reporting in the
|
||
|
// kernel logs and will initiate channel teardown
|
||
|
NV_STATUS status = nvUvmInterfaceReportNonReplayableFault(uvm_gpu_device_handle(user_channel->gpu),
|
||
|
user_channel->kill_channel.fault_packet);
|
||
|
UVM_ASSERT(status == NV_OK);
|
||
|
}
|
||
|
uvm_va_space_up_read_rm(va_space);
|
||
|
|
||
|
uvm_user_channel_release(user_channel);
|
||
|
}
|
||
|
|
||
|
// Work item callback registered by schedule_kill_channel. Runs
// kill_channel_delayed on the given channel through the UVM_ENTRY_VOID
// wrapper.
static void kill_channel_delayed_entry(void *user_channel)
{
    UVM_ENTRY_VOID(kill_channel_delayed(user_channel));
}
|
||
|
|
||
|
static void schedule_kill_channel(uvm_gpu_t *gpu,
|
||
|
uvm_fault_buffer_entry_t *fault_entry,
|
||
|
uvm_user_channel_t *user_channel)
|
||
|
{
|
||
|
uvm_va_space_t *va_space = fault_entry->va_space;
|
||
|
uvm_non_replayable_fault_buffer_info_t *non_replayable_faults = &gpu->parent->fault_buffer_info.non_replayable;
|
||
|
void *packet = (char *)non_replayable_faults->shadow_buffer_copy +
|
||
|
(fault_entry->non_replayable.buffer_index * gpu->parent->fault_buffer_hal->entry_size(gpu->parent));
|
||
|
|
||
|
UVM_ASSERT(gpu);
|
||
|
UVM_ASSERT(va_space);
|
||
|
UVM_ASSERT(user_channel);
|
||
|
|
||
|
if (user_channel->kill_channel.scheduled)
|
||
|
return;
|
||
|
|
||
|
user_channel->kill_channel.scheduled = true;
|
||
|
user_channel->kill_channel.va_space = va_space;
|
||
|
|
||
|
// Save the packet to be handled by RM in the channel structure
|
||
|
memcpy(user_channel->kill_channel.fault_packet, packet, gpu->parent->fault_buffer_hal->entry_size(gpu->parent));
|
||
|
|
||
|
// Retain the channel here so it is not prematurely destroyed. It will be
|
||
|
// released after forwarding the fault to RM in kill_channel_delayed.
|
||
|
uvm_user_channel_retain(user_channel);
|
||
|
|
||
|
// Schedule a work item to kill the channel
|
||
|
nv_kthread_q_item_init(&user_channel->kill_channel.kill_channel_q_item,
|
||
|
kill_channel_delayed_entry,
|
||
|
user_channel);
|
||
|
|
||
|
nv_kthread_q_schedule_q_item(&gpu->parent->isr.kill_channel_q,
|
||
|
&user_channel->kill_channel.kill_channel_q_item);
|
||
|
}
|
||
|
|
||
|
static NV_STATUS service_non_managed_fault(uvm_gpu_va_space_t *gpu_va_space,
|
||
|
struct mm_struct *mm,
|
||
|
uvm_fault_buffer_entry_t *fault_entry,
|
||
|
NV_STATUS lookup_status)
|
||
|
{
|
||
|
uvm_gpu_t *gpu = gpu_va_space->gpu;
|
||
|
uvm_non_replayable_fault_buffer_info_t *non_replayable_faults = &gpu->parent->fault_buffer_info.non_replayable;
|
||
|
uvm_ats_fault_invalidate_t *ats_invalidate = &non_replayable_faults->ats_invalidate;
|
||
|
NV_STATUS status = lookup_status;
|
||
|
|
||
|
UVM_ASSERT(!fault_entry->is_fatal);
|
||
|
|
||
|
// Avoid dropping fault events when the VA block is not found or cannot be created
|
||
|
uvm_perf_event_notify_gpu_fault(&fault_entry->va_space->perf_events,
|
||
|
NULL,
|
||
|
gpu->id,
|
||
|
UVM_ID_INVALID,
|
||
|
fault_entry,
|
||
|
++non_replayable_faults->batch_id,
|
||
|
false);
|
||
|
|
||
|
if (status != NV_ERR_INVALID_ADDRESS)
|
||
|
return status;
|
||
|
|
||
|
if (uvm_ats_can_service_faults(gpu_va_space, mm)) {
|
||
|
ats_invalidate->write_faults_in_batch = false;
|
||
|
|
||
|
// The VA isn't managed. See if ATS knows about it.
|
||
|
status = uvm_ats_service_fault_entry(gpu_va_space, fault_entry, ats_invalidate);
|
||
|
|
||
|
// Invalidate ATS TLB entries if needed
|
||
|
if (status == NV_OK) {
|
||
|
status = uvm_ats_invalidate_tlbs(gpu_va_space,
|
||
|
ats_invalidate,
|
||
|
&non_replayable_faults->fault_service_tracker);
|
||
|
}
|
||
|
}
|
||
|
else {
|
||
|
UVM_ASSERT(fault_entry->fault_access_type != UVM_FAULT_ACCESS_TYPE_PREFETCH);
|
||
|
fault_entry->is_fatal = true;
|
||
|
fault_entry->fatal_reason = uvm_tools_status_to_fatal_fault_reason(status);
|
||
|
|
||
|
// Do not return error due to logical errors in the application
|
||
|
status = NV_OK;
|
||
|
}
|
||
|
|
||
|
return status;
|
||
|
}
|
||
|
|
||
|
// Service one parsed non-replayable fault entry.
//
// Resolves the VA space, GPU VA space and user channel of the fault, services
// it (managed or non-managed path) and, on success, clears the faulted bit of
// the channel. Fatal faults are recorded, and a channel kill is scheduled for
// fatal faults and for servicing errors.
//
// Returns NV_OK when the VA space, GPU VA space or channel cannot be found,
// since the entry is simply ignored in those cases. Otherwise returns the
// servicing status.
static NV_STATUS service_fault(uvm_gpu_t *gpu, uvm_fault_buffer_entry_t *fault_entry)
{
    NV_STATUS status;
    uvm_user_channel_t *user_channel;
    uvm_va_block_t *va_block;
    uvm_va_space_t *va_space = NULL;
    struct mm_struct *mm;
    uvm_gpu_va_space_t *gpu_va_space;
    uvm_non_replayable_fault_buffer_info_t *buffer_info = &gpu->parent->fault_buffer_info.non_replayable;
    uvm_va_block_context_t *block_context = &buffer_info->block_service_context.block_context;

    status = uvm_gpu_fault_entry_to_va_space(gpu, fault_entry, &va_space);
    if (status != NV_OK) {
        // The VA space lookup will fail if we're running concurrently with
        // removal of the channel from the VA space (channel unregister, GPU VA
        // space unregister, VA space destroy, etc). The other thread will stop
        // the channel and remove the channel from the table, so the faulting
        // condition will be gone. In the case of replayable faults we need to
        // flush the buffer, but here we can just ignore the entry and proceed
        // on.
        //
        // Note that we can't have any subcontext issues here, since non-
        // replayable faults only use the address space of their channel.
        UVM_ASSERT(status == NV_ERR_INVALID_CHANNEL);
        UVM_ASSERT(!va_space);
        return NV_OK;
    }

    UVM_ASSERT(va_space);

    // If an mm is registered with the VA space, we have to retain it
    // in order to lock it before locking the VA space. It is guaranteed
    // to remain valid until we release. If no mm is registered, we
    // can only service managed faults, not ATS/HMM faults.
    mm = uvm_va_space_mm_retain_lock(va_space);

    uvm_va_space_down_read(va_space);

    gpu_va_space = uvm_gpu_va_space_get_by_parent_gpu(va_space, gpu->parent);
    if (!gpu_va_space) {
        // The va_space might have gone away. See the comment above.
        status = NV_OK;
        goto out_unlock;
    }

    fault_entry->va_space = va_space;

    user_channel = uvm_gpu_va_space_get_user_channel(gpu_va_space, fault_entry->instance_ptr);
    if (!user_channel) {
        // The channel might have gone away. See the comment above.
        status = NV_OK;
        goto out_unlock;
    }

    fault_entry->fault_source.channel_id = user_channel->hw_channel_id;

    // Entries already flagged as fatal at parse time are not serviced; status
    // is still NV_OK from the VA space lookup in that case.
    if (!fault_entry->is_fatal) {
        status = uvm_va_block_find_create(fault_entry->va_space,
                                          mm,
                                          fault_entry->fault_address,
                                          block_context,
                                          &va_block);
        if (status != NV_OK)
            status = service_non_managed_fault(gpu_va_space, mm, fault_entry, status);
        else
            status = service_managed_fault_in_block(gpu_va_space->gpu, mm, va_block, fault_entry);

        // We are done, we clear the faulted bit on the channel, so it can be
        // re-scheduled again
        if (status == NV_OK && !fault_entry->is_fatal) {
            status = clear_faulted_on_gpu(gpu,
                                          user_channel,
                                          fault_entry,
                                          buffer_info->batch_id,
                                          &buffer_info->fault_service_tracker);
            uvm_tracker_clear(&buffer_info->fault_service_tracker);
        }
    }

    // The va_space is locked at this point, so the fatal fault event can be
    // recorded now.
    if (fault_entry->is_fatal)
        uvm_tools_record_gpu_fatal_fault(gpu->parent->id, fault_entry->va_space, fault_entry, fault_entry->fatal_reason);

    if (status != NV_OK || fault_entry->is_fatal)
        schedule_kill_channel(gpu, fault_entry, user_channel);

out_unlock:
    uvm_va_space_up_read(va_space);
    uvm_va_space_mm_release_unlock(va_space, mm);

    return status;
}
|
||
|
|
||
|
// Service all the outstanding non-replayable faults of the GPU.
//
// Faults are fetched in batches until fetch_non_replayable_fault_buffer_entries
// returns none. When servicing a fault fails, the rest of that batch is
// skipped, fetching continues with the next batch, and a debug message is
// printed once at the end.
void uvm_gpu_service_non_replayable_fault_buffer(uvm_gpu_t *gpu)
{
    // First failure seen across all batches. It is tracked separately from the
    // per-fault status so that a later successful fault cannot hide it.
    NV_STATUS first_error = NV_OK;
    NvU32 cached_faults;

    // If this handler is modified to handle fewer than all of the outstanding
    // faults, then special handling will need to be added to uvm_suspend()
    // to guarantee that fault processing has completed before control is
    // returned to the RM.
    while ((cached_faults = fetch_non_replayable_fault_buffer_entries(gpu)) > 0) {
        NvU32 i;

        // Unlike replayable faults, we do not batch up and preprocess
        // non-replayable faults since getting multiple faults on the same
        // memory region is not very likely
        for (i = 0; i < cached_faults; ++i) {
            NV_STATUS status = service_fault(gpu, &gpu->parent->fault_buffer_info.non_replayable.fault_cache[i]);

            if (status != NV_OK) {
                if (first_error == NV_OK)
                    first_error = status;

                break;
            }
        }
    }

    if (first_error != NV_OK)
        UVM_DBG_PRINT("Error servicing non-replayable faults on GPU: %s\n", uvm_gpu_name(gpu));
}
|