/*******************************************************************************
    Copyright (c) 2016-2022 NVIDIA Corporation

    Permission is hereby granted, free of charge, to any person obtaining a copy
    of this software and associated documentation files (the "Software"), to
    deal in the Software without restriction, including without limitation the
    rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
    sell copies of the Software, and to permit persons to whom the Software is
    furnished to do so, subject to the following conditions:

        The above copyright notice and this permission notice shall be
        included in all copies or substantial portions of the Software.

    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
    THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
    FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
    DEALINGS IN THE SOFTWARE.

*******************************************************************************/

#include "uvm_api.h"
|
||
|
#include "uvm_perf_events.h"
|
||
|
#include "uvm_perf_module.h"
|
||
|
#include "uvm_perf_thrashing.h"
|
||
|
#include "uvm_perf_utils.h"
|
||
|
#include "uvm_va_block.h"
|
||
|
#include "uvm_va_range.h"
|
||
|
#include "uvm_kvmalloc.h"
|
||
|
#include "uvm_tools.h"
|
||
|
#include "uvm_procfs.h"
|
||
|
#include "uvm_test.h"
|
||
|
|
||
|
// Number of bits for page-granularity time stamps. Currently we ignore the first 6 bits
|
||
|
// of the timestamp (i.e. we have 64ns resolution, which is good enough)
|
||
|
#define PAGE_THRASHING_LAST_TIME_STAMP_BITS 58
|
||
|
#define PAGE_THRASHING_NUM_EVENTS_BITS 3
|
||
|
|
||
|
#define PAGE_THRASHING_THROTTLING_END_TIME_STAMP_BITS 58
|
||
|
#define PAGE_THRASHING_THROTTLING_COUNT_BITS 8
|
||
|
|
||
|
// Per-page thrashing detection structure.
|
||
|
typedef struct
|
||
|
{
|
||
|
struct
|
||
|
{
|
||
|
// Last time stamp when a thrashing-related event was recorded
|
||
|
NvU64 last_time_stamp : PAGE_THRASHING_LAST_TIME_STAMP_BITS;
|
||
|
|
||
|
bool has_migration_events : 1;
|
||
|
|
||
|
bool has_revocation_events : 1;
|
||
|
|
||
|
// Number of consecutive "thrashing" events (within the configured
|
||
|
// thrashing lapse)
|
||
|
NvU8 num_thrashing_events : PAGE_THRASHING_NUM_EVENTS_BITS;
|
||
|
|
||
|
bool pinned : 1;
|
||
|
};
|
||
|
|
||
|
struct
|
||
|
{
|
||
|
// Deadline for throttled processors to wake up
|
||
|
NvU64 throttling_end_time_stamp : PAGE_THRASHING_THROTTLING_END_TIME_STAMP_BITS;
|
||
|
|
||
|
// Number of times a processor has been throttled. This is used to
|
||
|
// determine when the page needs to get pinned. After getting pinned
|
||
|
// this field is always 0.
|
||
|
NvU8 throttling_count : PAGE_THRASHING_THROTTLING_COUNT_BITS;
|
||
|
};
|
||
|
|
||
|
// Processors accessing this page
|
||
|
uvm_processor_mask_t processors;
|
||
|
|
||
|
// Processors that have been throttled. This must be a subset of processors
|
||
|
uvm_processor_mask_t throttled_processors;
|
||
|
|
||
|
// Memory residency for the page when in pinning phase
|
||
|
uvm_processor_id_t pinned_residency_id;
|
||
|
|
||
|
// Processor not to be throttled in the current throttling period
|
||
|
uvm_processor_id_t do_not_throttle_processor_id;
|
||
|
} page_thrashing_info_t;
|
||
|
|
||
|
// Per-VA block thrashing detection structure. This state is protected by the
// VA block lock.
typedef struct
{
    page_thrashing_info_t *pages;

    NvU16 num_thrashing_pages;

    NvU8 thrashing_reset_count;

    uvm_processor_id_t last_processor;

    NvU64 last_time_stamp;

    NvU64 last_thrashing_time_stamp;

    // Stats
    NvU32 throttling_count;

    uvm_page_mask_t thrashing_pages;

    struct
    {
        NvU32 count;

        uvm_page_mask_t mask;

        // List of pinned pages. This list is only used if the pinning timeout
        // is not 0.
        struct list_head list;
    } pinned_pages;
} block_thrashing_info_t;

// Descriptor for a page that has been pinned due to thrashing. This structure
// is only used if the pinning timeout is not 0.
typedef struct
{
    uvm_va_block_t *va_block;

    // Page index within va_block
    uvm_page_index_t page_index;

    // Absolute timestamp after which the page will be unpinned
    NvU64 deadline;

    // Entry in the per-VA Space list of pinned pages. See
    // va_space_thrashing_info_t::pinned_pages::list.
    struct list_head va_space_list_entry;

    // Entry in the per-VA Block list of pinned pages. See
    // block_thrashing_info_t::pinned_pages::list.
    struct list_head va_block_list_entry;
} pinned_page_t;

// Per-VA space data structures and policy configuration
typedef struct
{
    // Per-VA space accounting of pinned pages that is used to speculatively
    // unpin pages after the configured timeout. This struct is only used if
    // the pinning timeout is not 0.
    struct
    {
        // Work descriptor that is executed asynchronously by a helper thread
        struct delayed_work dwork;

        // List of pinned pages. They are (mostly) ordered by unpin deadline.
        // New entries are inserted blindly at the tail since the expectation
        // is that they will have the largest deadline value. However, given
        // the drift between when multiple threads query their timestamps and
        // add those pages to the list under the lock, it might not be
        // strictly ordered. But this is OK since the difference will be very
        // small and they will be eventually removed from the list.
        //
        // Entries are removed when they reach the deadline by the function
        // configured in dwork. This list is protected by lock.
        struct list_head list;

        uvm_spinlock_t lock;

        uvm_va_block_context_t va_block_context;

        // Flag used to avoid scheduling delayed unpinning operations after
        // uvm_perf_thrashing_stop has been called.
        bool in_va_space_teardown;
    } pinned_pages;

    struct
    {
        // Whether thrashing mitigation is enabled on this VA space
        bool enable;

        // true if the thrashing mitigation parameters have been modified using
        // test ioctls
        bool test_overrides;

        //
        // Fields below are the thrashing mitigation parameters on the VA space
        //
        unsigned threshold;

        unsigned pin_threshold;

        NvU64 lapse_ns;

        NvU64 nap_ns;

        NvU64 epoch_ns;

        unsigned max_resets;

        NvU64 pin_ns;
    } params;

    uvm_va_space_t *va_space;
} va_space_thrashing_info_t;

typedef struct
{
    // Entry for the per-processor thrashing_stats file in procfs
    struct proc_dir_entry *procfs_file;

    // Number of times thrashing is detected
    atomic64_t num_thrashing;

    // Number of times the processor was throttled while thrashing
    atomic64_t num_throttle;

    // Number of times a page was pinned on this processor while thrashing
    atomic64_t num_pin_local;

    // Number of times a page was pinned on a different processor while thrashing
    atomic64_t num_pin_remote;
} processor_thrashing_stats_t;

// Pre-allocated thrashing stats structure for the CPU. This is only valid if
// uvm_procfs_is_debug_enabled() returns true.
static processor_thrashing_stats_t g_cpu_thrashing_stats;

#define PROCESSOR_THRASHING_STATS_INC(va_space, proc, field)                                          \
    do {                                                                                              \
        processor_thrashing_stats_t *_processor_stats = thrashing_stats_get_or_null(va_space, proc); \
        if (_processor_stats)                                                                         \
            atomic64_inc(&_processor_stats->field);                                                  \
    } while (0)

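// Example: thrashing_detected() later in this file bumps the per-processor
// counter with PROCESSOR_THRASHING_STATS_INC(va_space, processor_id, num_thrashing).
// The macro quietly does nothing when no stats struct exists for that
// processor (e.g. when the procfs debug files are disabled), so callers do
// not need to check for that case themselves.
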
// Global caches for the per-VA block thrashing detection structures
static struct kmem_cache *g_va_block_thrashing_info_cache __read_mostly;
static struct kmem_cache *g_pinned_page_cache __read_mostly;

//
// Tunables for thrashing detection/prevention (configurable via module parameters)
//

#define UVM_PERF_THRASHING_ENABLE_DEFAULT 1

// Enable/disable thrashing performance heuristics
static unsigned uvm_perf_thrashing_enable = UVM_PERF_THRASHING_ENABLE_DEFAULT;

#define UVM_PERF_THRASHING_THRESHOLD_DEFAULT 3
#define UVM_PERF_THRASHING_THRESHOLD_MAX ((1 << PAGE_THRASHING_NUM_EVENTS_BITS) - 1)

// Number of consecutive thrashing events to initiate thrashing prevention
//
// Maximum value is UVM_PERF_THRASHING_THRESHOLD_MAX
static unsigned uvm_perf_thrashing_threshold = UVM_PERF_THRASHING_THRESHOLD_DEFAULT;

#define UVM_PERF_THRASHING_PIN_THRESHOLD_DEFAULT 10
#define UVM_PERF_THRASHING_PIN_THRESHOLD_MAX ((1 << PAGE_THRASHING_THROTTLING_COUNT_BITS) - 1)

// Number of consecutive throttling operations before trying to map remotely
//
// Maximum value is UVM_PERF_THRASHING_PIN_THRESHOLD_MAX
static unsigned uvm_perf_thrashing_pin_threshold = UVM_PERF_THRASHING_PIN_THRESHOLD_DEFAULT;

// TODO: Bug 1768615: [uvm] Automatically tune default values for thrashing
// detection/prevention parameters
#define UVM_PERF_THRASHING_LAPSE_USEC_DEFAULT 500
#define UVM_PERF_THRASHING_LAPSE_USEC_DEFAULT_EMULATION (UVM_PERF_THRASHING_LAPSE_USEC_DEFAULT * 800)

// Lapse of time in microseconds that determines if two consecutive events on
// the same page can be considered thrashing
static unsigned uvm_perf_thrashing_lapse_usec = UVM_PERF_THRASHING_LAPSE_USEC_DEFAULT;

#define UVM_PERF_THRASHING_NAP_DEFAULT 1
#define UVM_PERF_THRASHING_NAP_MAX 100

// Time that the processor being throttled is forbidden to work on the thrashing
// page. This value is a multiplier of uvm_perf_thrashing_lapse_usec.
static unsigned uvm_perf_thrashing_nap = UVM_PERF_THRASHING_NAP_DEFAULT;

#define UVM_PERF_THRASHING_EPOCH_DEFAULT 2000

// Time lapse after which we consider thrashing is no longer happening. This
// value is a multiplier of uvm_perf_thrashing_lapse_usec.
static unsigned uvm_perf_thrashing_epoch = UVM_PERF_THRASHING_EPOCH_DEFAULT;

// When pages are pinned and the rest of thrashing processors are mapped
// remotely we lose track of who is accessing the page for the rest of
// program execution. This can lead to tremendous performance loss if the page
// is not thrashing anymore and it is always being accessed remotely.
// In order to avoid that scenario, we use a timer that unpins memory after
// some time. We use a per-VA space list of pinned pages, sorted by the
// deadline at which it will be unmapped from remote processors. Therefore,
// the next remote access will trigger a fault that will migrate the page.
#define UVM_PERF_THRASHING_PIN_DEFAULT 300
#define UVM_PERF_THRASHING_PIN_DEFAULT_EMULATION 10

// Time for which a page remains pinned. This value is a multiplier of
// uvm_perf_thrashing_lapse_usec. 0 means that it is pinned forever.
static unsigned uvm_perf_thrashing_pin = UVM_PERF_THRASHING_PIN_DEFAULT;

// Number of times a VA block can be reset back to non-thrashing. This
// mechanism tries to avoid performing optimizations on a block that periodically
// causes thrashing
#define UVM_PERF_THRASHING_MAX_RESETS_DEFAULT 4

static unsigned uvm_perf_thrashing_max_resets = UVM_PERF_THRASHING_MAX_RESETS_DEFAULT;

// Module parameters for the tunables
module_param(uvm_perf_thrashing_enable, uint, S_IRUGO);
module_param(uvm_perf_thrashing_threshold, uint, S_IRUGO);
module_param(uvm_perf_thrashing_pin_threshold, uint, S_IRUGO);
module_param(uvm_perf_thrashing_lapse_usec, uint, S_IRUGO);
module_param(uvm_perf_thrashing_nap, uint, S_IRUGO);
module_param(uvm_perf_thrashing_epoch, uint, S_IRUGO);
module_param(uvm_perf_thrashing_pin, uint, S_IRUGO);
module_param(uvm_perf_thrashing_max_resets, uint, S_IRUGO);

// See map_remote_on_atomic_fault in uvm_va_block.c
unsigned uvm_perf_map_remote_on_native_atomics_fault = 0;
module_param(uvm_perf_map_remote_on_native_atomics_fault, uint, S_IRUGO);

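// Note: these parameters are registered read-only (S_IRUGO), so they cannot
// be changed through sysfs after the module is loaded. As an illustration
// (assuming the standard nvidia-uvm module name), they would typically be set
// at load time, e.g.:
//   modprobe nvidia-uvm uvm_perf_thrashing_threshold=5 uvm_perf_thrashing_lapse_usec=1000
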
// Global post-processed values of the module parameters. They can be
// overridden per VA-space.
static bool g_uvm_perf_thrashing_enable;
static unsigned g_uvm_perf_thrashing_threshold;
static unsigned g_uvm_perf_thrashing_pin_threshold;
static NvU64 g_uvm_perf_thrashing_lapse_usec;
static NvU64 g_uvm_perf_thrashing_nap;
static NvU64 g_uvm_perf_thrashing_epoch;
static NvU64 g_uvm_perf_thrashing_pin;
static unsigned g_uvm_perf_thrashing_max_resets;

// Helper macros to initialize thrashing parameters from module parameters
//
// This helper returns whether the type for the parameter is signed
#define THRASHING_PARAMETER_IS_SIGNED(v) (((typeof(v)) -1) < 0)

// Macro that initializes the given thrashing parameter and checks its validity
// (within [_mi:_ma]). Otherwise it is initialized with the given default
// parameter _d. The user value is read from _v, and the final value is stored
// in a variable named g_##_v, so it must be declared, too. Only unsigned
// parameters are supported.
#define INIT_THRASHING_PARAMETER_MIN_MAX(_v, _d, _mi, _ma)                         \
    do {                                                                           \
        unsigned v = (_v);                                                         \
        unsigned d = (_d);                                                         \
        unsigned mi = (_mi);                                                       \
        unsigned ma = (_ma);                                                       \
                                                                                   \
        BUILD_BUG_ON(sizeof(_v) > sizeof(unsigned));                               \
        BUILD_BUG_ON(THRASHING_PARAMETER_IS_SIGNED(_v));                           \
                                                                                   \
        UVM_ASSERT(mi <= ma);                                                      \
        UVM_ASSERT(d >= mi);                                                       \
        UVM_ASSERT(d <= ma);                                                       \
                                                                                   \
        if (v >= mi && v <= ma) {                                                  \
            g_##_v = v;                                                            \
        }                                                                          \
        else {                                                                     \
            pr_info("Invalid value %u for " #_v ". Using %u instead\n", v, d);     \
                                                                                   \
            g_##_v = d;                                                            \
        }                                                                          \
    } while (0)

#define INIT_THRASHING_PARAMETER(v, d)                  INIT_THRASHING_PARAMETER_MIN_MAX(v, d, 0u, UINT_MAX)

#define INIT_THRASHING_PARAMETER_MIN(v, d, mi)          INIT_THRASHING_PARAMETER_MIN_MAX(v, d, mi, UINT_MAX)
#define INIT_THRASHING_PARAMETER_MAX(v, d, ma)          INIT_THRASHING_PARAMETER_MIN_MAX(v, d, 0u, ma)

#define INIT_THRASHING_PARAMETER_NONZERO(v, d)          INIT_THRASHING_PARAMETER_MIN_MAX(v, d, 1u, UINT_MAX)
#define INIT_THRASHING_PARAMETER_NONZERO_MAX(v, d, ma)  INIT_THRASHING_PARAMETER_MIN_MAX(v, d, 1u, ma)

#define INIT_THRASHING_PARAMETER_TOGGLE(v, d)           INIT_THRASHING_PARAMETER_MIN_MAX(v, d, 0u, 1u)

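// Illustrative (hypothetical) initialization calls, showing how these macros
// are meant to be paired with the module parameters declared above:
//   INIT_THRASHING_PARAMETER_TOGGLE(uvm_perf_thrashing_enable, UVM_PERF_THRASHING_ENABLE_DEFAULT);
//   INIT_THRASHING_PARAMETER_NONZERO_MAX(uvm_perf_thrashing_threshold,
//                                        UVM_PERF_THRASHING_THRESHOLD_DEFAULT,
//                                        UVM_PERF_THRASHING_THRESHOLD_MAX);
// Each call validates the user-supplied value against its range, falls back to
// the default on an out-of-range value, and stores the result in the matching
// g_uvm_perf_* variable.
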
// Helpers to get/set the time stamp
static NvU64 page_thrashing_get_time_stamp(page_thrashing_info_t *entry)
{
    return entry->last_time_stamp << (64 - PAGE_THRASHING_LAST_TIME_STAMP_BITS);
}

static void page_thrashing_set_time_stamp(page_thrashing_info_t *entry, NvU64 time_stamp)
{
    entry->last_time_stamp = time_stamp >> (64 - PAGE_THRASHING_LAST_TIME_STAMP_BITS);
}

static NvU64 page_thrashing_get_throttling_end_time_stamp(page_thrashing_info_t *entry)
{
    return entry->throttling_end_time_stamp << (64 - PAGE_THRASHING_THROTTLING_END_TIME_STAMP_BITS);
}

static void page_thrashing_set_throttling_end_time_stamp(page_thrashing_info_t *entry, NvU64 time_stamp)
{
    entry->throttling_end_time_stamp = time_stamp >> (64 - PAGE_THRASHING_THROTTLING_END_TIME_STAMP_BITS);
}

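// The helpers above keep only the top 58 bits of a 64-bit nanosecond
// timestamp, so stored values are truncated to 64 ns (2^6 ns) granularity.
// Small worked example (illustrative, not in the original source):
//   page_thrashing_set_time_stamp(&entry, 1000);  // stores 1000 >> 6 == 15
//   page_thrashing_get_time_stamp(&entry);        // returns 15 << 6 == 960
// i.e. 1000 ns rounds down to the previous 64 ns boundary, which is plenty of
// resolution for the microsecond-scale lapses used by this module.
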
// Performance heuristics module for thrashing
static uvm_perf_module_t g_module_thrashing;

// Callback declaration for the performance heuristics events
static void thrashing_event_cb(uvm_perf_event_t event_id, uvm_perf_event_data_t *event_data);
static void thrashing_block_destroy_cb(uvm_perf_event_t event_id, uvm_perf_event_data_t *event_data);

static uvm_perf_module_event_callback_desc_t g_callbacks_thrashing[] = {
    { UVM_PERF_EVENT_BLOCK_DESTROY, thrashing_block_destroy_cb },
    { UVM_PERF_EVENT_MODULE_UNLOAD, thrashing_block_destroy_cb },
    { UVM_PERF_EVENT_BLOCK_SHRINK , thrashing_block_destroy_cb },
    { UVM_PERF_EVENT_MIGRATION,     thrashing_event_cb },
    { UVM_PERF_EVENT_REVOCATION,    thrashing_event_cb }
};

static int nv_procfs_read_thrashing_stats(struct seq_file *s, void *v)
{
    processor_thrashing_stats_t *processor_stats = (processor_thrashing_stats_t *)s->private;

    UVM_ASSERT(processor_stats);

    if (!uvm_down_read_trylock(&g_uvm_global.pm.lock))
        return -EAGAIN;

    UVM_SEQ_OR_DBG_PRINT(s, "thrashing %llu\n", (NvU64)atomic64_read(&processor_stats->num_thrashing));
    UVM_SEQ_OR_DBG_PRINT(s, "throttle %llu\n", (NvU64)atomic64_read(&processor_stats->num_throttle));
    UVM_SEQ_OR_DBG_PRINT(s, "pin_local %llu\n", (NvU64)atomic64_read(&processor_stats->num_pin_local));
    UVM_SEQ_OR_DBG_PRINT(s, "pin_remote %llu\n", (NvU64)atomic64_read(&processor_stats->num_pin_remote));

    uvm_up_read(&g_uvm_global.pm.lock);

    return 0;
}

static int nv_procfs_read_thrashing_stats_entry(struct seq_file *s, void *v)
{
    UVM_ENTRY_RET(nv_procfs_read_thrashing_stats(s, v));
}

UVM_DEFINE_SINGLE_PROCFS_FILE(thrashing_stats_entry);

#define THRASHING_STATS_FILE_NAME "thrashing_stats"

// Initialization/deinitialization of CPU thrashing stats
//
static NV_STATUS cpu_thrashing_stats_init(void)
{
    struct proc_dir_entry *cpu_base_dir_entry = uvm_procfs_get_cpu_base_dir();

    if (uvm_procfs_is_debug_enabled()) {
        UVM_ASSERT(!g_cpu_thrashing_stats.procfs_file);
        g_cpu_thrashing_stats.procfs_file = NV_CREATE_PROC_FILE(THRASHING_STATS_FILE_NAME,
                                                                cpu_base_dir_entry,
                                                                thrashing_stats_entry,
                                                                &g_cpu_thrashing_stats);
        if (!g_cpu_thrashing_stats.procfs_file)
            return NV_ERR_OPERATING_SYSTEM;
    }

    return NV_OK;
}

static void cpu_thrashing_stats_exit(void)
{
    if (g_cpu_thrashing_stats.procfs_file) {
        UVM_ASSERT(uvm_procfs_is_debug_enabled());
        uvm_procfs_destroy_entry(g_cpu_thrashing_stats.procfs_file);
        g_cpu_thrashing_stats.procfs_file = NULL;
    }
}

// Get the thrashing stats struct for the given VA space if it exists
//
// No lock may be held. Therefore, the stats must be updated using atomics
static processor_thrashing_stats_t *gpu_thrashing_stats_get_or_null(uvm_gpu_t *gpu)
{
    return uvm_perf_module_type_data(gpu->perf_modules_data, UVM_PERF_MODULE_TYPE_THRASHING);
}

static processor_thrashing_stats_t *thrashing_stats_get_or_null(uvm_va_space_t *va_space, uvm_processor_id_t id)
{
    if (UVM_ID_IS_CPU(id)) {
        if (g_cpu_thrashing_stats.procfs_file)
            return &g_cpu_thrashing_stats;

        return NULL;
    }

    return gpu_thrashing_stats_get_or_null(uvm_va_space_get_gpu(va_space, id));
}

// Create the thrashing stats struct for the given GPU
//
// Global lock needs to be held
static NV_STATUS gpu_thrashing_stats_create(uvm_gpu_t *gpu)
{
    processor_thrashing_stats_t *gpu_thrashing;

    uvm_assert_mutex_locked(&g_uvm_global.global_lock);
    UVM_ASSERT(gpu_thrashing_stats_get_or_null(gpu) == NULL);
    UVM_ASSERT(uvm_procfs_is_debug_enabled());

    gpu_thrashing = uvm_kvmalloc_zero(sizeof(*gpu_thrashing));
    if (!gpu_thrashing)
        return NV_ERR_NO_MEMORY;

    gpu_thrashing->procfs_file = NV_CREATE_PROC_FILE(THRASHING_STATS_FILE_NAME,
                                                     gpu->procfs.dir,
                                                     thrashing_stats_entry,
                                                     gpu_thrashing);
    if (!gpu_thrashing->procfs_file) {
        uvm_kvfree(gpu_thrashing);
        return NV_ERR_OPERATING_SYSTEM;
    }

    uvm_perf_module_type_set_data(gpu->perf_modules_data, gpu_thrashing, UVM_PERF_MODULE_TYPE_THRASHING);

    return NV_OK;
}

static void gpu_thrashing_stats_destroy(uvm_gpu_t *gpu)
{
    processor_thrashing_stats_t *gpu_thrashing = gpu_thrashing_stats_get_or_null(gpu);

    uvm_assert_mutex_locked(&g_uvm_global.global_lock);

    if (gpu_thrashing) {
        uvm_perf_module_type_unset_data(gpu->perf_modules_data, UVM_PERF_MODULE_TYPE_THRASHING);

        if (gpu_thrashing->procfs_file)
            uvm_procfs_destroy_entry(gpu_thrashing->procfs_file);

        uvm_kvfree(gpu_thrashing);
    }
}

// Get the thrashing detection struct for the given VA space if it exists
//
// VA space lock needs to be held
static va_space_thrashing_info_t *va_space_thrashing_info_get_or_null(uvm_va_space_t *va_space)
{
    uvm_assert_rwsem_locked(&va_space->lock);

    return uvm_perf_module_type_data(va_space->perf_modules_data, UVM_PERF_MODULE_TYPE_THRASHING);
}

// Get the thrashing detection struct for the given VA space. It asserts that
// the information has been previously created.
//
// VA space lock needs to be held
static va_space_thrashing_info_t *va_space_thrashing_info_get(uvm_va_space_t *va_space)
{
    va_space_thrashing_info_t *va_space_thrashing = va_space_thrashing_info_get_or_null(va_space);
    UVM_ASSERT(va_space_thrashing);

    return va_space_thrashing;
}

static void va_space_thrashing_info_init_params(va_space_thrashing_info_t *va_space_thrashing)
{
    UVM_ASSERT(!va_space_thrashing->params.test_overrides);

    va_space_thrashing->params.enable = g_uvm_perf_thrashing_enable;

    // Snap the thrashing parameters so that they can be tuned per VA space
    va_space_thrashing->params.threshold = g_uvm_perf_thrashing_threshold;
    va_space_thrashing->params.pin_threshold = g_uvm_perf_thrashing_pin_threshold;

    // Default thrashing parameters are overridden for simulated/emulated GPUs
    if (g_uvm_global.num_simulated_devices > 0 &&
        (g_uvm_perf_thrashing_lapse_usec == UVM_PERF_THRASHING_LAPSE_USEC_DEFAULT)) {
        va_space_thrashing->params.lapse_ns = UVM_PERF_THRASHING_LAPSE_USEC_DEFAULT_EMULATION * 1000;
    }
    else {
        va_space_thrashing->params.lapse_ns = g_uvm_perf_thrashing_lapse_usec * 1000;
    }

    va_space_thrashing->params.nap_ns = va_space_thrashing->params.lapse_ns * g_uvm_perf_thrashing_nap;
    va_space_thrashing->params.epoch_ns = va_space_thrashing->params.lapse_ns * g_uvm_perf_thrashing_epoch;

    if (g_uvm_global.num_simulated_devices > 0 && (g_uvm_perf_thrashing_pin == UVM_PERF_THRASHING_PIN_DEFAULT)) {
        va_space_thrashing->params.pin_ns = va_space_thrashing->params.lapse_ns
                                            * UVM_PERF_THRASHING_PIN_DEFAULT_EMULATION;
    }
    else {
        va_space_thrashing->params.pin_ns = va_space_thrashing->params.lapse_ns * g_uvm_perf_thrashing_pin;
    }

    va_space_thrashing->params.max_resets = g_uvm_perf_thrashing_max_resets;
}

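// Worked example with the default module parameters (derived from the values
// above): lapse_ns = 500 us, nap_ns = 500 us * 1 = 500 us,
// epoch_ns = 500 us * 2000 = 1 s, and pin_ns = 500 us * 300 = 150 ms, so a
// page pinned due to thrashing is speculatively unpinned after roughly 150 ms.
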
// Create the thrashing detection struct for the given VA space
//
// VA space lock needs to be held in write mode
static va_space_thrashing_info_t *va_space_thrashing_info_create(uvm_va_space_t *va_space)
{
    va_space_thrashing_info_t *va_space_thrashing;
    uvm_assert_rwsem_locked_write(&va_space->lock);

    UVM_ASSERT(va_space_thrashing_info_get_or_null(va_space) == NULL);

    va_space_thrashing = uvm_kvmalloc_zero(sizeof(*va_space_thrashing));
    if (va_space_thrashing) {
        va_space_thrashing->va_space = va_space;

        va_space_thrashing_info_init_params(va_space_thrashing);

        uvm_perf_module_type_set_data(va_space->perf_modules_data, va_space_thrashing, UVM_PERF_MODULE_TYPE_THRASHING);
    }

    return va_space_thrashing;
}

// Destroy the thrashing detection struct for the given VA space
//
// VA space lock needs to be in write mode
static void va_space_thrashing_info_destroy(uvm_va_space_t *va_space)
{
    va_space_thrashing_info_t *va_space_thrashing = va_space_thrashing_info_get_or_null(va_space);
    uvm_assert_rwsem_locked_write(&va_space->lock);

    if (va_space_thrashing) {
        uvm_perf_module_type_unset_data(va_space->perf_modules_data, UVM_PERF_MODULE_TYPE_THRASHING);
        uvm_kvfree(va_space_thrashing);
    }
}

// Get the thrashing detection struct for the given block
static block_thrashing_info_t *thrashing_info_get(uvm_va_block_t *va_block)
{
    uvm_assert_mutex_locked(&va_block->lock);
    return uvm_perf_module_type_data(va_block->perf_modules_data, UVM_PERF_MODULE_TYPE_THRASHING);
}

// Get the thrashing detection struct for the given block or create it if it
// does not exist
static block_thrashing_info_t *thrashing_info_get_create(uvm_va_block_t *va_block)
{
    block_thrashing_info_t *block_thrashing = thrashing_info_get(va_block);

    BUILD_BUG_ON((1 << 8 * sizeof(block_thrashing->num_thrashing_pages)) < PAGES_PER_UVM_VA_BLOCK);
    BUILD_BUG_ON((1 << 16) < UVM_ID_MAX_PROCESSORS);

    if (!block_thrashing) {
        block_thrashing = nv_kmem_cache_zalloc(g_va_block_thrashing_info_cache, NV_UVM_GFP_FLAGS);
        if (!block_thrashing)
            goto done;

        block_thrashing->last_processor = UVM_ID_INVALID;
        INIT_LIST_HEAD(&block_thrashing->pinned_pages.list);

        uvm_perf_module_type_set_data(va_block->perf_modules_data, block_thrashing, UVM_PERF_MODULE_TYPE_THRASHING);
    }

done:
    return block_thrashing;
}

static void thrashing_reset_pages_in_region(uvm_va_block_t *va_block, NvU64 address, NvU64 bytes);

// Destroy the thrashing detection struct for the given block
static void thrashing_info_destroy(uvm_va_block_t *va_block)
{
    block_thrashing_info_t *block_thrashing = thrashing_info_get(va_block);

    if (block_thrashing) {
        thrashing_reset_pages_in_region(va_block, va_block->start, uvm_va_block_size(va_block));

        uvm_perf_module_type_unset_data(va_block->perf_modules_data, UVM_PERF_MODULE_TYPE_THRASHING);

        uvm_kvfree(block_thrashing->pages);
        kmem_cache_free(g_va_block_thrashing_info_cache, block_thrashing);
    }
}

void thrashing_block_destroy_cb(uvm_perf_event_t event_id, uvm_perf_event_data_t *event_data)
{
    uvm_va_block_t *va_block;

    UVM_ASSERT(g_uvm_perf_thrashing_enable);

    UVM_ASSERT(event_id == UVM_PERF_EVENT_BLOCK_DESTROY ||
               event_id == UVM_PERF_EVENT_BLOCK_SHRINK ||
               event_id == UVM_PERF_EVENT_MODULE_UNLOAD);

    if (event_id == UVM_PERF_EVENT_BLOCK_DESTROY)
        va_block = event_data->block_destroy.block;
    else if (event_id == UVM_PERF_EVENT_BLOCK_SHRINK)
        va_block = event_data->block_shrink.block;
    else
        va_block = event_data->module_unload.block;

    if (!va_block)
        return;

    thrashing_info_destroy(va_block);
}

// Sanity checks of the thrashing tracking state
static bool thrashing_state_checks(uvm_va_block_t *va_block,
                                   block_thrashing_info_t *block_thrashing,
                                   page_thrashing_info_t *page_thrashing,
                                   uvm_page_index_t page_index)
{
    uvm_va_space_t *va_space = uvm_va_block_get_va_space(va_block);
    va_space_thrashing_info_t *va_space_thrashing = va_space_thrashing_info_get(va_space);

    if (!block_thrashing) {
        UVM_ASSERT(!page_thrashing);
        return true;
    }

    UVM_ASSERT(uvm_page_mask_subset(&block_thrashing->pinned_pages.mask, &block_thrashing->thrashing_pages));

    if (page_thrashing) {
        UVM_ASSERT(block_thrashing->pages);
        UVM_ASSERT(page_thrashing == &block_thrashing->pages[page_index]);
    }
    else {
        UVM_ASSERT(!uvm_page_mask_test(&block_thrashing->thrashing_pages, page_index));
        return true;
    }

    UVM_ASSERT(uvm_processor_mask_subset(&page_thrashing->throttled_processors,
                                         &page_thrashing->processors));

    if (uvm_page_mask_test(&block_thrashing->thrashing_pages, page_index))
        UVM_ASSERT(page_thrashing->num_thrashing_events >= va_space_thrashing->params.threshold);

    if (page_thrashing->pinned) {
        UVM_ASSERT(uvm_page_mask_test(&block_thrashing->pinned_pages.mask, page_index));
        UVM_ASSERT(UVM_ID_IS_VALID(page_thrashing->pinned_residency_id));
        UVM_ASSERT(page_thrashing->throttling_count == 0);
    }
    else {
        UVM_ASSERT(!uvm_page_mask_test(&block_thrashing->pinned_pages.mask, page_index));
        UVM_ASSERT(UVM_ID_IS_INVALID(page_thrashing->pinned_residency_id));

        if (!uvm_processor_mask_empty(&page_thrashing->throttled_processors)) {
            UVM_ASSERT(page_thrashing->throttling_count > 0);
            UVM_ASSERT(uvm_page_mask_test(&block_thrashing->thrashing_pages, page_index));
        }
    }

    return true;
}

// Update throttling heuristics. Mainly check if a new throttling period has
// started and choose the next processor not to be throttled. This function
// is executed before the thrashing mitigation logic kicks in.
static void thrashing_throttle_update(va_space_thrashing_info_t *va_space_thrashing,
                                      uvm_va_block_t *va_block,
                                      page_thrashing_info_t *page_thrashing,
                                      uvm_processor_id_t processor,
                                      NvU64 time_stamp)
{
    NvU64 current_end_time_stamp = page_thrashing_get_throttling_end_time_stamp(page_thrashing);

    uvm_assert_mutex_locked(&va_block->lock);

    if (time_stamp > current_end_time_stamp) {
        NvU64 throttling_end_time_stamp = time_stamp + va_space_thrashing->params.nap_ns;
        page_thrashing_set_throttling_end_time_stamp(page_thrashing, throttling_end_time_stamp);

        // Avoid choosing the same processor in consecutive thrashing periods
        if (uvm_id_equal(page_thrashing->do_not_throttle_processor_id, processor))
            page_thrashing->do_not_throttle_processor_id = UVM_ID_INVALID;
        else
            page_thrashing->do_not_throttle_processor_id = processor;
    }
    else if (UVM_ID_IS_INVALID(page_thrashing->do_not_throttle_processor_id)) {
        page_thrashing->do_not_throttle_processor_id = processor;
    }
}

// Throttle the execution of a processor. If this is the first processor being
// throttled for a throttling period, compute the time stamp until which the
// rest of processors will be throttled on fault.
//
// - Page may be pinned (possible in thrashing due to revocation, such as
//   in system-wide atomics)
// - Requesting processor must not be throttled at this point.
//
static void thrashing_throttle_processor(uvm_va_block_t *va_block,
                                         block_thrashing_info_t *block_thrashing,
                                         page_thrashing_info_t *page_thrashing,
                                         uvm_page_index_t page_index,
                                         uvm_processor_id_t processor)
{
    uvm_va_space_t *va_space = uvm_va_block_get_va_space(va_block);
    NvU64 address = uvm_va_block_cpu_page_address(va_block, page_index);

    uvm_assert_mutex_locked(&va_block->lock);

    UVM_ASSERT(!uvm_id_equal(processor, page_thrashing->do_not_throttle_processor_id));

    if (!uvm_processor_mask_test_and_set(&page_thrashing->throttled_processors, processor)) {
        // CPU is throttled by sleeping. This is done in uvm_vm_fault so it
        // drops the VA block and VA space locks. Throttling start/end events
        // are recorded around the sleep calls.
        if (UVM_ID_IS_GPU(processor))
            uvm_tools_record_throttling_start(va_space, address, processor);

        if (!page_thrashing->pinned)
            UVM_PERF_SATURATING_INC(page_thrashing->throttling_count);

        UVM_PERF_SATURATING_INC(block_thrashing->throttling_count);
    }

    UVM_ASSERT(thrashing_state_checks(va_block, block_thrashing, page_thrashing, page_index));
}

// Stop throttling on the given processor. If this is the last processor being
// throttled for a throttling period, it will clear the throttling period.
//
// - Page may be pinned (possible in thrashing due to revocation, such as
//   in system-wide atomics)
// - Requesting processor must be throttled at this point.
//
static void thrashing_throttle_end_processor(uvm_va_block_t *va_block,
                                             block_thrashing_info_t *block_thrashing,
                                             page_thrashing_info_t *page_thrashing,
                                             uvm_page_index_t page_index,
                                             uvm_processor_id_t processor)
{
    uvm_va_space_t *va_space = uvm_va_block_get_va_space(va_block);
    NvU64 address = uvm_va_block_cpu_page_address(va_block, page_index);

    UVM_ASSERT(uvm_processor_mask_test(&page_thrashing->throttled_processors, processor));
    uvm_processor_mask_clear(&page_thrashing->throttled_processors, processor);
    if (uvm_processor_mask_empty(&page_thrashing->throttled_processors))
        page_thrashing_set_throttling_end_time_stamp(page_thrashing, 0);

    // See comment regarding throttling start/end events for CPU in
    // thrashing_throttle_processor
    if (UVM_ID_IS_GPU(processor))
        uvm_tools_record_throttling_end(va_space, address, processor);

    UVM_ASSERT(thrashing_state_checks(va_block, block_thrashing, page_thrashing, page_index));
}

// Clear the throttling state for all processors. This is used while
// transitioning to pinned state and during thrashing information reset.
static void thrashing_throttling_reset_page(uvm_va_block_t *va_block,
                                            block_thrashing_info_t *block_thrashing,
                                            page_thrashing_info_t *page_thrashing,
                                            uvm_page_index_t page_index)
{
    uvm_processor_id_t processor_id;

    for_each_id_in_mask(processor_id, &page_thrashing->throttled_processors) {
        thrashing_throttle_end_processor(va_block,
                                         block_thrashing,
                                         page_thrashing,
                                         page_index,
                                         processor_id);
    }

    UVM_ASSERT(uvm_processor_mask_empty(&page_thrashing->throttled_processors));
}

// Find the pinned page descriptor for the given page index. Return NULL if the
// page is not pinned.
static pinned_page_t *find_pinned_page(block_thrashing_info_t *block_thrashing, uvm_page_index_t page_index)
{
    pinned_page_t *pinned_page;

    list_for_each_entry(pinned_page, &block_thrashing->pinned_pages.list, va_block_list_entry) {
        if (pinned_page->page_index == page_index)
            return pinned_page;
    }

    return NULL;
}

// Pin a page on the specified processor. All thrashing processors will be
// mapped remotely on this location, when possible
//
// - Requesting processor cannot be throttled
//
static NV_STATUS thrashing_pin_page(va_space_thrashing_info_t *va_space_thrashing,
                                    uvm_va_block_t *va_block,
                                    block_thrashing_info_t *block_thrashing,
                                    page_thrashing_info_t *page_thrashing,
                                    uvm_page_index_t page_index,
                                    NvU64 time_stamp,
                                    uvm_processor_id_t residency,
                                    uvm_processor_id_t requester)
{
    uvm_processor_mask_t current_residency;

    uvm_assert_mutex_locked(&va_block->lock);
    UVM_ASSERT(!uvm_processor_mask_test(&page_thrashing->throttled_processors, requester));

    uvm_va_block_page_resident_processors(va_block, page_index, &current_residency);

    // If we are pinning the page for the first time or we are pinning it on a
    // different location than the current location, reset the throttling state
    // to make sure that we flush any pending ThrottlingEnd events.
    if (!page_thrashing->pinned || !uvm_processor_mask_test(&current_residency, residency))
        thrashing_throttling_reset_page(va_block, block_thrashing, page_thrashing, page_index);

    if (!page_thrashing->pinned) {
        if (va_space_thrashing->params.pin_ns > 0) {
            pinned_page_t *pinned_page = nv_kmem_cache_zalloc(g_pinned_page_cache, NV_UVM_GFP_FLAGS);
            if (!pinned_page)
                return NV_ERR_NO_MEMORY;

            pinned_page->va_block = va_block;
            pinned_page->page_index = page_index;
            pinned_page->deadline = time_stamp + va_space_thrashing->params.pin_ns;

            uvm_spin_lock(&va_space_thrashing->pinned_pages.lock);

            list_add_tail(&pinned_page->va_space_list_entry, &va_space_thrashing->pinned_pages.list);
            list_add_tail(&pinned_page->va_block_list_entry, &block_thrashing->pinned_pages.list);

            // We only schedule the delayed work if the list was empty before
            // adding this page. Otherwise, we just add it to the list. The
            // unpinning helper will remove from the list those pages with
            // deadline prior to its wakeup timestamp and will reschedule
            // itself if there are remaining pages in the list.
            if (list_is_singular(&va_space_thrashing->pinned_pages.list) &&
                !va_space_thrashing->pinned_pages.in_va_space_teardown) {
                int scheduled;
                scheduled = schedule_delayed_work(&va_space_thrashing->pinned_pages.dwork,
                                                  usecs_to_jiffies(va_space_thrashing->params.pin_ns / 1000));
                UVM_ASSERT(scheduled != 0);
            }

            uvm_spin_unlock(&va_space_thrashing->pinned_pages.lock);
        }

        page_thrashing->throttling_count = 0;
        page_thrashing->pinned = true;
        UVM_PERF_SATURATING_INC(block_thrashing->pinned_pages.count);
        uvm_page_mask_set(&block_thrashing->pinned_pages.mask, page_index);
    }

    page_thrashing->pinned_residency_id = residency;

    UVM_ASSERT(thrashing_state_checks(va_block, block_thrashing, page_thrashing, page_index));

    return NV_OK;
}

// Unpin a page. This function just clears the pinning tracking state, and does
// not remove remote mappings on the page. Callers will need to do it manually
// BEFORE calling this function, if so desired.
// - Page must be pinned
//
static void thrashing_unpin_page(va_space_thrashing_info_t *va_space_thrashing,
                                 uvm_va_block_t *va_block,
                                 block_thrashing_info_t *block_thrashing,
                                 page_thrashing_info_t *page_thrashing,
                                 uvm_page_index_t page_index)
{
    uvm_assert_mutex_locked(&va_block->lock);
    UVM_ASSERT(page_thrashing->pinned);

    if (va_space_thrashing->params.pin_ns > 0) {
        bool do_free = false;
        pinned_page_t *pinned_page = find_pinned_page(block_thrashing, page_index);

        UVM_ASSERT(pinned_page);
        UVM_ASSERT(pinned_page->page_index == page_index);
        UVM_ASSERT(pinned_page->va_block == va_block);

        // The va_space_list_entry and va_block_list_entry have special
        // meanings here:
        // - va_space_list_entry: when the delayed unpin worker removes the
        //   pinned_page from this list, it takes the ownership of the page and
        //   is in charge of freeing it.
        // - va_block_list_entry: by removing the page from this list,
        //   thrashing_unpin_page tells the unpin delayed worker to skip
        //   unpinning that page.
        uvm_spin_lock(&va_space_thrashing->pinned_pages.lock);
        list_del_init(&pinned_page->va_block_list_entry);

        if (!list_empty(&pinned_page->va_space_list_entry)) {
            do_free = true;
            list_del_init(&pinned_page->va_space_list_entry);

            if (list_empty(&va_space_thrashing->pinned_pages.list))
                cancel_delayed_work(&va_space_thrashing->pinned_pages.dwork);
        }

        uvm_spin_unlock(&va_space_thrashing->pinned_pages.lock);

        if (do_free)
            kmem_cache_free(g_pinned_page_cache, pinned_page);
    }

    page_thrashing->pinned_residency_id = UVM_ID_INVALID;
    page_thrashing->pinned = false;
    uvm_page_mask_clear(&block_thrashing->pinned_pages.mask, page_index);

    UVM_ASSERT(thrashing_state_checks(va_block, block_thrashing, page_thrashing, page_index));
}

static void thrashing_detected(uvm_va_block_t *va_block,
                               block_thrashing_info_t *block_thrashing,
                               page_thrashing_info_t *page_thrashing,
                               uvm_page_index_t page_index,
                               uvm_processor_id_t processor_id)
{
    uvm_va_space_t *va_space = uvm_va_block_get_va_space(va_block);
    NvU64 address = uvm_va_block_cpu_page_address(va_block, page_index);

    // Thrashing detected, record the event
    uvm_tools_record_thrashing(va_space, address, PAGE_SIZE, &page_thrashing->processors);
    if (!uvm_page_mask_test_and_set(&block_thrashing->thrashing_pages, page_index))
        ++block_thrashing->num_thrashing_pages;

    PROCESSOR_THRASHING_STATS_INC(va_space, processor_id, num_thrashing);

    UVM_ASSERT(thrashing_state_checks(va_block, block_thrashing, page_thrashing, page_index));
}

// Clear the thrashing information for the given page. This function does not
// unmap remote mappings on the page. Callers will need to do it BEFORE calling
// this function, if so desired
static void thrashing_reset_page(va_space_thrashing_info_t *va_space_thrashing,
                                 uvm_va_block_t *va_block,
                                 block_thrashing_info_t *block_thrashing,
                                 uvm_page_index_t page_index)
{
    page_thrashing_info_t *page_thrashing = &block_thrashing->pages[page_index];
    uvm_assert_mutex_locked(&va_block->lock);

    UVM_ASSERT(block_thrashing->num_thrashing_pages > 0);
    UVM_ASSERT(uvm_page_mask_test(&block_thrashing->thrashing_pages, page_index));
    UVM_ASSERT(page_thrashing->num_thrashing_events > 0);

    thrashing_throttling_reset_page(va_block, block_thrashing, page_thrashing, page_index);
    UVM_ASSERT(uvm_processor_mask_empty(&page_thrashing->throttled_processors));

    if (page_thrashing->pinned)
        thrashing_unpin_page(va_space_thrashing, va_block, block_thrashing, page_thrashing, page_index);

    page_thrashing->last_time_stamp = 0;
    page_thrashing->has_migration_events = 0;
    page_thrashing->has_revocation_events = 0;
    page_thrashing->num_thrashing_events = 0;
    uvm_processor_mask_zero(&page_thrashing->processors);

    if (uvm_page_mask_test_and_clear(&block_thrashing->thrashing_pages, page_index))
        --block_thrashing->num_thrashing_pages;

    UVM_ASSERT(thrashing_state_checks(va_block, block_thrashing, page_thrashing, page_index));
}

// Call thrashing_reset_page for all the thrashing pages in the region
// described by address and bytes
static void thrashing_reset_pages_in_region(uvm_va_block_t *va_block, NvU64 address, NvU64 bytes)
{
    uvm_page_index_t page_index;
    uvm_va_space_t *va_space = uvm_va_block_get_va_space(va_block);
    va_space_thrashing_info_t *va_space_thrashing = va_space_thrashing_info_get(va_space);
    block_thrashing_info_t *block_thrashing = NULL;
    uvm_va_block_region_t region = uvm_va_block_region_from_start_size(va_block, address, bytes);

    block_thrashing = thrashing_info_get(va_block);
    if (!block_thrashing || !block_thrashing->pages)
        return;

    // Update all pages in the region
    for_each_va_block_page_in_region_mask(page_index, &block_thrashing->thrashing_pages, region)
        thrashing_reset_page(va_space_thrashing, va_block, block_thrashing, page_index);
}

// Unmap remote mappings from the given processors on the pinned pages
// described by region and block_thrashing->pinned pages.
static NV_STATUS unmap_remote_pinned_pages_from_processors(uvm_va_block_t *va_block,
                                                           uvm_va_block_context_t *va_block_context,
                                                           block_thrashing_info_t *block_thrashing,
                                                           uvm_va_block_region_t region,
                                                           const uvm_processor_mask_t *unmap_processors)
{
    NV_STATUS status = NV_OK;
    NV_STATUS tracker_status;
    uvm_tracker_t local_tracker = UVM_TRACKER_INIT();
    uvm_processor_id_t processor_id;
    uvm_va_policy_t *policy = va_block_context->policy;

    uvm_assert_mutex_locked(&va_block->lock);

    for_each_id_in_mask(processor_id, unmap_processors) {
        UVM_ASSERT(uvm_id_equal(processor_id, policy->preferred_location) ||
                   !uvm_processor_mask_test(&policy->accessed_by, processor_id));

        if (uvm_processor_mask_test(&va_block->resident, processor_id)) {
            const uvm_page_mask_t *resident_mask = uvm_va_block_resident_mask_get(va_block, processor_id);

            if (!uvm_page_mask_andnot(&va_block_context->caller_page_mask,
                                      &block_thrashing->pinned_pages.mask,
                                      resident_mask))
                continue;
        }
        else {
            uvm_page_mask_copy(&va_block_context->caller_page_mask,
                               &block_thrashing->pinned_pages.mask);
        }

        status = uvm_va_block_unmap(va_block,
                                    va_block_context,
                                    processor_id,
                                    region,
                                    &va_block_context->caller_page_mask,
                                    &local_tracker);
        if (status != NV_OK)
            break;
    }

    tracker_status = uvm_tracker_add_tracker_safe(&va_block->tracker, &local_tracker);
    if (status == NV_OK)
        status = tracker_status;

    uvm_tracker_deinit(&local_tracker);

    return status;
}

// Unmap remote mappings from all processors on the pinned pages
// described by region and block_thrashing->pinned pages.
static NV_STATUS unmap_remote_pinned_pages_from_all_processors(uvm_va_block_t *va_block,
                                                               uvm_va_block_context_t *va_block_context,
                                                               uvm_va_block_region_t region)
{
    block_thrashing_info_t *block_thrashing;
    uvm_processor_mask_t unmap_processors;
    uvm_va_policy_t *policy;

    uvm_assert_mutex_locked(&va_block->lock);

    block_thrashing = thrashing_info_get(va_block);
    if (!block_thrashing || !block_thrashing->pages)
        return NV_OK;

    if (uvm_page_mask_empty(&block_thrashing->pinned_pages.mask))
        return NV_OK;

    // Unmap all mapped processors (that are not SetAccessedBy) with
    // no copy of the page
    policy = uvm_va_policy_get(va_block, uvm_va_block_region_start(va_block, region));

    uvm_processor_mask_andnot(&unmap_processors, &va_block->mapped, &policy->accessed_by);

    return unmap_remote_pinned_pages_from_processors(va_block,
                                                     va_block_context,
                                                     block_thrashing,
                                                     region,
                                                     &unmap_processors);
}

// Check that we are not migrating pages away from their pinned location and
// that we are not prefetching thrashing pages.
static bool migrating_wrong_pages(uvm_va_block_t *va_block,
                                  NvU64 address,
                                  NvU64 bytes,
                                  uvm_processor_id_t proc_id,
                                  uvm_make_resident_cause_t cause)
{
    uvm_page_index_t page_index;
    block_thrashing_info_t *block_thrashing = NULL;
    uvm_va_block_region_t region = uvm_va_block_region_from_start_size(va_block, address, bytes);

    block_thrashing = thrashing_info_get(va_block);
    if (!block_thrashing || !block_thrashing->pages)
        return false;

    for_each_va_block_page_in_region(page_index, region) {
        page_thrashing_info_t *page_thrashing = &block_thrashing->pages[page_index];
        UVM_ASSERT_MSG(!page_thrashing->pinned || uvm_id_equal(proc_id, page_thrashing->pinned_residency_id),
                       "Migrating to %u instead of %u\n",
                       uvm_id_value(proc_id), uvm_id_value(page_thrashing->pinned_residency_id));
        if (cause == UVM_MAKE_RESIDENT_CAUSE_PREFETCH)
            UVM_ASSERT(!uvm_page_mask_test(&block_thrashing->thrashing_pages, page_index));
    }

    return false;
}

static bool is_migration_pinned_pages_update(uvm_va_block_t *va_block,
                                             const uvm_perf_event_data_t *event_data,
                                             NvU64 address,
                                             NvU64 bytes)
{
    const block_thrashing_info_t *block_thrashing = NULL;
    uvm_va_block_region_t region = uvm_va_block_region_from_start_size(va_block, address, bytes);
    bool ret;

    if (event_data->migration.cause != UVM_MAKE_RESIDENT_CAUSE_REPLAYABLE_FAULT &&
        event_data->migration.cause != UVM_MAKE_RESIDENT_CAUSE_ACCESS_COUNTER) {
        return false;
    }

    block_thrashing = thrashing_info_get(va_block);
    if (!block_thrashing || !block_thrashing->pages)
        return false;

    ret = uvm_page_mask_region_full(&block_thrashing->pinned_pages.mask, region);
    if (ret) {
        uvm_page_index_t page_index;
        for_each_va_block_page_in_region(page_index, region) {
            page_thrashing_info_t *page_thrashing = &block_thrashing->pages[page_index];
            UVM_ASSERT(uvm_id_equal(page_thrashing->pinned_residency_id, event_data->migration.dst));
        }
    }

    return ret;
}

// This function processes migration/revocation events and determines if the
|
||
|
// affected pages are thrashing or not.
|
||
|
void thrashing_event_cb(uvm_perf_event_t event_id, uvm_perf_event_data_t *event_data)
|
||
|
{
|
||
|
va_space_thrashing_info_t *va_space_thrashing;
|
||
|
block_thrashing_info_t *block_thrashing = NULL;
|
||
|
uvm_va_block_t *va_block;
|
||
|
uvm_va_space_t *va_space;
|
||
|
NvU64 address;
|
||
|
NvU64 bytes;
|
||
|
uvm_processor_id_t processor_id;
|
||
|
uvm_page_index_t page_index;
|
||
|
NvU64 time_stamp;
|
||
|
uvm_va_block_region_t region;
|
||
|
uvm_read_duplication_policy_t read_duplication;
|
||
|
|
||
|
UVM_ASSERT(g_uvm_perf_thrashing_enable);
|
||
|
|
||
|
UVM_ASSERT(event_id == UVM_PERF_EVENT_MIGRATION || event_id == UVM_PERF_EVENT_REVOCATION);
|
||
|
|
||
|
if (event_id == UVM_PERF_EVENT_MIGRATION) {
|
||
|
va_block = event_data->migration.block;
|
||
|
address = event_data->migration.address;
|
||
|
bytes = event_data->migration.bytes;
|
||
|
processor_id = event_data->migration.dst;
|
||
|
|
||
|
// Skip the thrashing detection logic on eviction as we cannot take
|
||
|
// the VA space lock
|
||
|
if (event_data->migration.cause == UVM_MAKE_RESIDENT_CAUSE_EVICTION)
|
||
|
return;
|
||
|
|
||
|
// Do not perform checks during the first part of staging copies
|
||
|
if (!uvm_id_equal(event_data->migration.dst, event_data->migration.make_resident_context->dest_id))
|
||
|
return;
|
||
|
|
||
|
va_space = uvm_va_block_get_va_space(va_block);
|
||
|
va_space_thrashing = va_space_thrashing_info_get(va_space);
|
||
|
if (!va_space_thrashing->params.enable)
|
||
|
return;
|
||
|
|
||
|
// TODO: Bug 2046423: HMM will need to look up the policy when
|
||
|
// read duplication is supported.
|
||
|
read_duplication = uvm_va_block_is_hmm(va_block) ?
|
||
|
UVM_READ_DUPLICATION_UNSET :
|
||
|
uvm_va_range_get_policy(va_block->va_range)->read_duplication;
|
||
|
|
||
|
// We only care about migrations due to replayable faults, access
|
||
|
// counters and page prefetching. For non-replayable faults, UVM will
|
||
|
// try not to migrate memory since CE is transferring data anyway.
|
||
|
// However, we can still see migration events due to initial
|
||
|
// population. The rest of migrations are triggered due to user
|
||
|
// commands or advice (such as read duplication) which takes precedence
|
||
|
// over our heuristics. Therefore, we clear our internal tracking
|
||
|
        // state.
        if ((event_data->migration.cause != UVM_MAKE_RESIDENT_CAUSE_REPLAYABLE_FAULT &&
             event_data->migration.cause != UVM_MAKE_RESIDENT_CAUSE_ACCESS_COUNTER &&
             event_data->migration.cause != UVM_MAKE_RESIDENT_CAUSE_PREFETCH) ||
            (event_data->migration.transfer_mode != UVM_VA_BLOCK_TRANSFER_MODE_MOVE) ||
            (read_duplication == UVM_READ_DUPLICATION_ENABLED)) {
            thrashing_reset_pages_in_region(va_block, address, bytes);
            return;
        }

        // Assert that we are not migrating pages that are pinned away from
        // their pinning residency, or prefetching pages that are thrashing
        UVM_ASSERT(!migrating_wrong_pages(va_block, address, bytes, processor_id, event_data->migration.cause));

        // If we are being migrated due to pinning just return
        if (is_migration_pinned_pages_update(va_block, event_data, address, bytes))
            return;
    }
    else {
        va_block = event_data->revocation.block;
        address = event_data->revocation.address;
        bytes = event_data->revocation.bytes;
        processor_id = event_data->revocation.proc_id;

        va_space = uvm_va_block_get_va_space(va_block);
        va_space_thrashing = va_space_thrashing_info_get(va_space);
        if (!va_space_thrashing->params.enable)
            return;
    }

    block_thrashing = thrashing_info_get_create(va_block);
    if (!block_thrashing)
        return;

    time_stamp = NV_GETTIME();

    if (!block_thrashing->pages) {
        // Don't create the per-page tracking structure unless there is some
        // potential thrashing within the block
        NvU16 num_block_pages;

        if (block_thrashing->last_time_stamp == 0 ||
            uvm_id_equal(block_thrashing->last_processor, processor_id) ||
            time_stamp - block_thrashing->last_time_stamp > va_space_thrashing->params.lapse_ns) {
            goto done;
        }

        num_block_pages = uvm_va_block_size(va_block) / PAGE_SIZE;

        block_thrashing->pages = uvm_kvmalloc_zero(sizeof(*block_thrashing->pages) * num_block_pages);
        if (!block_thrashing->pages)
            goto done;

        for (page_index = 0; page_index < num_block_pages; ++page_index) {
            block_thrashing->pages[page_index].pinned_residency_id = UVM_ID_INVALID;
            block_thrashing->pages[page_index].do_not_throttle_processor_id = UVM_ID_INVALID;
        }
    }

    region = uvm_va_block_region_from_start_size(va_block, address, bytes);

    // Update all pages in the region
    for_each_va_block_page_in_region(page_index, region) {
        page_thrashing_info_t *page_thrashing = &block_thrashing->pages[page_index];
        NvU64 last_time_stamp = page_thrashing_get_time_stamp(page_thrashing);

        // It is not possible that a pinned page is migrated here, since the
        // fault that triggered the migration should have unpinned it in its
        // call to uvm_perf_thrashing_get_hint. Moreover page prefetching never
        // includes pages that are thrashing (including pinning)
        if (event_id == UVM_PERF_EVENT_MIGRATION)
            UVM_ASSERT(page_thrashing->pinned == 0);

        uvm_processor_mask_set(&page_thrashing->processors, processor_id);
        page_thrashing_set_time_stamp(page_thrashing, time_stamp);

        if (last_time_stamp == 0)
            continue;

        if (time_stamp - last_time_stamp <= va_space_thrashing->params.lapse_ns) {
            UVM_PERF_SATURATING_INC(page_thrashing->num_thrashing_events);
            if (page_thrashing->num_thrashing_events == va_space_thrashing->params.threshold)
                thrashing_detected(va_block, block_thrashing, page_thrashing, page_index, processor_id);

            if (page_thrashing->num_thrashing_events >= va_space_thrashing->params.threshold)
                block_thrashing->last_thrashing_time_stamp = time_stamp;

            if (event_id == UVM_PERF_EVENT_MIGRATION)
                page_thrashing->has_migration_events = true;
            else
                page_thrashing->has_revocation_events = true;
        }
        else if (page_thrashing->num_thrashing_events >= va_space_thrashing->params.threshold &&
                 !page_thrashing->pinned) {
            thrashing_reset_page(va_space_thrashing, va_block, block_thrashing, page_index);
        }
    }

done:
    block_thrashing->last_time_stamp = time_stamp;
    block_thrashing->last_processor = processor_id;
}

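// Illustrative sketch (not part of the build): the per-page counting policy
// applied by the event callback above, reduced to plain C so it can be
// compiled and exercised in isolation. EXAMPLE_THRESHOLD and EXAMPLE_LAPSE_NS
// are hypothetical stand-ins for va_space_thrashing->params.threshold and
// params.lapse_ns, and the reset path is simplified with respect to
// thrashing_reset_page().
#if 0
#include <stdbool.h>
#include <stdint.h>

#define EXAMPLE_THRESHOLD 3
#define EXAMPLE_LAPSE_NS  300000ULL

typedef struct
{
    uint64_t last_time_stamp;
    uint8_t  num_thrashing_events;
} example_page_t;

// Record one migration/revocation event on the page and return true once the
// page would be considered thrashing
static bool example_record_event(example_page_t *page, uint64_t now)
{
    uint64_t last = page->last_time_stamp;

    page->last_time_stamp = now;

    if (last == 0)
        return false;

    if (now - last <= EXAMPLE_LAPSE_NS) {
        // Saturating counter, in the spirit of UVM_PERF_SATURATING_INC
        if (page->num_thrashing_events < UINT8_MAX)
            ++page->num_thrashing_events;
    }
    else {
        // Events are too far apart: the page is no longer thrashing
        page->num_thrashing_events = 0;
    }

    return page->num_thrashing_events >= EXAMPLE_THRESHOLD;
}
#endif
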
static bool thrashing_processors_can_access(uvm_va_space_t *va_space,
                                            page_thrashing_info_t *page_thrashing,
                                            uvm_processor_id_t to)
{
    if (UVM_ID_IS_INVALID(to))
        return false;

    return uvm_processor_mask_subset(&page_thrashing->processors,
                                     &va_space->accessible_from[uvm_id_value(to)]);
}

static bool thrashing_processors_have_fast_access_to(uvm_va_space_t *va_space,
                                                     page_thrashing_info_t *page_thrashing,
                                                     uvm_processor_id_t to)
{
    uvm_processor_mask_t fast_to;

    if (UVM_ID_IS_INVALID(to))
        return false;

    // Combine the NVLINK and native atomics masks since we could have PCIe
    // atomics in the future
    uvm_processor_mask_and(&fast_to,
                           &va_space->has_nvlink[uvm_id_value(to)],
                           &va_space->has_native_atomics[uvm_id_value(to)]);
    uvm_processor_mask_set(&fast_to, to);

    return uvm_processor_mask_subset(&page_thrashing->processors, &fast_to);
}

static void thrashing_processors_common_locations(uvm_va_space_t *va_space,
                                                  page_thrashing_info_t *page_thrashing,
                                                  uvm_processor_mask_t *common_locations)
{
    bool is_first = true;
    uvm_processor_id_t id;

    // Find processors that can be accessed from all thrashing processors. For
    // example: if A, B and C are thrashing, and both A and C can access B,
    // then B is a common location.
    uvm_processor_mask_zero(common_locations);

    for_each_id_in_mask(id, &page_thrashing->processors) {
        if (is_first)
            uvm_processor_mask_copy(common_locations, &va_space->can_access[uvm_id_value(id)]);
        else
            uvm_processor_mask_and(common_locations, common_locations, &va_space->can_access[uvm_id_value(id)]);

        is_first = false;
    }
}

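// Illustrative sketch (not part of the build): the common-location computation
// above is an intersection of per-processor "can access" sets across all
// thrashing processors. A hypothetical three-processor topology with CPU=bit0,
// GPU0=bit1 and GPU1=bit2 would look like this:
#if 0
#include <stdint.h>
#include <stdio.h>

int main(void)
{
    // Set of locations each processor can access, as a bitmask (hypothetical)
    uint32_t cpu_can_access  = 0x7; // CPU can reach CPU, GPU0 and GPU1 memory
    uint32_t gpu0_can_access = 0x3; // GPU0 can reach CPU and GPU0 memory
    uint32_t gpu1_can_access = 0x5; // GPU1 can reach CPU and GPU1 memory

    // If all three processors are thrashing on a page, the candidate pinning
    // locations are the intersection of the three sets: only the CPU here
    uint32_t common = cpu_can_access & gpu0_can_access & gpu1_can_access;

    printf("common locations mask: 0x%x\n", common); // prints 0x1
    return 0;
}
#endif
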
static bool preferred_location_is_thrashing(uvm_processor_id_t preferred_location,
                                            page_thrashing_info_t *page_thrashing)
{
    if (UVM_ID_IS_INVALID(preferred_location))
        return false;

    return uvm_processor_mask_test(&page_thrashing->processors, preferred_location);
}

static uvm_perf_thrashing_hint_t get_hint_for_migration_thrashing(va_space_thrashing_info_t *va_space_thrashing,
                                                                  uvm_va_block_t *va_block,
                                                                  uvm_page_index_t page_index,
                                                                  page_thrashing_info_t *page_thrashing,
                                                                  uvm_processor_id_t requester)
{
    uvm_perf_thrashing_hint_t hint;
    uvm_processor_id_t closest_resident_id;
    uvm_va_space_t *va_space = uvm_va_block_get_va_space(va_block);
    uvm_processor_id_t do_not_throttle_processor = page_thrashing->do_not_throttle_processor_id;
    uvm_processor_id_t pinned_residency = page_thrashing->pinned_residency_id;
    uvm_va_policy_t *policy;
    uvm_processor_id_t preferred_location;

    policy = uvm_va_policy_get(va_block, uvm_va_block_cpu_page_address(va_block, page_index));

    preferred_location = policy->preferred_location;

    hint.type = UVM_PERF_THRASHING_HINT_TYPE_NONE;

    closest_resident_id = uvm_va_block_page_get_closest_resident(va_block, page_index, requester);
    UVM_ASSERT(UVM_ID_IS_VALID(closest_resident_id));

    if (thrashing_processors_can_access(va_space, page_thrashing, preferred_location)) {
        // The logic in uvm_va_block_select_residency chooses the preferred
        // location if the requester can access it, so all processors should
        // naturally get mapped to the preferred location without thrashing.
        // However, we can get here if the preferred location was set after the
        // processors started thrashing.
        //
        // TODO: Bug 2527408. Reset thrashing history when a user policy
        //       changes in a VA block.
        hint.type = UVM_PERF_THRASHING_HINT_TYPE_PIN;
        hint.pin.residency = preferred_location;
    }
    else if (!preferred_location_is_thrashing(preferred_location, page_thrashing) &&
             thrashing_processors_have_fast_access_to(va_space, page_thrashing, closest_resident_id)) {
        // This is a fast path for those scenarios in which all thrashing
        // processors have fast (NVLINK + native atomics) access to the current
        // residency. This is skipped if the preferred location is thrashing
        // and not accessible by the rest of the thrashing processors.
        // Otherwise, we would be in the condition above.
        if (UVM_ID_IS_CPU(closest_resident_id)) {
            // On P9 systems, we prefer the CPU to map vidmem (since it can
            // cache it), so don't map the GPU to sysmem.
            if (UVM_ID_IS_GPU(requester)) {
                hint.type = UVM_PERF_THRASHING_HINT_TYPE_PIN;
                hint.pin.residency = requester;
            }
        }
        else {
            hint.type = UVM_PERF_THRASHING_HINT_TYPE_PIN;
            hint.pin.residency = closest_resident_id;
        }
    }
    else if (uvm_id_equal(requester, preferred_location)) {
        if (page_thrashing->pinned) {
            // If the faulting processor is the preferred location, we can
            // only:
            // 1) Pin to the preferred location
            // 2) Throttle if it's pinned elsewhere and we are not the
            //    do_not_throttle_processor
            if (uvm_id_equal(preferred_location, pinned_residency) ||
                uvm_id_equal(preferred_location, do_not_throttle_processor)) {
                hint.type = UVM_PERF_THRASHING_HINT_TYPE_PIN;
                hint.pin.residency = preferred_location;
            }
            else {
                hint.type = UVM_PERF_THRASHING_HINT_TYPE_THROTTLE;
            }
        }
        else if (!uvm_id_equal(preferred_location, do_not_throttle_processor)) {
            hint.type = UVM_PERF_THRASHING_HINT_TYPE_THROTTLE;
        }
        else if (page_thrashing->throttling_count >= va_space_thrashing->params.pin_threshold) {
            hint.type = UVM_PERF_THRASHING_HINT_TYPE_PIN;
            hint.pin.residency = preferred_location;
        }
    }
    else if (page_thrashing->pinned) {
        // 1) If the requester is the do_not_throttle_processor, pin the page
        //    to the requester if all thrashing processors can access it, or
        //    to a common location, or to the requester anyway if no common
        //    location is found.
        // 2) Try to map the current pinned residency.
        // 3) Throttle.
        if (uvm_id_equal(requester, do_not_throttle_processor)) {
            hint.type = UVM_PERF_THRASHING_HINT_TYPE_PIN;

            if (thrashing_processors_can_access(va_space, page_thrashing, requester)) {
                hint.pin.residency = requester;
            }
            else {
                uvm_processor_mask_t common_locations;

                thrashing_processors_common_locations(va_space, page_thrashing, &common_locations);
                if (uvm_processor_mask_empty(&common_locations)) {
                    hint.pin.residency = requester;
                }
                else {
                    // Find the common location that is closest to the requester
                    hint.pin.residency = uvm_processor_mask_find_closest_id(va_space, &common_locations, requester);
                }
            }
        }
        else if (uvm_processor_mask_test(&va_space->accessible_from[uvm_id_value(page_thrashing->pinned_residency_id)],
                                         requester)) {
            UVM_ASSERT(uvm_id_equal(closest_resident_id, pinned_residency));

            hint.type = UVM_PERF_THRASHING_HINT_TYPE_PIN;
            hint.pin.residency = pinned_residency;
        }
        else {
            hint.type = UVM_PERF_THRASHING_HINT_TYPE_THROTTLE;
        }
    }
    else if (!uvm_id_equal(requester, do_not_throttle_processor)) {
        hint.type = UVM_PERF_THRASHING_HINT_TYPE_THROTTLE;
    }
    else if (page_thrashing->throttling_count >= va_space_thrashing->params.pin_threshold) {
        hint.type = UVM_PERF_THRASHING_HINT_TYPE_PIN;
        hint.pin.residency = requester;
    }

    if (hint.type == UVM_PERF_THRASHING_HINT_TYPE_PIN &&
        !uvm_va_space_processor_has_memory(va_space, hint.pin.residency))
        hint.pin.residency = UVM_ID_CPU;

    return hint;
}

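// Summary (added for readers, not from the original sources): the priority
// order applied by get_hint_for_migration_thrashing() above is:
//   1) Pin to the preferred location if every thrashing processor can access
//      it.
//   2) Pin to the closest residency if all thrashing processors have fast
//      (NVLINK + native atomics) access to it.
//   3) If the requester is the preferred location: pin there, or throttle.
//   4) If the page is already pinned: pin to the requester or to a common
//      location, remap the current pinned residency, or throttle.
//   5) Otherwise throttle, unless the requester is the
//      do_not_throttle_processor and throttling_count has reached the pin
//      threshold, in which case pin to the requester.
// Any pin target without visible memory is demoted to the CPU at the end.
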
// Function called on fault that tells the fault handler if any operation
// should be performed to minimize thrashing. The logic is as follows:
//
// - Phase0: Block thrashing. If a number of consecutive thrashing events have
//   been detected on the VA block, per-page thrashing tracking information is
//   created.
// - Phase1: Throttling. When several processors fight over a page, we start a
//   "throttling period". During that period, only one processor will be able
//   to service faults on the page, and the rest will be throttled. All CPU
//   faults are considered to belong to the same device, even if they come from
//   different CPU threads.
// - Phase2: Pinning. After a number of consecutive throttling periods, the
//   page is pinned on a specific processor which all of the thrashing
//   processors can access.
// - Phase3: Revocation throttling. Even if the page is pinned, it can still be
//   thrashing due to revocation events (mainly due to system-wide atomics). In
//   that case we keep the page pinned while applying the same algorithm as in
//   Phase1.
uvm_perf_thrashing_hint_t uvm_perf_thrashing_get_hint(uvm_va_block_t *va_block,
                                                      NvU64 address,
                                                      uvm_processor_id_t requester)
{
    uvm_va_space_t *va_space = uvm_va_block_get_va_space(va_block);
    va_space_thrashing_info_t *va_space_thrashing = va_space_thrashing_info_get(va_space);
    block_thrashing_info_t *block_thrashing = NULL;
    page_thrashing_info_t *page_thrashing = NULL;
    uvm_perf_thrashing_hint_t hint;
    uvm_page_index_t page_index = uvm_va_block_cpu_page_index(va_block, address);
    NvU64 time_stamp;
    NvU64 last_time_stamp;

    hint.type = UVM_PERF_THRASHING_HINT_TYPE_NONE;

    if (!va_space_thrashing->params.enable)
        return hint;

    // If we don't have enough memory to store thrashing information, we assume
    // no thrashing
    block_thrashing = thrashing_info_get(va_block);
    if (!block_thrashing)
        return hint;

    // If the per-page tracking structure has not been created yet, we assume
    // no thrashing
    if (!block_thrashing->pages)
        return hint;

    time_stamp = NV_GETTIME();

    if (block_thrashing->last_thrashing_time_stamp != 0 &&
        (time_stamp - block_thrashing->last_thrashing_time_stamp > va_space_thrashing->params.epoch_ns) &&
        block_thrashing->pinned_pages.count == 0 &&
        block_thrashing->thrashing_reset_count < va_space_thrashing->params.max_resets) {
        uvm_page_index_t reset_page_index;

        ++block_thrashing->thrashing_reset_count;

        // Clear the state of throttled processors to make sure that we flush
        // any pending ThrottlingEnd events
        for_each_va_block_page_in_mask(reset_page_index, &block_thrashing->thrashing_pages, va_block) {
            thrashing_throttling_reset_page(va_block,
                                            block_thrashing,
                                            &block_thrashing->pages[reset_page_index],
                                            reset_page_index);
        }

        // Reset per-page tracking structure
        // TODO: Bug 1769904 [uvm] Speculatively unpin pages that were pinned
        //       on a specific memory due to thrashing
        UVM_ASSERT(uvm_page_mask_empty(&block_thrashing->pinned_pages.mask));
        uvm_kvfree(block_thrashing->pages);
        block_thrashing->pages = NULL;
        block_thrashing->num_thrashing_pages = 0;
        block_thrashing->last_processor = UVM_ID_INVALID;
        block_thrashing->last_time_stamp = 0;
        block_thrashing->last_thrashing_time_stamp = 0;
        uvm_page_mask_zero(&block_thrashing->thrashing_pages);
        goto done;
    }

    page_thrashing = &block_thrashing->pages[page_index];

    // Not enough thrashing events yet
    if (page_thrashing->num_thrashing_events < va_space_thrashing->params.threshold)
        goto done;

    // If the requesting processor is throttled, check the throttling end time
    // stamp
    if (uvm_processor_mask_test(&page_thrashing->throttled_processors, requester)) {
        NvU64 throttling_end_time_stamp = page_thrashing_get_throttling_end_time_stamp(page_thrashing);
        if (time_stamp < throttling_end_time_stamp &&
            !uvm_id_equal(requester, page_thrashing->do_not_throttle_processor_id)) {
            hint.type = UVM_PERF_THRASHING_HINT_TYPE_THROTTLE;
            goto done;
        }

        thrashing_throttle_end_processor(va_block, block_thrashing, page_thrashing, page_index, requester);
    }

    UVM_ASSERT(!uvm_processor_mask_test(&page_thrashing->throttled_processors, requester));

    last_time_stamp = page_thrashing_get_time_stamp(page_thrashing);

    // If the lapse since the last thrashing event is longer than a thrashing
    // lapse we are no longer thrashing
    if (time_stamp - last_time_stamp > va_space_thrashing->params.lapse_ns &&
        !page_thrashing->pinned) {
        goto done;
    }

    // Set the requesting processor in the thrashing processors mask
    uvm_processor_mask_set(&page_thrashing->processors, requester);

    UVM_ASSERT(page_thrashing->has_migration_events || page_thrashing->has_revocation_events);

    // Update throttling heuristics
    thrashing_throttle_update(va_space_thrashing, va_block, page_thrashing, requester, time_stamp);

    if (page_thrashing->pinned &&
        page_thrashing->has_revocation_events &&
        !uvm_id_equal(requester, page_thrashing->do_not_throttle_processor_id)) {

        // When we get revocation thrashing, this is due to system-wide atomics
        // downgrading the permissions of other processors. Revocations only
        // happen when several processors are mapping the same page and there
        // are no migrations. In this case, the only thing we can do is to
        // throttle the execution of the processors.
        hint.type = UVM_PERF_THRASHING_HINT_TYPE_THROTTLE;
    }
    else {
        hint = get_hint_for_migration_thrashing(va_space_thrashing,
                                                va_block,
                                                page_index,
                                                page_thrashing,
                                                requester);
    }

done:
    if (hint.type == UVM_PERF_THRASHING_HINT_TYPE_PIN) {
        NV_STATUS status = thrashing_pin_page(va_space_thrashing,
                                              va_block,
                                              block_thrashing,
                                              page_thrashing,
                                              page_index,
                                              time_stamp,
                                              hint.pin.residency,
                                              requester);

        // If there was some problem pinning the page (e.g. OOM), demote to
        // throttling
        if (status != NV_OK) {
            hint.type = UVM_PERF_THRASHING_HINT_TYPE_THROTTLE;
        }
        else {
            if (uvm_id_equal(hint.pin.residency, requester))
                PROCESSOR_THRASHING_STATS_INC(va_space, requester, num_pin_local);
            else
                PROCESSOR_THRASHING_STATS_INC(va_space, requester, num_pin_remote);

            uvm_processor_mask_copy(&hint.pin.processors, &page_thrashing->processors);
        }
    }

    if (hint.type == UVM_PERF_THRASHING_HINT_TYPE_THROTTLE) {
        thrashing_throttle_processor(va_block,
                                     block_thrashing,
                                     page_thrashing,
                                     page_index,
                                     requester);

        PROCESSOR_THRASHING_STATS_INC(va_space, requester, num_throttle);

        hint.throttle.end_time_stamp = page_thrashing_get_throttling_end_time_stamp(page_thrashing);
    }
    else if (hint.type == UVM_PERF_THRASHING_HINT_TYPE_NONE && page_thrashing) {
        UVM_ASSERT(!uvm_processor_mask_test(&page_thrashing->throttled_processors, requester));
        UVM_ASSERT(!page_thrashing->pinned);
        UVM_ASSERT(UVM_ID_IS_INVALID(page_thrashing->pinned_residency_id));
    }

    return hint;
}

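// Illustrative sketch (not part of the build): how a fault servicing path
// might consume the hint returned above. The example_* helpers and their
// signatures are hypothetical; only hint.type, hint.pin.residency and
// hint.throttle.end_time_stamp come from this module.
#if 0
static void example_consume_hint(uvm_va_block_t *va_block,
                                 NvU64 fault_address,
                                 uvm_processor_id_t faulting_processor)
{
    uvm_perf_thrashing_hint_t hint = uvm_perf_thrashing_get_hint(va_block, fault_address, faulting_processor);

    switch (hint.type) {
        case UVM_PERF_THRASHING_HINT_TYPE_THROTTLE:
            // Defer servicing until the throttling period expires
            example_defer_fault_until(faulting_processor, hint.throttle.end_time_stamp);
            break;
        case UVM_PERF_THRASHING_HINT_TYPE_PIN:
            // Service the fault using the residency chosen by the hint
            example_service_fault_at(va_block, fault_address, hint.pin.residency);
            break;
        default:
            // UVM_PERF_THRASHING_HINT_TYPE_NONE: service the fault normally
            example_service_fault(va_block, fault_address, faulting_processor);
            break;
    }
}
#endif
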
uvm_processor_mask_t *uvm_perf_thrashing_get_thrashing_processors(uvm_va_block_t *va_block, NvU64 address)
{
    uvm_va_space_t *va_space = uvm_va_block_get_va_space(va_block);
    va_space_thrashing_info_t *va_space_thrashing = va_space_thrashing_info_get(va_space);
    block_thrashing_info_t *block_thrashing = NULL;
    page_thrashing_info_t *page_thrashing = NULL;
    uvm_page_index_t page_index = uvm_va_block_cpu_page_index(va_block, address);

    UVM_ASSERT(g_uvm_perf_thrashing_enable);
    UVM_ASSERT(va_space_thrashing->params.enable);

    block_thrashing = thrashing_info_get(va_block);
    UVM_ASSERT(block_thrashing);

    UVM_ASSERT(block_thrashing->pages);

    page_thrashing = &block_thrashing->pages[page_index];

    return &page_thrashing->processors;
}

const uvm_page_mask_t *uvm_perf_thrashing_get_thrashing_pages(uvm_va_block_t *va_block)
{
    uvm_va_space_t *va_space = uvm_va_block_get_va_space(va_block);
    va_space_thrashing_info_t *va_space_thrashing = va_space_thrashing_info_get(va_space);
    block_thrashing_info_t *block_thrashing = NULL;

    if (!va_space_thrashing->params.enable)
        return NULL;

    block_thrashing = thrashing_info_get(va_block);
    if (!block_thrashing)
        return NULL;

    if (block_thrashing->num_thrashing_pages == 0)
        return NULL;

    return &block_thrashing->thrashing_pages;
}

bool uvm_perf_thrashing_is_block_thrashing(uvm_va_block_t *va_block)
{
    uvm_va_space_t *va_space = uvm_va_block_get_va_space(va_block);
    va_space_thrashing_info_t *va_space_thrashing = va_space_thrashing_info_get(va_space);
    block_thrashing_info_t *block_thrashing = NULL;

    if (!va_space_thrashing->params.enable)
        return false;

    block_thrashing = thrashing_info_get(va_block);
    if (!block_thrashing)
        return false;

    return block_thrashing->num_thrashing_pages > 0;
}

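// Illustrative sketch (not part of the build): a caller that wants to skip
// thrashing pages (for example, a hypothetical prefetch filter) can iterate
// the mask returned by uvm_perf_thrashing_get_thrashing_pages(). The
// example_skip_page() helper is hypothetical.
#if 0
static void example_filter_thrashing_pages(uvm_va_block_t *va_block)
{
    uvm_page_index_t page_index;
    const uvm_page_mask_t *thrashing_pages = uvm_perf_thrashing_get_thrashing_pages(va_block);

    // NULL means thrashing detection is disabled or nothing is thrashing
    if (!thrashing_pages)
        return;

    for_each_va_block_page_in_mask(page_index, thrashing_pages, va_block)
        example_skip_page(va_block, page_index);
}
#endif
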
#define TIMER_GRANULARITY_NS 20000ULL
static void thrashing_unpin_pages(struct work_struct *work)
{
    struct delayed_work *dwork = to_delayed_work(work);
    va_space_thrashing_info_t *va_space_thrashing = container_of(dwork, va_space_thrashing_info_t, pinned_pages.dwork);
    uvm_va_space_t *va_space = va_space_thrashing->va_space;

    UVM_ASSERT(uvm_va_space_initialized(va_space) == NV_OK);

    // Take the VA space lock so that VA blocks don't go away during this
    // operation.
    uvm_va_space_down_read(va_space);

    if (va_space_thrashing->pinned_pages.in_va_space_teardown)
        goto exit_no_list_lock;

    while (1) {
        pinned_page_t *pinned_page;
        uvm_va_block_t *va_block;

        uvm_spin_lock(&va_space_thrashing->pinned_pages.lock);
        pinned_page = list_first_entry_or_null(&va_space_thrashing->pinned_pages.list,
                                               pinned_page_t,
                                               va_space_list_entry);

        if (pinned_page) {
            NvU64 now = NV_GETTIME();

            if (pinned_page->deadline <= (now + TIMER_GRANULARITY_NS)) {
                list_del_init(&pinned_page->va_space_list_entry);

                // Work cancellation is left to thrashing_unpin_page() as this
                // would only catch the following pattern:
                // - Worker thread A is in thrashing_unpin_pages but hasn't
                //   looked at the list yet
                // - Thread B then removes the last entry
                // - Thread C then adds a new entry and re-schedules work
                // - Worker thread A removes the entry added by C because the
                //   deadline has passed (unlikely), then cancels the work
                //   scheduled by C.
            }
            else {
                NvU64 elapsed_us = (pinned_page->deadline - now) / 1000;

                schedule_delayed_work(&va_space_thrashing->pinned_pages.dwork, usecs_to_jiffies(elapsed_us));
                uvm_spin_unlock(&va_space_thrashing->pinned_pages.lock);
                break;
            }
        }

        uvm_spin_unlock(&va_space_thrashing->pinned_pages.lock);

        if (!pinned_page)
            break;

        va_block = pinned_page->va_block;
        uvm_mutex_lock(&va_block->lock);

        // Only operate if the pinned page's tracking state isn't already
        // cleared by thrashing_unpin_page()
        if (!list_empty(&pinned_page->va_block_list_entry)) {
            uvm_page_index_t page_index = pinned_page->page_index;
            block_thrashing_info_t *block_thrashing = thrashing_info_get(va_block);

            UVM_ASSERT(block_thrashing);
            UVM_ASSERT(uvm_page_mask_test(&block_thrashing->pinned_pages.mask, page_index));

            va_space_thrashing->pinned_pages.va_block_context.policy =
                uvm_va_policy_get(va_block, uvm_va_block_cpu_page_address(va_block, page_index));

            unmap_remote_pinned_pages_from_all_processors(va_block,
                                                          &va_space_thrashing->pinned_pages.va_block_context,
                                                          uvm_va_block_region_for_page(page_index));
            thrashing_reset_page(va_space_thrashing, va_block, block_thrashing, page_index);
        }

        uvm_mutex_unlock(&va_block->lock);
        kmem_cache_free(g_pinned_page_cache, pinned_page);
    }

exit_no_list_lock:
    uvm_va_space_up_read(va_space);
}

static void thrashing_unpin_pages_entry(struct work_struct *work)
{
    UVM_ENTRY_VOID(thrashing_unpin_pages(work));
}

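// Worked example (added for readers, not from the original sources) of the
// deadline handling in thrashing_unpin_pages() above, using the
// TIMER_GRANULARITY_NS value of 20000 (20us) defined in this file:
//   now      = 1,000,000 ns
//   deadline = 1,015,000 ns -> deadline <= now + 20,000, unpin immediately
//   deadline = 2,000,000 ns -> elapsed_us = (2,000,000 - 1,000,000) / 1000
//                              = 1000, work re-armed via usecs_to_jiffies(1000)
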
NV_STATUS uvm_perf_thrashing_load(uvm_va_space_t *va_space)
{
    va_space_thrashing_info_t *va_space_thrashing;
    NV_STATUS status;

    status = uvm_perf_module_load(&g_module_thrashing, va_space);
    if (status != NV_OK)
        return status;

    va_space_thrashing = va_space_thrashing_info_create(va_space);
    if (!va_space_thrashing)
        return NV_ERR_NO_MEMORY;

    uvm_spin_lock_init(&va_space_thrashing->pinned_pages.lock, UVM_LOCK_ORDER_LEAF);
    INIT_LIST_HEAD(&va_space_thrashing->pinned_pages.list);
    INIT_DELAYED_WORK(&va_space_thrashing->pinned_pages.dwork, thrashing_unpin_pages_entry);

    return NV_OK;
}

void uvm_perf_thrashing_stop(uvm_va_space_t *va_space)
{
    va_space_thrashing_info_t *va_space_thrashing;

    uvm_va_space_down_write(va_space);
    va_space_thrashing = va_space_thrashing_info_get_or_null(va_space);

    // Prevent further unpinning operations from being scheduled
    if (va_space_thrashing)
        va_space_thrashing->pinned_pages.in_va_space_teardown = true;

    uvm_va_space_up_write(va_space);

    // Cancel any pending work. We can safely access va_space_thrashing
    // because this function is called once from the VA space teardown path,
    // and the only function that frees it is uvm_perf_thrashing_unload,
    // which is called later in the teardown path.
    if (va_space_thrashing)
        (void)cancel_delayed_work_sync(&va_space_thrashing->pinned_pages.dwork);
}

void uvm_perf_thrashing_unload(uvm_va_space_t *va_space)
{
    va_space_thrashing_info_t *va_space_thrashing = va_space_thrashing_info_get_or_null(va_space);

    uvm_perf_module_unload(&g_module_thrashing, va_space);

    // Make sure that there are no pending work items
    if (va_space_thrashing) {
        UVM_ASSERT(va_space_thrashing->pinned_pages.in_va_space_teardown);
        UVM_ASSERT(list_empty(&va_space_thrashing->pinned_pages.list));

        va_space_thrashing_info_destroy(va_space);
    }
}

NV_STATUS uvm_perf_thrashing_register_gpu(uvm_va_space_t *va_space, uvm_gpu_t *gpu)
{
    // If a simulated GPU is registered, re-initialize thrashing parameters in
    // case they need to be adjusted
    if (g_uvm_global.num_simulated_devices > 0) {
        va_space_thrashing_info_t *va_space_thrashing = va_space_thrashing_info_get(va_space);

        if (!va_space_thrashing->params.test_overrides)
            va_space_thrashing_info_init_params(va_space_thrashing);
    }

    return NV_OK;
}

NV_STATUS uvm_perf_thrashing_init()
{
    NV_STATUS status;

    INIT_THRASHING_PARAMETER_TOGGLE(uvm_perf_thrashing_enable, UVM_PERF_THRASHING_ENABLE_DEFAULT);
    if (!g_uvm_perf_thrashing_enable)
        return NV_OK;

    uvm_perf_module_init("perf_thrashing",
                         UVM_PERF_MODULE_TYPE_THRASHING,
                         g_callbacks_thrashing,
                         ARRAY_SIZE(g_callbacks_thrashing),
                         &g_module_thrashing);

    INIT_THRASHING_PARAMETER_NONZERO_MAX(uvm_perf_thrashing_threshold,
                                         UVM_PERF_THRASHING_THRESHOLD_DEFAULT,
                                         UVM_PERF_THRASHING_THRESHOLD_MAX);

    INIT_THRASHING_PARAMETER_NONZERO_MAX(uvm_perf_thrashing_pin_threshold,
                                         UVM_PERF_THRASHING_PIN_THRESHOLD_DEFAULT,
                                         UVM_PERF_THRASHING_PIN_THRESHOLD_MAX);

    INIT_THRASHING_PARAMETER_NONZERO(uvm_perf_thrashing_lapse_usec, UVM_PERF_THRASHING_LAPSE_USEC_DEFAULT);

    INIT_THRASHING_PARAMETER_NONZERO_MAX(uvm_perf_thrashing_nap,
                                         UVM_PERF_THRASHING_NAP_DEFAULT,
                                         UVM_PERF_THRASHING_NAP_MAX);

    INIT_THRASHING_PARAMETER_NONZERO(uvm_perf_thrashing_epoch, UVM_PERF_THRASHING_EPOCH_DEFAULT);

    INIT_THRASHING_PARAMETER(uvm_perf_thrashing_pin, UVM_PERF_THRASHING_PIN_DEFAULT);

    INIT_THRASHING_PARAMETER(uvm_perf_thrashing_max_resets, UVM_PERF_THRASHING_MAX_RESETS_DEFAULT);

    g_va_block_thrashing_info_cache = NV_KMEM_CACHE_CREATE("uvm_block_thrashing_info_t", block_thrashing_info_t);
    if (!g_va_block_thrashing_info_cache) {
        status = NV_ERR_NO_MEMORY;
        goto error;
    }

    g_pinned_page_cache = NV_KMEM_CACHE_CREATE("uvm_pinned_page_t", pinned_page_t);
    if (!g_pinned_page_cache) {
        status = NV_ERR_NO_MEMORY;
        goto error;
    }

    status = cpu_thrashing_stats_init();
    if (status != NV_OK)
        goto error;

    return NV_OK;

error:
    uvm_perf_thrashing_exit();

    return status;
}

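// Note (added for readers, not from the original sources): the
// INIT_THRASHING_PARAMETER* macros above consume module-wide knobs such as
// uvm_perf_thrashing_threshold and uvm_perf_thrashing_lapse_usec. Assuming
// those knobs are exposed as nvidia-uvm module parameters, as the uvm_perf_*
// naming suggests, they could be tuned at module load time, e.g.:
//
//   modprobe nvidia-uvm uvm_perf_thrashing_threshold=5 uvm_perf_thrashing_lapse_usec=500
//
// Treat the exact parameter spelling and availability as assumptions to be
// verified against the module's parameter definitions.
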
void uvm_perf_thrashing_exit()
{
    cpu_thrashing_stats_exit();

    kmem_cache_destroy_safe(&g_va_block_thrashing_info_cache);
    kmem_cache_destroy_safe(&g_pinned_page_cache);
}

NV_STATUS uvm_perf_thrashing_add_gpu(uvm_gpu_t *gpu)
{
    if (!uvm_procfs_is_debug_enabled())
        return NV_OK;

    return gpu_thrashing_stats_create(gpu);
}

void uvm_perf_thrashing_remove_gpu(uvm_gpu_t *gpu)
{
    gpu_thrashing_stats_destroy(gpu);
}

NV_STATUS uvm_test_get_page_thrashing_policy(UVM_TEST_GET_PAGE_THRASHING_POLICY_PARAMS *params, struct file *filp)
{
    uvm_va_space_t *va_space = uvm_va_space_get(filp);
    va_space_thrashing_info_t *va_space_thrashing;

    uvm_va_space_down_read(va_space);

    va_space_thrashing = va_space_thrashing_info_get(va_space);

    if (va_space_thrashing->params.enable) {
        params->policy = UVM_TEST_PAGE_THRASHING_POLICY_ENABLE;
        params->nap_ns = va_space_thrashing->params.nap_ns;
        params->pin_ns = va_space_thrashing->params.pin_ns;
        params->map_remote_on_native_atomics_fault = uvm_perf_map_remote_on_native_atomics_fault != 0;
    }
    else {
        params->policy = UVM_TEST_PAGE_THRASHING_POLICY_DISABLE;
    }

    uvm_va_space_up_read(va_space);

    return NV_OK;
}

NV_STATUS uvm_test_set_page_thrashing_policy(UVM_TEST_SET_PAGE_THRASHING_POLICY_PARAMS *params, struct file *filp)
{
    NV_STATUS status = NV_OK;
    uvm_va_space_t *va_space = uvm_va_space_get(filp);
    va_space_thrashing_info_t *va_space_thrashing;

    if (params->policy >= UVM_TEST_PAGE_THRASHING_POLICY_MAX)
        return NV_ERR_INVALID_ARGUMENT;

    if (!g_uvm_perf_thrashing_enable)
        return NV_ERR_INVALID_STATE;

    uvm_va_space_down_write(va_space);

    va_space_thrashing = va_space_thrashing_info_get(va_space);
    va_space_thrashing->params.test_overrides = true;

    if (params->policy == UVM_TEST_PAGE_THRASHING_POLICY_ENABLE) {
        if (va_space_thrashing->params.enable)
            goto done_unlock_va_space;

        va_space_thrashing->params.pin_ns = params->pin_ns;
        va_space_thrashing->params.enable = true;
    }
    else {
        if (!va_space_thrashing->params.enable)
            goto done_unlock_va_space;

        va_space_thrashing->params.enable = false;
    }

    // When disabling thrashing detection, destroy the thrashing tracking
    // information for all VA blocks and unpin pages
    if (!va_space_thrashing->params.enable) {
        uvm_va_range_t *va_range;

        uvm_for_each_va_range(va_range, va_space) {
            uvm_va_block_t *va_block;

            if (va_range->type != UVM_VA_RANGE_TYPE_MANAGED)
                continue;

            for_each_va_block_in_va_range(va_range, va_block) {
                uvm_va_block_region_t va_block_region = uvm_va_block_region_from_block(va_block);
                uvm_va_block_context_t *block_context = uvm_va_space_block_context(va_space, NULL);

                block_context->policy = uvm_va_range_get_policy(va_range);

                uvm_mutex_lock(&va_block->lock);

                // Unmap may split PTEs and require a retry. Needs to be called
                // before the pinned pages information is destroyed.
                status = UVM_VA_BLOCK_RETRY_LOCKED(va_block,
                                                   NULL,
                                                   unmap_remote_pinned_pages_from_all_processors(va_block,
                                                                                                 block_context,
                                                                                                 va_block_region));

                thrashing_info_destroy(va_block);

                uvm_mutex_unlock(&va_block->lock);

                // Re-enable thrashing on failure to avoid getting asserts
                // about having state while thrashing is disabled
                if (status != NV_OK) {
                    va_space_thrashing->params.enable = true;
                    goto done_unlock_va_space;
                }
            }
        }
    }

done_unlock_va_space:
    uvm_va_space_up_write(va_space);

    return status;
}