2022-05-09 22:18:59 +02:00
|
|
|
/*******************************************************************************
|
|
|
|
Copyright (c) 2015-2022 NVIDIA Corporation
|
|
|
|
|
|
|
|
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
|
|
of this software and associated documentation files (the "Software"), to
|
|
|
|
deal in the Software without restriction, including without limitation the
|
|
|
|
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
|
|
|
|
sell copies of the Software, and to permit persons to whom the Software is
|
|
|
|
furnished to do so, subject to the following conditions:
|
|
|
|
|
|
|
|
The above copyright notice and this permission notice shall be
|
|
|
|
included in all copies or substantial portions of the Software.
|
|
|
|
|
|
|
|
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
|
|
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
|
|
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
|
|
|
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
|
|
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
|
|
|
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
|
|
|
DEALINGS IN THE SOFTWARE.
|
|
|
|
|
|
|
|
*******************************************************************************/
|
|
|
|
|
|
|
|
#ifndef __UVM_PMM_GPU_H__
|
|
|
|
#define __UVM_PMM_GPU_H__
|
|
|
|
|
|
|
|
//
|
|
|
|
// The Physical Memory Manager (PMM) manages the life cycle of GPU physical
|
|
|
|
// memory.
|
|
|
|
//
|
|
|
|
// The memory is managed in GPU chunks of different sizes (uvm_chunk_size_t) and
|
|
|
|
// users of PMM need to explicitly register the chunk sizes they need to be
|
|
|
|
// supported (see chunk_size_init_func in uvm_pmm_gpu_init()).
|
|
|
|
//
|
|
|
|
// Two memory types (uvm_pmm_gpu_memory_type_t) are supported, one for user and
|
|
|
|
// one for kernel allocations. The user memory type is used only for backing
|
|
|
|
// user data managed by VA blocks and kernel memory type is used for everything
|
|
|
|
// else. The distinction exists to support oversubscription, which requires the
|
|
|
|
// ability to evict already allocated memory from its users on-demand to satisfy
|
|
|
|
// new memory allocations when no more unused memory is available. Eviction is
|
|
|
|
// limited to the user memory type as it's a very complex operation requiring
|
|
|
|
// integration between PMM and other UVM driver modules. The assumption is that
|
|
|
|
// the vast majority of memory should be used for user data as everything else
|
|
|
|
// can be considered overhead and should be minimized. Two flavors of
|
|
|
|
// oversubscription exist: internal oversubscription allowing PMM allocations to
|
|
|
|
// evict other PMM allocations and external oversubscription allowing other PMA
|
|
|
|
// clients to evict memory used by PMM.
|
|
|
|
//
|
|
|
|
// Both allocation and freeing of memory support asynchronous operations where
|
|
|
|
// the allocated/freed GPU memory chunks can have pending GPU operations
|
|
|
|
// returned when allocating memory and passed in when freeing it via trackers.
|
|
|
|
//
|
|
|
|
|
|
|
|
#include "uvm_forward_decl.h"
|
|
|
|
#include "uvm_lock.h"
|
|
|
|
#include "uvm_processors.h"
|
|
|
|
#include "uvm_tracker.h"
|
|
|
|
#include "uvm_va_block_types.h"
|
|
|
|
#include "uvm_linux.h"
|
|
|
|
#include "uvm_types.h"
|
|
|
|
#include "nv_uvm_types.h"
|
2023-02-28 20:12:44 +01:00
|
|
|
#if UVM_IS_CONFIG_HMM()
|
|
|
|
#include <linux/memremap.h>
|
|
|
|
#endif
|
2022-05-09 22:18:59 +02:00
|
|
|
|
|
|
|
typedef enum
|
|
|
|
{
|
|
|
|
UVM_CHUNK_SIZE_1 = 1ULL,
|
|
|
|
UVM_CHUNK_SIZE_2 = 2ULL,
|
|
|
|
UVM_CHUNK_SIZE_4 = 4ULL,
|
|
|
|
UVM_CHUNK_SIZE_8 = 8ULL,
|
|
|
|
UVM_CHUNK_SIZE_16 = 16ULL,
|
|
|
|
UVM_CHUNK_SIZE_32 = 32ULL,
|
|
|
|
UVM_CHUNK_SIZE_64 = 64ULL,
|
|
|
|
UVM_CHUNK_SIZE_128 = 128ULL,
|
|
|
|
UVM_CHUNK_SIZE_256 = 256ULL,
|
|
|
|
UVM_CHUNK_SIZE_512 = 512ULL,
|
|
|
|
UVM_CHUNK_SIZE_1K = 1024ULL,
|
|
|
|
UVM_CHUNK_SIZE_2K = 2*1024ULL,
|
|
|
|
UVM_CHUNK_SIZE_4K = 4*1024ULL,
|
|
|
|
UVM_CHUNK_SIZE_8K = 8*1024ULL,
|
|
|
|
UVM_CHUNK_SIZE_16K = 16*1024ULL,
|
|
|
|
UVM_CHUNK_SIZE_32K = 32*1024ULL,
|
|
|
|
UVM_CHUNK_SIZE_64K = 64*1024ULL,
|
|
|
|
UVM_CHUNK_SIZE_128K = 128*1024ULL,
|
|
|
|
UVM_CHUNK_SIZE_256K = 256*1024ULL,
|
|
|
|
UVM_CHUNK_SIZE_512K = 512*1024ULL,
|
|
|
|
UVM_CHUNK_SIZE_1M = 1024*1024ULL,
|
|
|
|
UVM_CHUNK_SIZE_2M = 2*1024*1024ULL,
|
|
|
|
UVM_CHUNK_SIZE_MAX = UVM_CHUNK_SIZE_2M,
|
|
|
|
UVM_CHUNK_SIZE_INVALID = UVM_CHUNK_SIZE_MAX * 2ULL
|
|
|
|
} uvm_chunk_size_t;
|
|
|
|
|
|
|
|
#define UVM_CHUNK_SIZES_MASK (uvm_chunk_sizes_mask_t)(UVM_CHUNK_SIZE_MAX | (UVM_CHUNK_SIZE_MAX-1))
|
|
|
|
|
|
|
|
typedef enum
|
|
|
|
{
|
|
|
|
// Memory type for backing user pages. On Pascal+ it can be evicted.
|
|
|
|
UVM_PMM_GPU_MEMORY_TYPE_USER,
|
2023-05-30 19:11:36 +02:00
|
|
|
// When the Confidential Computing feature is enabled, the protected flavor
|
|
|
|
// allocates memory out of the VPR region. When it's disabled, all flavors
|
|
|
|
// have no effects and are equivalent to the base type.
|
|
|
|
UVM_PMM_GPU_MEMORY_TYPE_USER_PROTECTED = UVM_PMM_GPU_MEMORY_TYPE_USER,
|
|
|
|
UVM_PMM_GPU_MEMORY_TYPE_USER_UNPROTECTED,
|
2022-05-09 22:18:59 +02:00
|
|
|
|
|
|
|
// Memory type for internal UVM allocations. It cannot be evicted.
|
|
|
|
UVM_PMM_GPU_MEMORY_TYPE_KERNEL,
|
2023-05-30 19:11:36 +02:00
|
|
|
// See user types for the behavior description when the Confidential
|
|
|
|
// Computing feature is ON or OFF.
|
|
|
|
UVM_PMM_GPU_MEMORY_TYPE_KERNEL_PROTECTED = UVM_PMM_GPU_MEMORY_TYPE_KERNEL,
|
|
|
|
UVM_PMM_GPU_MEMORY_TYPE_KERNEL_UNPROTECTED,
|
2022-05-09 22:18:59 +02:00
|
|
|
|
|
|
|
// Number of types - MUST BE LAST.
|
|
|
|
UVM_PMM_GPU_MEMORY_TYPE_COUNT
|
|
|
|
} uvm_pmm_gpu_memory_type_t;
|
|
|
|
|
|
|
|
const char *uvm_pmm_gpu_memory_type_string(uvm_pmm_gpu_memory_type_t type);
|
|
|
|
|
|
|
|
// Returns true if the given memory type is used to back user pages.
|
|
|
|
bool uvm_pmm_gpu_memory_type_is_user(uvm_pmm_gpu_memory_type_t type);
|
|
|
|
|
|
|
|
// Returns true if the given memory type is used to back internal UVM
|
|
|
|
// allocations.
|
|
|
|
static bool uvm_pmm_gpu_memory_type_is_kernel(uvm_pmm_gpu_memory_type_t type)
|
|
|
|
{
|
|
|
|
return !uvm_pmm_gpu_memory_type_is_user(type);
|
|
|
|
}
|
|
|
|
|
|
|
|
typedef enum
|
|
|
|
{
|
|
|
|
// Chunk belongs to PMA. Code outside PMM should not have access to
|
|
|
|
// it and it is likely a bug in UVM code (either in PMM or outside)
|
|
|
|
// if that happens.
|
|
|
|
UVM_PMM_GPU_CHUNK_STATE_PMA_OWNED,
|
|
|
|
|
|
|
|
// Chunk is on free list. That is it can be reused or returned to PMA
|
|
|
|
// as soon as its tracker is done. Code outside PMM should not have
|
|
|
|
// access to this chunk and it is likely a bug in UVM code (either in
|
|
|
|
// PMM or outside) if that happens.
|
|
|
|
UVM_PMM_GPU_CHUNK_STATE_FREE,
|
|
|
|
|
|
|
|
// Chunk is split into subchunks.
|
|
|
|
UVM_PMM_GPU_CHUNK_STATE_IS_SPLIT,
|
|
|
|
|
|
|
|
// Chunk is temporarily pinned.
|
|
|
|
//
|
|
|
|
// This state is used for user memory chunks that have been allocated, but haven't
|
|
|
|
// been unpinned yet and also internally when a chunk is about to be split.
|
|
|
|
UVM_PMM_GPU_CHUNK_STATE_TEMP_PINNED,
|
|
|
|
|
|
|
|
// Chunk is allocated. That is it is backing some VA block
|
|
|
|
UVM_PMM_GPU_CHUNK_STATE_ALLOCATED,
|
|
|
|
|
|
|
|
// Number of states - MUST BE LAST
|
|
|
|
UVM_PMM_GPU_CHUNK_STATE_COUNT
|
|
|
|
} uvm_pmm_gpu_chunk_state_t;
|
|
|
|
|
|
|
|
const char *uvm_pmm_gpu_chunk_state_string(uvm_pmm_gpu_chunk_state_t state);
|
|
|
|
|
|
|
|
typedef enum
|
|
|
|
{
|
|
|
|
// No flags passed
|
|
|
|
UVM_PMM_ALLOC_FLAGS_NONE,
|
|
|
|
|
|
|
|
// If there is no free memory, allocation may evict chunks instead of
|
|
|
|
// returning error immediately. Therefore it must not be called under the
|
|
|
|
// VA block lock.
|
|
|
|
UVM_PMM_ALLOC_FLAGS_EVICT = (1 << 0),
|
|
|
|
|
|
|
|
// Do not use batching in this call if PMA page allocaion is required
|
|
|
|
UVM_PMM_ALLOC_FLAGS_DONT_BATCH = (1 << 1),
|
|
|
|
|
|
|
|
UVM_PMM_ALLOC_FLAGS_MASK = (1 << 2) - 1
|
|
|
|
} uvm_pmm_alloc_flags_t;
|
|
|
|
|
|
|
|
|
|
|
|
typedef enum
|
|
|
|
{
|
|
|
|
// Identifier for lists with zeroed chunks
|
|
|
|
UVM_PMM_LIST_ZERO,
|
|
|
|
|
|
|
|
// Identifier for lists with non-zeroed chunks
|
|
|
|
UVM_PMM_LIST_NO_ZERO,
|
|
|
|
|
|
|
|
// Number of states for zeroed/non-zeroed chunk lists - MUST BE LAST
|
|
|
|
UVM_PMM_LIST_ZERO_COUNT
|
|
|
|
} uvm_pmm_list_zero_t;
|
|
|
|
|
|
|
|
static void uvm_pmm_list_zero_checks(void)
|
|
|
|
{
|
|
|
|
BUILD_BUG_ON(UVM_PMM_LIST_ZERO_COUNT > 2);
|
|
|
|
}
|
|
|
|
|
|
|
|
// Maximum chunk sizes per type of allocation in single GPU.
|
|
|
|
// The worst case today is Maxwell with 4 allocations sizes for page tables and
|
|
|
|
// 2 page sizes used by uvm_mem_t. Notably one of the allocations for page
|
|
|
|
// tables is 2M which is our common root chunk size.
|
|
|
|
#define UVM_MAX_CHUNK_SIZES 6
|
|
|
|
|
|
|
|
// This specifies a maximum GAP between 2 allocation levels.
|
|
|
|
#define UVM_PMM_MAX_SUBCHUNKS UVM_CHUNK_SIZE_MAX
|
|
|
|
|
|
|
|
#define UVM_PMM_CHUNK_SPLIT_CACHE_SIZES (ilog2(UVM_PMM_MAX_SUBCHUNKS) + 1)
|
|
|
|
#define UVM_CHUNK_SIZE_MASK_SIZE (ilog2(UVM_CHUNK_SIZE_MAX) + 1)
|
|
|
|
|
|
|
|
typedef uvm_chunk_size_t uvm_chunk_sizes_mask_t;
|
|
|
|
|
|
|
|
typedef struct uvm_pmm_gpu_chunk_suballoc_struct uvm_pmm_gpu_chunk_suballoc_t;
|
|
|
|
|
2023-02-28 20:12:44 +01:00
|
|
|
#if UVM_IS_CONFIG_HMM()
|
|
|
|
|
|
|
|
typedef struct uvm_pmm_gpu_struct uvm_pmm_gpu_t;
|
|
|
|
|
|
|
|
typedef struct
|
|
|
|
{
|
|
|
|
struct dev_pagemap pagemap;
|
|
|
|
} uvm_pmm_gpu_devmem_t;
|
|
|
|
|
|
|
|
// Return the GPU chunk for a given device private struct page.
|
|
|
|
uvm_gpu_chunk_t *uvm_pmm_devmem_page_to_chunk(struct page *page);
|
|
|
|
|
|
|
|
// Return the GPU id for a given device private struct page.
|
|
|
|
uvm_gpu_id_t uvm_pmm_devmem_page_to_gpu_id(struct page *page);
|
|
|
|
|
|
|
|
// Return the PFN of the device private struct page for the given GPU chunk.
|
|
|
|
unsigned long uvm_pmm_gpu_devmem_get_pfn(uvm_pmm_gpu_t *pmm, uvm_gpu_chunk_t *chunk);
|
|
|
|
|
|
|
|
#endif
|
|
|
|
|
2022-05-09 22:18:59 +02:00
|
|
|
struct uvm_gpu_chunk_struct
|
|
|
|
{
|
|
|
|
// Physical address of GPU chunk. This may be removed to save memory
|
|
|
|
// if we will be able to get it from reverse map and changed
|
|
|
|
// into smaller index for subchunks.
|
|
|
|
NvU64 address;
|
|
|
|
|
|
|
|
struct
|
|
|
|
{
|
|
|
|
// We use +1 in the order_base_2 calls appropriately to avoid compiler
|
|
|
|
// warnings due to the bitfields being too narrow for the values of
|
|
|
|
// their types.
|
2023-02-28 20:12:44 +01:00
|
|
|
uvm_pmm_gpu_memory_type_t type : order_base_2(UVM_PMM_GPU_MEMORY_TYPE_COUNT + 1);
|
2022-05-09 22:18:59 +02:00
|
|
|
|
|
|
|
// The eviction flag is internal and used only for root chunks. It's
|
|
|
|
// set by the eviction path once a chunk is chosen for eviction in
|
|
|
|
// chunk_start_eviction(). Also see the (root_)chunk_is_in_eviction()
|
|
|
|
// helpers.
|
2023-02-28 20:12:44 +01:00
|
|
|
bool in_eviction : 1;
|
2022-05-09 22:18:59 +02:00
|
|
|
|
2023-02-28 20:12:44 +01:00
|
|
|
bool inject_split_error : 1;
|
2022-05-09 22:18:59 +02:00
|
|
|
|
|
|
|
// This flag is initalized when allocating a new root chunk from PMA.
|
|
|
|
// It is set to true, if PMA already scrubbed the chunk. The flag is
|
|
|
|
// only valid at allocation time (after uvm_pmm_gpu_alloc call), and
|
|
|
|
// the caller is not required to clear it before freeing the chunk. The
|
|
|
|
// VA block chunk population code can query it to skip zeroing the
|
|
|
|
// chunk.
|
2023-02-28 20:12:44 +01:00
|
|
|
bool is_zero : 1;
|
|
|
|
|
|
|
|
// This flag indicates an allocated chunk is referenced by a device
|
|
|
|
// private struct page PTE and therefore expects a page_free() callback.
|
|
|
|
bool is_referenced : 1;
|
2022-05-09 22:18:59 +02:00
|
|
|
|
2023-02-28 20:12:44 +01:00
|
|
|
uvm_pmm_gpu_chunk_state_t state : order_base_2(UVM_PMM_GPU_CHUNK_STATE_COUNT + 1);
|
2022-05-09 22:18:59 +02:00
|
|
|
|
2023-02-28 20:12:44 +01:00
|
|
|
size_t log2_size : order_base_2(UVM_CHUNK_SIZE_MASK_SIZE);
|
2022-05-09 22:18:59 +02:00
|
|
|
|
|
|
|
// Start page index within va_block
|
2023-02-28 20:12:44 +01:00
|
|
|
uvm_page_index_t va_block_page_index : order_base_2(PAGES_PER_UVM_VA_BLOCK + 1);
|
2022-05-09 22:18:59 +02:00
|
|
|
|
|
|
|
// This allows determining what PMM owns the chunk. Users of this field
|
|
|
|
// must only use it if the owning GPU is retained.
|
|
|
|
// TODO: Bug 2008200: Enforce single PMM instance per GPU
|
2023-02-28 20:12:44 +01:00
|
|
|
NvU32 gpu_global_index : order_base_2(UVM_GLOBAL_ID_MAX_PROCESSORS);
|
2022-05-09 22:18:59 +02:00
|
|
|
};
|
|
|
|
|
|
|
|
// List entry.
|
|
|
|
//
|
|
|
|
// Guaranteed to be a valid list node at all times for simplicity.
|
|
|
|
//
|
|
|
|
// Protected by PMM's list_lock when managed by PMM. Notably the list node
|
|
|
|
// can be used by the allocator of the chunk after alloc and before the
|
|
|
|
// chunk is unpinned or freed.
|
|
|
|
struct list_head list;
|
|
|
|
|
|
|
|
// The VA block using the chunk, if any.
|
|
|
|
// User chunks that are not backed by a VA block are considered to be
|
|
|
|
// temporarily pinned and cannot be evicted.
|
|
|
|
uvm_va_block_t *va_block;
|
|
|
|
|
|
|
|
// If this is subchunk it points to the parent - in other words
|
|
|
|
// chunk of bigger size which contains this chunk.
|
|
|
|
uvm_gpu_chunk_t *parent;
|
|
|
|
|
|
|
|
// Array describing suballocations
|
|
|
|
uvm_pmm_gpu_chunk_suballoc_t *suballoc;
|
|
|
|
};
|
|
|
|
|
|
|
|
typedef struct uvm_gpu_root_chunk_struct
|
|
|
|
{
|
|
|
|
uvm_gpu_chunk_t chunk;
|
|
|
|
|
|
|
|
// Pending operations for all GPU chunks under the root chunk.
|
|
|
|
//
|
|
|
|
// Protected by the corresponding root chunk bit lock.
|
|
|
|
uvm_tracker_t tracker;
|
|
|
|
|
|
|
|
// Indirect peers which have IOMMU mappings to this root chunk. The mapped
|
|
|
|
// addresses are stored in this root chunk's index in
|
|
|
|
// uvm_pmm_gpu_t::root_chunks.indirect_peer[id].dma_addrs.
|
|
|
|
//
|
|
|
|
// Protected by the corresponding root chunk bit lock.
|
|
|
|
//
|
|
|
|
// We can use a regular processor id because indirect peers are not allowed
|
|
|
|
// between partitioned GPUs when SMC is enabled.
|
|
|
|
uvm_processor_mask_t indirect_peers_mapped;
|
|
|
|
} uvm_gpu_root_chunk_t;
|
|
|
|
|
|
|
|
typedef struct
|
|
|
|
{
|
|
|
|
// Indirect peers are GPUs which can coherently access this GPU's memory,
|
|
|
|
// but are routed through an intermediate processor. Indirect peers access
|
|
|
|
// each others' memory with the SYS aperture rather then a PEER aperture,
|
|
|
|
// meaning they need IOMMU mappings:
|
|
|
|
//
|
|
|
|
// accessing_gpu ==> IOMMU ==> CPU ==> owning_gpu (this GPU)
|
|
|
|
//
|
|
|
|
// This array has one entry per root chunk on this GPU. Each entry
|
|
|
|
// contains the IOMMU address accessing_gpu needs to use in order to
|
|
|
|
// access this GPU's root chunk. The root chunks are mapped as whole
|
|
|
|
// regions both for tracking simplicity and to allow GPUs to map with
|
|
|
|
// large PTEs.
|
|
|
|
//
|
|
|
|
// An array entry is valid iff accessing_gpu's ID is set in the
|
|
|
|
// corresponding root chunk's indirect_peers_mapped mask.
|
|
|
|
//
|
|
|
|
// Management of these addresses would be simpler if they were stored
|
|
|
|
// in the root chunks themselves, but in the common case there are only
|
|
|
|
// a small number of indirect peers in a system. Dynamic array
|
|
|
|
// allocation per indirect peer wastes less memory.
|
|
|
|
NvU64 *dma_addrs;
|
|
|
|
|
|
|
|
// Number of this GPU's root chunks mapped for each indirect peer.
|
|
|
|
atomic64_t map_count;
|
|
|
|
} uvm_gpu_root_chunk_indirect_peer_t;
|
|
|
|
|
2023-02-28 20:12:44 +01:00
|
|
|
typedef struct uvm_pmm_gpu_struct
|
2022-05-09 22:18:59 +02:00
|
|
|
{
|
|
|
|
// Sizes of the MMU
|
|
|
|
uvm_chunk_sizes_mask_t chunk_sizes[UVM_PMM_GPU_MEMORY_TYPE_COUNT];
|
|
|
|
|
|
|
|
// PMA (Physical Memory Allocator) opaque handle
|
|
|
|
void *pma;
|
|
|
|
|
|
|
|
// PMA statistics used for eviction heuristics
|
|
|
|
const UvmPmaStatistics *pma_stats;
|
|
|
|
|
|
|
|
struct
|
|
|
|
{
|
|
|
|
// Array of all root chunks indexed by their physical address divided by
|
|
|
|
// UVM_CHUNK_SIZE_MAX.
|
|
|
|
//
|
|
|
|
// This array is pre-allocated during uvm_pmm_gpu_init() for all
|
|
|
|
// possible physical addresses (based on
|
|
|
|
// gpu::vidmem_max_physical_address).
|
|
|
|
size_t count;
|
|
|
|
uvm_gpu_root_chunk_t *array;
|
|
|
|
|
|
|
|
// Bit locks for the root chunks with 1 bit per each root chunk
|
|
|
|
uvm_bit_locks_t bitlocks;
|
|
|
|
|
|
|
|
// List of root chunks unused by VA blocks, i.e. allocated, but not
|
|
|
|
// holding any resident pages. These take priority when evicting as no
|
|
|
|
// data needs to be migrated for them to be evicted.
|
|
|
|
//
|
|
|
|
// For simplicity, the list is approximate, tracking unused chunks only
|
|
|
|
// from root chunk sized (2M) VA blocks.
|
|
|
|
//
|
|
|
|
// Updated by the VA block code with
|
|
|
|
// uvm_pmm_gpu_mark_root_chunk_(un)used().
|
|
|
|
struct list_head va_block_unused;
|
|
|
|
|
|
|
|
// List of root chunks used by VA blocks
|
|
|
|
struct list_head va_block_used;
|
|
|
|
|
2023-02-28 20:12:44 +01:00
|
|
|
// List of chunks needing to be lazily freed and a queue for processing
|
|
|
|
// the list. TODO: Bug 3881835: revisit whether to use nv_kthread_q_t
|
|
|
|
// or workqueue.
|
|
|
|
struct list_head va_block_lazy_free;
|
|
|
|
nv_kthread_q_item_t va_block_lazy_free_q_item;
|
|
|
|
|
2022-05-09 22:18:59 +02:00
|
|
|
uvm_gpu_root_chunk_indirect_peer_t indirect_peer[UVM_ID_MAX_GPUS];
|
|
|
|
} root_chunks;
|
|
|
|
|
2023-02-28 20:12:44 +01:00
|
|
|
#if UVM_IS_CONFIG_HMM()
|
|
|
|
uvm_pmm_gpu_devmem_t devmem;
|
|
|
|
#endif
|
|
|
|
|
2022-05-09 22:18:59 +02:00
|
|
|
// Lock protecting PMA allocation, freeing and eviction
|
|
|
|
uvm_rw_semaphore_t pma_lock;
|
|
|
|
|
|
|
|
// Lock protecting splits, merges and walks of chunks.
|
|
|
|
uvm_mutex_t lock;
|
|
|
|
|
|
|
|
// Lock protecting lists and chunk's state transitions.
|
|
|
|
uvm_spinlock_t list_lock;
|
|
|
|
|
|
|
|
// Free chunk lists. There are separate lists for non-zero and zero chunks.
|
|
|
|
struct list_head free_list[UVM_PMM_GPU_MEMORY_TYPE_COUNT][UVM_MAX_CHUNK_SIZES][UVM_PMM_LIST_ZERO_COUNT];
|
|
|
|
|
|
|
|
// Inject an error after evicting a number of chunks. 0 means no error left
|
|
|
|
// to be injected.
|
|
|
|
NvU32 inject_pma_evict_error_after_num_chunks;
|
|
|
|
|
|
|
|
// The mask of the initialized chunk sizes
|
|
|
|
DECLARE_BITMAP(chunk_split_cache_initialized, UVM_PMM_CHUNK_SPLIT_CACHE_SIZES);
|
|
|
|
|
|
|
|
bool initialized;
|
|
|
|
|
|
|
|
bool pma_address_cache_initialized;
|
|
|
|
} uvm_pmm_gpu_t;
|
|
|
|
|
|
|
|
// Return containing GPU
|
|
|
|
uvm_gpu_t *uvm_pmm_to_gpu(uvm_pmm_gpu_t *pmm);
|
|
|
|
|
|
|
|
// Initialize PMM on GPU
|
|
|
|
NV_STATUS uvm_pmm_gpu_init(uvm_pmm_gpu_t *pmm);
|
|
|
|
|
|
|
|
// Deinitialize the PMM on GPU
|
|
|
|
void uvm_pmm_gpu_deinit(uvm_pmm_gpu_t *pmm);
|
|
|
|
|
|
|
|
static uvm_chunk_size_t uvm_gpu_chunk_get_size(uvm_gpu_chunk_t *chunk)
|
|
|
|
{
|
|
|
|
return ((uvm_chunk_size_t)1) << chunk->log2_size;
|
|
|
|
}
|
|
|
|
|
|
|
|
static void uvm_gpu_chunk_set_size(uvm_gpu_chunk_t *chunk, uvm_chunk_size_t size)
|
|
|
|
{
|
|
|
|
chunk->log2_size = ilog2(size);
|
|
|
|
}
|
|
|
|
|
|
|
|
// Retrieve the GPU associated with the chunk. Users of this helper must only
|
|
|
|
// use it if the owning GPU is retained.
|
|
|
|
uvm_gpu_t *uvm_gpu_chunk_get_gpu(const uvm_gpu_chunk_t *chunk);
|
|
|
|
|
|
|
|
// Return the first struct page corresponding to the physical address range
|
|
|
|
// of the given chunk.
|
|
|
|
//
|
|
|
|
// Notes:
|
|
|
|
// - The GPU must have NUMA support enabled.
|
|
|
|
// - For chunks smaller than a system page, this function returns the struct
|
|
|
|
// page containing the chunk's starting address.
|
|
|
|
struct page *uvm_gpu_chunk_to_page(uvm_pmm_gpu_t *pmm, uvm_gpu_chunk_t *chunk);
|
|
|
|
|
|
|
|
// Allocates num_chunks chunks of size chunk_size in caller-supplied array (chunks).
|
|
|
|
//
|
|
|
|
// Returned chunks are in the TEMP_PINNED state, requiring a call to either
|
2023-02-28 20:12:44 +01:00
|
|
|
// uvm_pmm_gpu_unpin_allocated, uvm_pmm_gpu_unpin_referenced, or
|
|
|
|
// uvm_pmm_gpu_free. If a tracker is passed in, all
|
2022-05-09 22:18:59 +02:00
|
|
|
// the pending operations on the allocated chunks will be added to it
|
|
|
|
// guaranteeing that all the entries come from the same GPU as the PMM.
|
|
|
|
// Otherwise, when tracker is NULL, all the pending operations will be
|
|
|
|
// synchronized before returning to the caller.
|
|
|
|
//
|
|
|
|
// Each of the allocated chunks list nodes (uvm_gpu_chunk_t::list) can be used
|
2023-02-28 20:12:44 +01:00
|
|
|
// by the caller until the chunk is unpinned (uvm_pmm_gpu_unpin_allocated,
|
|
|
|
// uvm_pmm_gpu_unpin_referenced) or freed (uvm_pmm_gpu_free). If used, the list
|
|
|
|
// node has to be returned to a valid state before calling either of the APIs.
|
2022-05-09 22:18:59 +02:00
|
|
|
//
|
|
|
|
// In case of an error, the chunks array is guaranteed to be cleared.
|
2023-05-30 19:11:36 +02:00
|
|
|
//
|
|
|
|
// If the memory returned by the PMM allocator cannot be physically addressed,
|
|
|
|
// the MMU interface provides user chunk mapping and unmapping functions
|
|
|
|
// (uvm_mmu_chunk_map/unmap) that enable virtual addressing.
|
2022-05-09 22:18:59 +02:00
|
|
|
NV_STATUS uvm_pmm_gpu_alloc(uvm_pmm_gpu_t *pmm,
|
|
|
|
size_t num_chunks,
|
|
|
|
uvm_chunk_size_t chunk_size,
|
|
|
|
uvm_pmm_gpu_memory_type_t mem_type,
|
|
|
|
uvm_pmm_alloc_flags_t flags,
|
|
|
|
uvm_gpu_chunk_t **chunks,
|
|
|
|
uvm_tracker_t *out_tracker);
|
|
|
|
|
|
|
|
// Helper for allocating kernel memory
|
|
|
|
//
|
|
|
|
// Internally calls uvm_pmm_gpu_alloc() and sets the state of all chunks to
|
|
|
|
// allocated on success.
|
2023-05-30 19:11:36 +02:00
|
|
|
//
|
|
|
|
// If Confidential Computing is enabled, this helper allocates protected kernel
|
|
|
|
// memory.
|
|
|
|
static NV_STATUS uvm_pmm_gpu_alloc_kernel(uvm_pmm_gpu_t *pmm,
|
|
|
|
size_t num_chunks,
|
|
|
|
uvm_chunk_size_t chunk_size,
|
|
|
|
uvm_pmm_alloc_flags_t flags,
|
|
|
|
uvm_gpu_chunk_t **chunks,
|
|
|
|
uvm_tracker_t *out_tracker)
|
|
|
|
{
|
|
|
|
return uvm_pmm_gpu_alloc(pmm, num_chunks, chunk_size, UVM_PMM_GPU_MEMORY_TYPE_KERNEL, flags, chunks, out_tracker);
|
|
|
|
}
|
2022-05-09 22:18:59 +02:00
|
|
|
|
|
|
|
// Helper for allocating user memory
|
|
|
|
//
|
|
|
|
// Simple wrapper that just uses UVM_PMM_GPU_MEMORY_TYPE_USER for the memory
|
|
|
|
// type.
|
|
|
|
//
|
2023-05-30 19:11:36 +02:00
|
|
|
// If Confidential Computing is enabled, this helper allocates protected user
|
|
|
|
// memory.
|
2022-05-09 22:18:59 +02:00
|
|
|
static NV_STATUS uvm_pmm_gpu_alloc_user(uvm_pmm_gpu_t *pmm,
|
|
|
|
size_t num_chunks,
|
|
|
|
uvm_chunk_size_t chunk_size,
|
|
|
|
uvm_pmm_alloc_flags_t flags,
|
|
|
|
uvm_gpu_chunk_t **chunks,
|
|
|
|
uvm_tracker_t *out_tracker)
|
|
|
|
{
|
|
|
|
return uvm_pmm_gpu_alloc(pmm, num_chunks, chunk_size, UVM_PMM_GPU_MEMORY_TYPE_USER, flags, chunks, out_tracker);
|
|
|
|
}
|
|
|
|
|
2023-02-28 20:12:44 +01:00
|
|
|
// Unpin a temporarily pinned chunk, set its reverse map to a VA block, and
|
|
|
|
// mark it as allocated.
|
|
|
|
//
|
|
|
|
// Can only be used on user memory.
|
|
|
|
void uvm_pmm_gpu_unpin_allocated(uvm_pmm_gpu_t *pmm, uvm_gpu_chunk_t *chunk, uvm_va_block_t *va_block);
|
|
|
|
|
|
|
|
// Unpin a temporarily pinned chunk, set its reverse map to a VA block, and
|
|
|
|
// mark it as referenced.
|
2022-05-09 22:18:59 +02:00
|
|
|
//
|
|
|
|
// Can only be used on user memory.
|
2023-02-28 20:12:44 +01:00
|
|
|
void uvm_pmm_gpu_unpin_referenced(uvm_pmm_gpu_t *pmm, uvm_gpu_chunk_t *chunk, uvm_va_block_t *va_block);
|
2022-05-09 22:18:59 +02:00
|
|
|
|
|
|
|
// Frees the chunk. This also unpins the chunk if it is temporarily pinned.
|
|
|
|
//
|
|
|
|
// The tracker is optional and a NULL tracker indicates that no new operation
|
|
|
|
// has been pushed for the chunk, but the tracker returned as part of
|
|
|
|
// its allocation doesn't have to be completed as PMM will synchronize it
|
|
|
|
// internally if needed. A non-NULL tracker indicates any additional pending
|
|
|
|
// operations on the chunk pushed by the caller that need to be synchronized
|
|
|
|
// before freeing or re-using the chunk.
|
|
|
|
void uvm_pmm_gpu_free(uvm_pmm_gpu_t *pmm, uvm_gpu_chunk_t *chunk, uvm_tracker_t *tracker);
|
|
|
|
|
|
|
|
// Splits the input chunk in-place into smaller chunks of subchunk_size. No data
|
|
|
|
// is moved, and the smaller chunks remain allocated.
|
|
|
|
//
|
|
|
|
// If the subchunks array is non-NULL, it will be filled with
|
|
|
|
// (uvm_gpu_chunk_get_size(chunk) / subchunk_size) chunks in address order. The
|
|
|
|
// new chunks must all be freed individually.
|
|
|
|
//
|
|
|
|
// If the subchunks array is NULL, the split chunks can be retrieved later by
|
|
|
|
// passing the original parent chunk to uvm_pmm_gpu_get_subchunks.
|
|
|
|
//
|
|
|
|
// On error, the original chunk remains unmodified.
|
|
|
|
//
|
|
|
|
// The chunk must be in the ALLOCATED state with the owning VA block lock held,
|
|
|
|
// or the TEMP_PINNED state.
|
|
|
|
//
|
|
|
|
// subchunk_size must be a valid chunk size for the given type.
|
|
|
|
//
|
|
|
|
// The chunk can be re-merged if desired using uvm_pmm_gpu_merge_chunk.
|
|
|
|
NV_STATUS uvm_pmm_gpu_split_chunk(uvm_pmm_gpu_t *pmm,
|
|
|
|
uvm_gpu_chunk_t *chunk,
|
|
|
|
uvm_chunk_size_t subchunk_size,
|
|
|
|
uvm_gpu_chunk_t **subchunks);
|
|
|
|
|
|
|
|
// Retrieve leaf subchunks under parent. Up to num_subchunks chunks are copied
|
|
|
|
// into the subchunks array in address order, starting with the subchunk at
|
|
|
|
// start_index. start_index can be thought of as the number of leaf subchunks to
|
|
|
|
// skip before beginning the copy.
|
|
|
|
//
|
|
|
|
// parent can be in the ALLOCATED state, in which case parent is the only chunk
|
|
|
|
// which may be copied into the subchunks array.
|
|
|
|
//
|
|
|
|
// num_subchunks may be 0.
|
|
|
|
//
|
|
|
|
// Returns the number of subchunks written to the array. This may be less than
|
|
|
|
// num_subchunks depending on the value of start_index and how many subchunks
|
|
|
|
// are present under parent.
|
|
|
|
size_t uvm_pmm_gpu_get_subchunks(uvm_pmm_gpu_t *pmm,
|
|
|
|
uvm_gpu_chunk_t *parent,
|
|
|
|
size_t start_index,
|
|
|
|
size_t num_subchunks,
|
|
|
|
uvm_gpu_chunk_t **subchunks);
|
|
|
|
|
|
|
|
// Merges a chunk previously split with uvm_pmm_gpu_split_chunk. All of chunk's
|
|
|
|
// leaf children must be allocated.
|
|
|
|
void uvm_pmm_gpu_merge_chunk(uvm_pmm_gpu_t *pmm, uvm_gpu_chunk_t *chunk);
|
|
|
|
|
|
|
|
// Waits for all free chunk trackers (removing their completed entries) to complete.
|
|
|
|
//
|
|
|
|
// This inherently races with any chunks being freed to this PMM. The assumption
|
|
|
|
// is that the caller doesn't care about preventing new chunks from being freed,
|
|
|
|
// just that any already-freed chunks will be synced.
|
|
|
|
void uvm_pmm_gpu_sync(uvm_pmm_gpu_t *pmm);
|
|
|
|
|
|
|
|
// Mark an allocated chunk as evicted
|
|
|
|
void uvm_pmm_gpu_mark_chunk_evicted(uvm_pmm_gpu_t *pmm, uvm_gpu_chunk_t *chunk);
|
|
|
|
|
|
|
|
// Initialize indirect peer state so accessing_gpu is ready to create mappings
|
|
|
|
// to pmm's root chunks.
|
|
|
|
//
|
|
|
|
// Locking: The global lock must be held.
|
|
|
|
NV_STATUS uvm_pmm_gpu_indirect_peer_init(uvm_pmm_gpu_t *pmm, uvm_gpu_t *accessing_gpu);
|
|
|
|
|
|
|
|
// Tear down indirect peer state from other_gpu to pmm's GPU. Any existing IOMMU
|
|
|
|
// mappings from other_gpu to this GPU are torn down.
|
|
|
|
//
|
|
|
|
// Locking: The global lock must be held.
|
|
|
|
void uvm_pmm_gpu_indirect_peer_destroy(uvm_pmm_gpu_t *pmm, uvm_gpu_t *other_gpu);
|
|
|
|
|
|
|
|
// Create an IOMMU mapping to allow accessing_gpu to access chunk on pmm's GPU.
|
|
|
|
// chunk can be any size, and can be mapped more than once (the address will not
|
|
|
|
// change). The address can be retrieved using uvm_pmm_gpu_indirect_peer_addr.
|
|
|
|
//
|
|
|
|
// Note that there is no corresponding unmap call. The mappings will be removed
|
|
|
|
// automatically as necessary when the chunk is freed. This allows mappings to
|
|
|
|
// be reused as much as possible.
|
|
|
|
NV_STATUS uvm_pmm_gpu_indirect_peer_map(uvm_pmm_gpu_t *pmm, uvm_gpu_chunk_t *chunk, uvm_gpu_t *accessing_gpu);
|
|
|
|
|
|
|
|
// Retrieve the system address accessing_gpu must use to access this chunk.
|
|
|
|
// uvm_pmm_gpu_indirect_peer_map must have been called first.
|
|
|
|
NvU64 uvm_pmm_gpu_indirect_peer_addr(uvm_pmm_gpu_t *pmm, uvm_gpu_chunk_t *chunk, uvm_gpu_t *accessing_gpu);
|
|
|
|
|
|
|
|
// Returns the physical address for use by accessing_gpu of a vidmem allocation
|
|
|
|
// on the peer pmm->gpu. This address can be used for making PTEs on
|
|
|
|
// accessing_gpu, but not for copying between the two GPUs. For that, use
|
|
|
|
// uvm_gpu_peer_copy_address.
|
|
|
|
uvm_gpu_phys_address_t uvm_pmm_gpu_peer_phys_address(uvm_pmm_gpu_t *pmm,
|
|
|
|
uvm_gpu_chunk_t *chunk,
|
|
|
|
uvm_gpu_t *accessing_gpu);
|
|
|
|
|
|
|
|
// Returns the physical or virtual address for use by accessing_gpu to copy to/
|
|
|
|
// from a vidmem allocation on the peer pmm->gpu. This may be different from
|
|
|
|
// uvm_gpu_peer_phys_address to handle CE limitations in addressing peer
|
|
|
|
// physical memory directly.
|
|
|
|
uvm_gpu_address_t uvm_pmm_gpu_peer_copy_address(uvm_pmm_gpu_t *pmm,
|
|
|
|
uvm_gpu_chunk_t *chunk,
|
|
|
|
uvm_gpu_t *accessing_gpu);
|
|
|
|
|
|
|
|
// Mark a user chunk as used
|
|
|
|
//
|
|
|
|
// If the chunk is pinned or selected for eviction, this won't do anything. The
|
|
|
|
// chunk can be pinned when it's being initially populated by the VA block.
|
|
|
|
// Allow that state to make this API easy to use for the caller.
|
|
|
|
void uvm_pmm_gpu_mark_root_chunk_used(uvm_pmm_gpu_t *pmm, uvm_gpu_chunk_t *chunk);
|
|
|
|
|
|
|
|
// Mark an allocated user chunk as unused
|
|
|
|
void uvm_pmm_gpu_mark_root_chunk_unused(uvm_pmm_gpu_t *pmm, uvm_gpu_chunk_t *chunk);
|
|
|
|
|
|
|
|
static bool uvm_gpu_chunk_same_root(uvm_gpu_chunk_t *chunk1, uvm_gpu_chunk_t *chunk2)
|
|
|
|
{
|
|
|
|
return UVM_ALIGN_DOWN(chunk1->address, UVM_CHUNK_SIZE_MAX) == UVM_ALIGN_DOWN(chunk2->address, UVM_CHUNK_SIZE_MAX);
|
|
|
|
}
|
|
|
|
|
|
|
|
// Finds the first (smallest) size in the chunk_sizes mask
|
|
|
|
static uvm_chunk_size_t uvm_chunk_find_first_size(uvm_chunk_sizes_mask_t chunk_sizes)
|
|
|
|
{
|
|
|
|
UVM_ASSERT(chunk_sizes);
|
|
|
|
return (uvm_chunk_size_t)1 << __ffs(chunk_sizes);
|
|
|
|
}
|
|
|
|
|
|
|
|
// Finds the last (biggest) size in the chunk_sizes mask
|
|
|
|
static uvm_chunk_size_t uvm_chunk_find_last_size(uvm_chunk_sizes_mask_t chunk_sizes)
|
|
|
|
{
|
|
|
|
UVM_ASSERT(chunk_sizes);
|
|
|
|
return (uvm_chunk_size_t)1 << __fls(chunk_sizes);
|
|
|
|
}
|
|
|
|
|
|
|
|
// Finds the smallest size in the chunk_sizes mask which is larger than
|
|
|
|
// chunk_size. If there is no such value returns UVM_CHUNK_SIZE_INVALID.
|
|
|
|
static uvm_chunk_size_t uvm_chunk_find_next_size(uvm_chunk_sizes_mask_t chunk_sizes, uvm_chunk_size_t chunk_size)
|
|
|
|
{
|
|
|
|
UVM_ASSERT(is_power_of_2(chunk_size));
|
|
|
|
UVM_ASSERT(chunk_sizes & chunk_size);
|
|
|
|
BUILD_BUG_ON(sizeof(chunk_sizes) > sizeof(unsigned long));
|
|
|
|
return (uvm_chunk_size_t)1 << __ffs((chunk_sizes & ~((chunk_size << 1) - 1)) | UVM_CHUNK_SIZE_INVALID);
|
|
|
|
}
|
|
|
|
|
|
|
|
// Finds the largest size in the chunk_sizes mask which is smaller than
|
|
|
|
// chunk_size. If there is no such value returns UVM_CHUNK_SIZE_INVALID.
|
|
|
|
static uvm_chunk_size_t uvm_chunk_find_prev_size(uvm_chunk_sizes_mask_t chunk_sizes, uvm_chunk_size_t chunk_size)
|
|
|
|
{
|
|
|
|
UVM_ASSERT(is_power_of_2(chunk_size));
|
|
|
|
UVM_ASSERT(chunk_sizes & chunk_size);
|
|
|
|
chunk_sizes = chunk_sizes & (chunk_size - 1);
|
|
|
|
if (!chunk_sizes)
|
|
|
|
return UVM_CHUNK_SIZE_INVALID;
|
|
|
|
return (uvm_chunk_size_t)1 << __fls(chunk_sizes);
|
|
|
|
}
|
|
|
|
|
|
|
|
// Obtain the {va_block, virt_addr} information for the chunks in the given
|
|
|
|
// [phys_addr:phys_addr + region_size) range. One entry per chunk is returned.
|
|
|
|
// phys_addr and region_size must be page-aligned.
|
|
|
|
//
|
|
|
|
// Valid translations are written to out_mappings sequentially (there are no
|
|
|
|
// gaps). The caller is required to provide enough entries in out_pages for the
|
|
|
|
// whole region. The function returns the number of entries written to
|
|
|
|
// out_mappings.
|
|
|
|
//
|
|
|
|
// The returned reverse map is a snapshot: it is stale as soon as it is
|
|
|
|
// returned, and the caller is responsible for locking the VA block(s) and
|
|
|
|
// checking that the chunks are still there. Also, the VA block(s) are
|
|
|
|
// retained, and it's up to the caller to release them.
|
|
|
|
NvU32 uvm_pmm_gpu_phys_to_virt(uvm_pmm_gpu_t *pmm, NvU64 phys_addr, NvU64 region_size, uvm_reverse_map_t *out_mappings);
|
|
|
|
|
|
|
|
// Iterates over every size in the input mask from smallest to largest
|
|
|
|
#define for_each_chunk_size(__size, __chunk_sizes) \
|
|
|
|
for ((__size) = (__chunk_sizes) ? uvm_chunk_find_first_size(__chunk_sizes) : \
|
|
|
|
UVM_CHUNK_SIZE_INVALID; \
|
|
|
|
(__size) != UVM_CHUNK_SIZE_INVALID; \
|
|
|
|
(__size) = uvm_chunk_find_next_size((__chunk_sizes), (__size)))
|
|
|
|
|
|
|
|
// Iterates over every size in the input mask from largest to smallest
|
|
|
|
#define for_each_chunk_size_rev(__size, __chunk_sizes) \
|
|
|
|
for ((__size) = (__chunk_sizes) ? uvm_chunk_find_last_size(__chunk_sizes) : \
|
|
|
|
UVM_CHUNK_SIZE_INVALID; \
|
|
|
|
(__size) != UVM_CHUNK_SIZE_INVALID; \
|
|
|
|
(__size) = uvm_chunk_find_prev_size((__chunk_sizes), (__size)))
|
|
|
|
|
|
|
|
// Iterates over every size in the input mask from smallest to largest, starting
|
|
|
|
// from and including __size. __size must be present in the mask.
|
|
|
|
#define for_each_chunk_size_from(__size, __chunk_sizes) \
|
|
|
|
for (; (__size) != UVM_CHUNK_SIZE_INVALID; \
|
|
|
|
(__size) = uvm_chunk_find_next_size((__chunk_sizes), (__size)))
|
|
|
|
|
|
|
|
// Iterates over every size in the input mask from largest to smallest, starting
|
|
|
|
// from and including __size. __size must be present in the mask.
|
|
|
|
#define for_each_chunk_size_rev_from(__size, __chunk_sizes) \
|
|
|
|
for (; (__size) != UVM_CHUNK_SIZE_INVALID; \
|
|
|
|
(__size) = uvm_chunk_find_prev_size((__chunk_sizes), (__size)))
|
|
|
|
|
|
|
|
#endif
|