mirror of
https://github.com/NVIDIA/open-gpu-kernel-modules.git
synced 2025-01-19 03:52:11 +01:00
550.40.07
This commit is contained in:
parent
bb2dac1f20
commit
91676d6628
@ -1,5 +1,13 @@
|
||||
# Changelog
|
||||
|
||||
## Release 550 Entries
|
||||
|
||||
### [550.40.07] 2024-01-24
|
||||
|
||||
#### Fixed
|
||||
|
||||
- Set INSTALL_MOD_DIR only if it's not defined, [#570](https://github.com/NVIDIA/open-gpu-kernel-modules/pull/570) by @keelung-yang
|
||||
|
||||
## Release 545 Entries
|
||||
|
||||
### [545.29.06] 2023-11-22
|
||||
|
15
README.md
15
README.md
@ -1,7 +1,7 @@
|
||||
# NVIDIA Linux Open GPU Kernel Module Source
|
||||
|
||||
This is the source release of the NVIDIA Linux open GPU kernel modules,
|
||||
version 545.29.06.
|
||||
version 550.40.07.
|
||||
|
||||
|
||||
## How to Build
|
||||
@ -17,7 +17,7 @@ as root:
|
||||
|
||||
Note that the kernel modules built here must be used with GSP
|
||||
firmware and user-space NVIDIA GPU driver components from a corresponding
|
||||
545.29.06 driver release. This can be achieved by installing
|
||||
550.40.07 driver release. This can be achieved by installing
|
||||
the NVIDIA GPU driver from the .run file using the `--no-kernel-modules`
|
||||
option. E.g.,
|
||||
|
||||
@ -188,7 +188,7 @@ encountered specific to them.
|
||||
For details on feature support and limitations, see the NVIDIA GPU driver
|
||||
end user README here:
|
||||
|
||||
https://us.download.nvidia.com/XFree86/Linux-x86_64/545.29.06/README/kernel_open.html
|
||||
https://us.download.nvidia.com/XFree86/Linux-x86_64/550.40.07/README/kernel_open.html
|
||||
|
||||
In the below table, if three IDs are listed, the first is the PCI Device
|
||||
ID, the second is the PCI Subsystem Vendor ID, and the third is the PCI
|
||||
@ -683,6 +683,7 @@ Subsystem Device ID.
|
||||
| NVIDIA A800 40GB Active | 20F6 103C 180A |
|
||||
| NVIDIA A800 40GB Active | 20F6 10DE 180A |
|
||||
| NVIDIA A800 40GB Active | 20F6 17AA 180A |
|
||||
| NVIDIA AX800 | 20FD 10DE 17F8 |
|
||||
| NVIDIA GeForce GTX 1660 Ti | 2182 |
|
||||
| NVIDIA GeForce GTX 1660 | 2184 |
|
||||
| NVIDIA GeForce GTX 1650 SUPER | 2187 |
|
||||
@ -836,6 +837,7 @@ Subsystem Device ID.
|
||||
| NVIDIA RTX A2000 Embedded GPU | 25FA |
|
||||
| NVIDIA RTX A500 Embedded GPU | 25FB |
|
||||
| NVIDIA GeForce RTX 4090 | 2684 |
|
||||
| NVIDIA GeForce RTX 4090 D | 2685 |
|
||||
| NVIDIA RTX 6000 Ada Generation | 26B1 1028 16A1 |
|
||||
| NVIDIA RTX 6000 Ada Generation | 26B1 103C 16A1 |
|
||||
| NVIDIA RTX 6000 Ada Generation | 26B1 10DE 16A1 |
|
||||
@ -844,16 +846,22 @@ Subsystem Device ID.
|
||||
| NVIDIA RTX 5000 Ada Generation | 26B2 103C 17FA |
|
||||
| NVIDIA RTX 5000 Ada Generation | 26B2 10DE 17FA |
|
||||
| NVIDIA RTX 5000 Ada Generation | 26B2 17AA 17FA |
|
||||
| NVIDIA RTX 5880 Ada Generation | 26B3 103C 1934 |
|
||||
| NVIDIA RTX 5880 Ada Generation | 26B3 10DE 1934 |
|
||||
| NVIDIA RTX 5880 Ada Generation | 26B3 17AA 1934 |
|
||||
| NVIDIA L40 | 26B5 10DE 169D |
|
||||
| NVIDIA L40 | 26B5 10DE 17DA |
|
||||
| NVIDIA L40S | 26B9 10DE 1851 |
|
||||
| NVIDIA L40S | 26B9 10DE 18CF |
|
||||
| NVIDIA L20 | 26BA 10DE 1957 |
|
||||
| NVIDIA GeForce RTX 4080 | 2704 |
|
||||
| NVIDIA GeForce RTX 4070 Ti SUPER | 2705 |
|
||||
| NVIDIA GeForce RTX 4090 Laptop GPU | 2717 |
|
||||
| NVIDIA RTX 5000 Ada Generation Laptop GPU | 2730 |
|
||||
| NVIDIA GeForce RTX 4090 Laptop GPU | 2757 |
|
||||
| NVIDIA RTX 5000 Ada Generation Embedded GPU | 2770 |
|
||||
| NVIDIA GeForce RTX 4070 Ti | 2782 |
|
||||
| NVIDIA GeForce RTX 4070 SUPER | 2783 |
|
||||
| NVIDIA GeForce RTX 4070 | 2786 |
|
||||
| NVIDIA GeForce RTX 4080 Laptop GPU | 27A0 |
|
||||
| NVIDIA RTX 4000 SFF Ada Generation | 27B0 1028 16FA |
|
||||
@ -868,6 +876,7 @@ Subsystem Device ID.
|
||||
| NVIDIA RTX 4000 Ada Generation | 27B2 103C 181B |
|
||||
| NVIDIA RTX 4000 Ada Generation | 27B2 10DE 181B |
|
||||
| NVIDIA RTX 4000 Ada Generation | 27B2 17AA 181B |
|
||||
| NVIDIA L2 | 27B6 10DE 1933 |
|
||||
| NVIDIA L4 | 27B8 10DE 16CA |
|
||||
| NVIDIA L4 | 27B8 10DE 16EE |
|
||||
| NVIDIA RTX 4000 Ada Generation Laptop GPU | 27BA |
|
||||
|
@ -70,9 +70,9 @@ $(foreach _module, $(NV_KERNEL_MODULES), \
|
||||
|
||||
EXTRA_CFLAGS += -I$(src)/common/inc
|
||||
EXTRA_CFLAGS += -I$(src)
|
||||
EXTRA_CFLAGS += -Wall $(DEFINES) $(INCLUDES) -Wno-cast-qual -Wno-error -Wno-format-extra-args
|
||||
EXTRA_CFLAGS += -Wall $(DEFINES) $(INCLUDES) -Wno-cast-qual -Wno-format-extra-args
|
||||
EXTRA_CFLAGS += -D__KERNEL__ -DMODULE -DNVRM
|
||||
EXTRA_CFLAGS += -DNV_VERSION_STRING=\"545.29.06\"
|
||||
EXTRA_CFLAGS += -DNV_VERSION_STRING=\"550.40.07\"
|
||||
|
||||
ifneq ($(SYSSRCHOST1X),)
|
||||
EXTRA_CFLAGS += -I$(SYSSRCHOST1X)
|
||||
@ -134,6 +134,16 @@ ifneq ($(wildcard /proc/sgi_uv),)
|
||||
EXTRA_CFLAGS += -DNV_CONFIG_X86_UV
|
||||
endif
|
||||
|
||||
ifdef VGX_FORCE_VFIO_PCI_CORE
|
||||
EXTRA_CFLAGS += -DNV_VGPU_FORCE_VFIO_PCI_CORE
|
||||
endif
|
||||
|
||||
WARNINGS_AS_ERRORS ?=
|
||||
ifeq ($(WARNINGS_AS_ERRORS),1)
|
||||
ccflags-y += -Werror
|
||||
else
|
||||
ccflags-y += -Wno-error
|
||||
endif
|
||||
|
||||
#
|
||||
# The conftest.sh script tests various aspects of the target kernel.
|
||||
@ -160,6 +170,7 @@ NV_CONFTEST_CMD := /bin/sh $(NV_CONFTEST_SCRIPT) \
|
||||
NV_CFLAGS_FROM_CONFTEST := $(shell $(NV_CONFTEST_CMD) build_cflags)
|
||||
|
||||
NV_CONFTEST_CFLAGS = $(NV_CFLAGS_FROM_CONFTEST) $(EXTRA_CFLAGS) -fno-pie
|
||||
NV_CONFTEST_CFLAGS += -Wno-error
|
||||
|
||||
NV_CONFTEST_COMPILE_TEST_HEADERS := $(obj)/conftest/macros.h
|
||||
NV_CONFTEST_COMPILE_TEST_HEADERS += $(obj)/conftest/functions.h
|
||||
@ -219,106 +230,7 @@ $(obj)/conftest/patches.h: $(NV_CONFTEST_SCRIPT)
|
||||
@mkdir -p $(obj)/conftest
|
||||
@$(NV_CONFTEST_CMD) patch_check > $@
|
||||
|
||||
|
||||
# Each of these headers is checked for presence with a test #include; a
|
||||
# corresponding #define will be generated in conftest/headers.h.
|
||||
NV_HEADER_PRESENCE_TESTS = \
|
||||
asm/system.h \
|
||||
drm/drmP.h \
|
||||
drm/drm_aperture.h \
|
||||
drm/drm_auth.h \
|
||||
drm/drm_gem.h \
|
||||
drm/drm_crtc.h \
|
||||
drm/drm_color_mgmt.h \
|
||||
drm/drm_atomic.h \
|
||||
drm/drm_atomic_helper.h \
|
||||
drm/drm_atomic_state_helper.h \
|
||||
drm/drm_encoder.h \
|
||||
drm/drm_atomic_uapi.h \
|
||||
drm/drm_drv.h \
|
||||
drm/drm_fbdev_generic.h \
|
||||
drm/drm_framebuffer.h \
|
||||
drm/drm_connector.h \
|
||||
drm/drm_probe_helper.h \
|
||||
drm/drm_blend.h \
|
||||
drm/drm_fourcc.h \
|
||||
drm/drm_prime.h \
|
||||
drm/drm_plane.h \
|
||||
drm/drm_vblank.h \
|
||||
drm/drm_file.h \
|
||||
drm/drm_ioctl.h \
|
||||
drm/drm_device.h \
|
||||
drm/drm_mode_config.h \
|
||||
drm/drm_modeset_lock.h \
|
||||
dt-bindings/interconnect/tegra_icc_id.h \
|
||||
generated/autoconf.h \
|
||||
generated/compile.h \
|
||||
generated/utsrelease.h \
|
||||
linux/efi.h \
|
||||
linux/kconfig.h \
|
||||
linux/platform/tegra/mc_utils.h \
|
||||
linux/printk.h \
|
||||
linux/ratelimit.h \
|
||||
linux/prio_tree.h \
|
||||
linux/log2.h \
|
||||
linux/of.h \
|
||||
linux/bug.h \
|
||||
linux/sched.h \
|
||||
linux/sched/mm.h \
|
||||
linux/sched/signal.h \
|
||||
linux/sched/task.h \
|
||||
linux/sched/task_stack.h \
|
||||
xen/ioemu.h \
|
||||
linux/fence.h \
|
||||
linux/dma-fence.h \
|
||||
linux/dma-resv.h \
|
||||
soc/tegra/chip-id.h \
|
||||
soc/tegra/fuse.h \
|
||||
soc/tegra/tegra_bpmp.h \
|
||||
video/nv_internal.h \
|
||||
linux/platform/tegra/dce/dce-client-ipc.h \
|
||||
linux/nvhost.h \
|
||||
linux/nvhost_t194.h \
|
||||
linux/host1x-next.h \
|
||||
asm/book3s/64/hash-64k.h \
|
||||
asm/set_memory.h \
|
||||
asm/prom.h \
|
||||
asm/powernv.h \
|
||||
linux/atomic.h \
|
||||
asm/barrier.h \
|
||||
asm/opal-api.h \
|
||||
sound/hdaudio.h \
|
||||
asm/pgtable_types.h \
|
||||
asm/page.h \
|
||||
linux/stringhash.h \
|
||||
linux/dma-map-ops.h \
|
||||
rdma/peer_mem.h \
|
||||
sound/hda_codec.h \
|
||||
linux/dma-buf.h \
|
||||
linux/time.h \
|
||||
linux/platform_device.h \
|
||||
linux/mutex.h \
|
||||
linux/reset.h \
|
||||
linux/of_platform.h \
|
||||
linux/of_device.h \
|
||||
linux/of_gpio.h \
|
||||
linux/gpio.h \
|
||||
linux/gpio/consumer.h \
|
||||
linux/interconnect.h \
|
||||
linux/pm_runtime.h \
|
||||
linux/clk.h \
|
||||
linux/clk-provider.h \
|
||||
linux/ioasid.h \
|
||||
linux/stdarg.h \
|
||||
linux/iosys-map.h \
|
||||
asm/coco.h \
|
||||
linux/vfio_pci_core.h \
|
||||
linux/mdev.h \
|
||||
soc/tegra/bpmp-abi.h \
|
||||
soc/tegra/bpmp.h \
|
||||
linux/sync_file.h \
|
||||
linux/cc_platform.h \
|
||||
asm/cpufeature.h
|
||||
include $(src)/header-presence-tests.mk
|
||||
|
||||
# Filename to store the define for the header in $(1); this is only consumed by
|
||||
# the rule below that concatenates all of these together.
|
||||
|
@ -57,12 +57,15 @@ else
|
||||
-e 's/armv[0-7]\w\+/arm/' \
|
||||
-e 's/aarch64/arm64/' \
|
||||
-e 's/ppc64le/powerpc/' \
|
||||
-e 's/riscv64/riscv/' \
|
||||
)
|
||||
endif
|
||||
|
||||
NV_KERNEL_MODULES ?= $(wildcard nvidia nvidia-uvm nvidia-vgpu-vfio nvidia-modeset nvidia-drm nvidia-peermem)
|
||||
NV_KERNEL_MODULES := $(filter-out $(NV_EXCLUDE_KERNEL_MODULES), \
|
||||
$(NV_KERNEL_MODULES))
|
||||
INSTALL_MOD_DIR ?= kernel/drivers/video
|
||||
|
||||
NV_VERBOSE ?=
|
||||
SPECTRE_V2_RETPOLINE ?= 0
|
||||
|
||||
@ -74,7 +77,7 @@ else
|
||||
KBUILD_PARAMS += NV_KERNEL_SOURCES=$(KERNEL_SOURCES)
|
||||
KBUILD_PARAMS += NV_KERNEL_OUTPUT=$(KERNEL_OUTPUT)
|
||||
KBUILD_PARAMS += NV_KERNEL_MODULES="$(NV_KERNEL_MODULES)"
|
||||
KBUILD_PARAMS += INSTALL_MOD_DIR=kernel/drivers/video
|
||||
KBUILD_PARAMS += INSTALL_MOD_DIR="$(INSTALL_MOD_DIR)"
|
||||
KBUILD_PARAMS += NV_SPECTRE_V2=$(SPECTRE_V2_RETPOLINE)
|
||||
|
||||
.PHONY: modules module clean clean_conftest modules_install
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* SPDX-FileCopyrightText: Copyright (c) 2020-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* SPDX-FileCopyrightText: Copyright (c) 2020-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
@ -39,5 +39,6 @@
|
||||
#define NV_ESC_QUERY_DEVICE_INTR (NV_IOCTL_BASE + 13)
|
||||
#define NV_ESC_SYS_PARAMS (NV_IOCTL_BASE + 14)
|
||||
#define NV_ESC_EXPORT_TO_DMABUF_FD (NV_IOCTL_BASE + 17)
|
||||
#define NV_ESC_WAIT_OPEN_COMPLETE (NV_IOCTL_BASE + 18)
|
||||
|
||||
#endif
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* SPDX-FileCopyrightText: Copyright (c) 2020-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* SPDX-FileCopyrightText: Copyright (c) 2020-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
@ -142,4 +142,10 @@ typedef struct nv_ioctl_export_to_dma_buf_fd
|
||||
NvU32 status;
|
||||
} nv_ioctl_export_to_dma_buf_fd_t;
|
||||
|
||||
/*
 * Parameter block carrying two result values: an integer return code and a
 * 32-bit adapter status.
 * NOTE(review): presumably the payload of the NV_ESC_WAIT_OPEN_COMPLETE
 * escape and filled from the per-file open_rc / adapter_status fields --
 * confirm against the ioctl handler.
 */
typedef struct nv_ioctl_wait_open_complete
|
||||
{
|
||||
int rc; /* integer result code (NOTE(review): looks like an errno-style open result -- confirm) */
|
||||
NvU32 adapterStatus; /* 32-bit status word (NOTE(review): presumably an NV_STATUS value -- confirm) */
|
||||
} nv_ioctl_wait_open_complete_t;
|
||||
|
||||
#endif
|
||||
|
@ -35,6 +35,7 @@
|
||||
#include "os-interface.h"
|
||||
#include "nv-timer.h"
|
||||
#include "nv-time.h"
|
||||
#include "nv-chardev-numbers.h"
|
||||
|
||||
#define NV_KERNEL_NAME "Linux"
|
||||
|
||||
@ -406,37 +407,6 @@ extern int nv_pat_mode;
|
||||
#define NV_GFP_DMA32 (NV_GFP_KERNEL)
|
||||
#endif
|
||||
|
||||
extern NvBool nvos_is_chipset_io_coherent(void);
|
||||
|
||||
#if defined(NVCPU_X86_64)
|
||||
#define CACHE_FLUSH() asm volatile("wbinvd":::"memory")
|
||||
#define WRITE_COMBINE_FLUSH() asm volatile("sfence":::"memory")
|
||||
#elif defined(NVCPU_AARCH64)
|
||||
static inline void nv_flush_cache_cpu(void *info)
|
||||
{
|
||||
if (!nvos_is_chipset_io_coherent())
|
||||
{
|
||||
#if defined(NV_FLUSH_CACHE_ALL_PRESENT)
|
||||
flush_cache_all();
|
||||
#else
|
||||
WARN_ONCE(0, "NVRM: kernel does not support flush_cache_all()\n");
|
||||
#endif
|
||||
}
|
||||
}
|
||||
#define CACHE_FLUSH() nv_flush_cache_cpu(NULL)
|
||||
#define CACHE_FLUSH_ALL() on_each_cpu(nv_flush_cache_cpu, NULL, 1)
|
||||
#define WRITE_COMBINE_FLUSH() mb()
|
||||
#elif defined(NVCPU_PPC64LE)
|
||||
#define CACHE_FLUSH() asm volatile("sync; \n" \
|
||||
"isync; \n" ::: "memory")
|
||||
#define WRITE_COMBINE_FLUSH() CACHE_FLUSH()
|
||||
#elif defined(NVCPU_RISCV64)
|
||||
#define CACHE_FLUSH() mb()
|
||||
#define WRITE_COMBINE_FLUSH() CACHE_FLUSH()
|
||||
#else
|
||||
#error "CACHE_FLUSH() and WRITE_COMBINE_FLUSH() need to be defined for this architecture."
|
||||
#endif
|
||||
|
||||
typedef enum
|
||||
{
|
||||
NV_MEMORY_TYPE_SYSTEM, /* Memory mapped for ROM, SBIOS and physical RAM. */
|
||||
@ -1380,7 +1350,19 @@ typedef struct nv_dma_map_s {
|
||||
i < dm->mapping.discontig.submap_count; \
|
||||
i++, sm = &dm->mapping.discontig.submaps[i])
|
||||
|
||||
/*
|
||||
* On 4K ARM kernels, use a max submap size that is a multiple of 64K to keep nv-p2p happy.
|
||||
* Despite 4K OS pages, we still use 64K P2P pages due to dependent modules still using 64K.
|
||||
* Instead of using (4G-4K), use max submap size as (4G-64K) since the mapped IOVA range
|
||||
* must be aligned at 64K boundary.
|
||||
*/
|
||||
#if defined(CONFIG_ARM64_4K_PAGES)
|
||||
#define NV_DMA_U32_MAX_4K_PAGES ((NvU32)((NV_U32_MAX >> PAGE_SHIFT) + 1))
|
||||
#define NV_DMA_SUBMAP_MAX_PAGES ((NvU32)(NV_DMA_U32_MAX_4K_PAGES - 16))
|
||||
#else
|
||||
#define NV_DMA_SUBMAP_MAX_PAGES ((NvU32)(NV_U32_MAX >> PAGE_SHIFT))
|
||||
#endif
|
||||
|
||||
/*
 * Convert a submap index to a page index by scaling it by
 * NV_DMA_SUBMAP_MAX_PAGES. The argument is parenthesized so that expression
 * arguments (e.g. "i + 1") are multiplied as a whole rather than having only
 * their last operand scaled.
 */
#define NV_DMA_SUBMAP_IDX_TO_PAGE_IDX(s) ((s) * NV_DMA_SUBMAP_MAX_PAGES)
|
||||
|
||||
/*
|
||||
@ -1460,6 +1442,11 @@ typedef struct coherent_link_info_s {
|
||||
* baremetal OS environment it is System Physical Address(SPA) and in the case
|
||||
* of virtualized OS environment it is Intermediate Physical Address(IPA) */
|
||||
NvU64 gpu_mem_pa;
|
||||
|
||||
/* Physical address of the reserved portion of the GPU memory, applicable
|
||||
* only in Grace Hopper self-hosted passthrough virtualization platform. */
|
||||
NvU64 rsvd_mem_pa;
|
||||
|
||||
/* Bitmap of NUMA node ids, corresponding to the reserved PXMs,
|
||||
* available for adding GPU memory to the kernel as system RAM */
|
||||
DECLARE_BITMAP(free_node_bitmap, MAX_NUMNODES);
|
||||
@ -1607,6 +1594,26 @@ typedef struct nv_linux_state_s {
|
||||
|
||||
struct nv_dma_device dma_dev;
|
||||
struct nv_dma_device niso_dma_dev;
|
||||
|
||||
/*
|
||||
* Background kthread for handling deferred open operations
|
||||
* (e.g. from O_NONBLOCK).
|
||||
*
|
||||
* Adding to open_q and reading/writing is_accepting_opens
|
||||
* are protected by nvl->open_q_lock (not nvl->ldata_lock).
|
||||
* This allows new deferred open operations to be enqueued without
|
||||
* blocking behind previous ones (which hold nvl->ldata_lock).
|
||||
*
|
||||
* Adding to open_q is only safe if is_accepting_opens is true.
|
||||
* This prevents open operations from racing with device removal.
|
||||
*
|
||||
* Stopping open_q is only safe after setting is_accepting_opens to false.
|
||||
* This ensures that the open_q (and the larger nvl structure) will
|
||||
* outlive any of the open operations enqueued.
|
||||
*/
|
||||
nv_kthread_q_t open_q;
|
||||
NvBool is_accepting_opens;
|
||||
struct semaphore open_q_lock;
|
||||
} nv_linux_state_t;
|
||||
|
||||
extern nv_linux_state_t *nv_linux_devices;
|
||||
@ -1656,7 +1663,7 @@ typedef struct
|
||||
|
||||
nvidia_stack_t *sp;
|
||||
nv_alloc_t *free_list;
|
||||
void *nvptr;
|
||||
nv_linux_state_t *nvptr;
|
||||
nvidia_event_t *event_data_head, *event_data_tail;
|
||||
NvBool dataless_event_pending;
|
||||
nv_spinlock_t fp_lock;
|
||||
@ -1667,6 +1674,12 @@ typedef struct
|
||||
nv_alloc_mapping_context_t mmap_context;
|
||||
struct address_space mapping;
|
||||
|
||||
nv_kthread_q_item_t open_q_item;
|
||||
struct completion open_complete;
|
||||
nv_linux_state_t *deferred_open_nvl;
|
||||
int open_rc;
|
||||
NV_STATUS adapter_status;
|
||||
|
||||
struct list_head entry;
|
||||
} nv_linux_file_private_t;
|
||||
|
||||
@ -1675,6 +1688,21 @@ static inline nv_linux_file_private_t *nv_get_nvlfp_from_nvfp(nv_file_private_t
|
||||
return container_of(nvfp, nv_linux_file_private_t, nvfp);
|
||||
}
|
||||
|
||||
/*
 * Wait on nvlfp->open_complete using wait_for_completion_interruptible()
 * and return that call's result unchanged.
 * NOTE(review): presumably open_complete is signalled when a deferred open
 * finishes -- confirm against the code that completes it.
 */
static inline int nv_wait_open_complete_interruptible(nv_linux_file_private_t *nvlfp)
|
||||
{
|
||||
return wait_for_completion_interruptible(&nvlfp->open_complete);
|
||||
}
|
||||
|
||||
/*
 * Wait on nvlfp->open_complete using wait_for_completion(); nothing is
 * returned to the caller.
 */
static inline void nv_wait_open_complete(nv_linux_file_private_t *nvlfp)
|
||||
{
|
||||
wait_for_completion(&nvlfp->open_complete);
|
||||
}
|
||||
|
||||
/*
 * Report the state of nvlfp->open_complete by returning the result of
 * completion_done() on it, converted to NvBool.
 */
static inline NvBool nv_is_open_complete(nv_linux_file_private_t *nvlfp)
|
||||
{
|
||||
return completion_done(&nvlfp->open_complete);
|
||||
}
|
||||
|
||||
#define NV_SET_FILE_PRIVATE(filep,data) ((filep)->private_data = (data))
|
||||
#define NV_GET_LINUX_FILE_PRIVATE(filep) ((nv_linux_file_private_t *)(filep)->private_data)
|
||||
|
||||
@ -1756,12 +1784,18 @@ static inline NV_STATUS nv_check_gpu_state(nv_state_t *nv)
|
||||
extern NvU32 NVreg_EnableUserNUMAManagement;
|
||||
extern NvU32 NVreg_RegisterPCIDriver;
|
||||
extern NvU32 NVreg_EnableResizableBar;
|
||||
extern NvU32 NVreg_EnableNonblockingOpen;
|
||||
|
||||
extern NvU32 num_probed_nv_devices;
|
||||
extern NvU32 num_nv_devices;
|
||||
|
||||
#define NV_FILE_INODE(file) (file)->f_inode
|
||||
|
||||
/*
 * Return nonzero when the minor number of the inode's device (i_rdev)
 * equals NV_MINOR_DEVICE_NUMBER_CONTROL_DEVICE, zero otherwise.
 */
static inline int nv_is_control_device(struct inode *inode)
|
||||
{
|
||||
return (minor((inode)->i_rdev) == NV_MINOR_DEVICE_NUMBER_CONTROL_DEVICE);
|
||||
}
|
||||
|
||||
#if defined(NV_DOM0_KERNEL_PRESENT) || defined(NV_VGPU_KVM_BUILD)
|
||||
#define NV_VGX_HYPER
|
||||
#if defined(NV_XEN_IOEMU_INJECT_MSI)
|
||||
@ -2040,4 +2074,7 @@ typedef enum
|
||||
#include <linux/clk-provider.h>
|
||||
#endif
|
||||
|
||||
#define NV_EXPORT_SYMBOL(symbol) EXPORT_SYMBOL_GPL(symbol)
|
||||
#define NV_CHECK_EXPORT_SYMBOL(symbol) NV_IS_EXPORT_SYMBOL_PRESENT_##symbol
|
||||
|
||||
#endif /* _NV_LINUX_H_ */
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* SPDX-FileCopyrightText: Copyright (c) 2017 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* SPDX-FileCopyrightText: Copyright (c) 2017-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
@ -37,6 +37,7 @@
|
||||
|
||||
#if defined(CONFIG_PREEMPT_RT) || defined(CONFIG_PREEMPT_RT_FULL)
|
||||
typedef raw_spinlock_t nv_spinlock_t;
|
||||
#define NV_DEFINE_SPINLOCK(lock) DEFINE_RAW_SPINLOCK(lock)
|
||||
#define NV_SPIN_LOCK_INIT(lock) raw_spin_lock_init(lock)
|
||||
#define NV_SPIN_LOCK_IRQ(lock) raw_spin_lock_irq(lock)
|
||||
#define NV_SPIN_UNLOCK_IRQ(lock) raw_spin_unlock_irq(lock)
|
||||
@ -47,6 +48,7 @@ typedef raw_spinlock_t nv_spinlock_t;
|
||||
#define NV_SPIN_UNLOCK_WAIT(lock) raw_spin_unlock_wait(lock)
|
||||
#else
|
||||
typedef spinlock_t nv_spinlock_t;
|
||||
#define NV_DEFINE_SPINLOCK(lock) DEFINE_SPINLOCK(lock)
|
||||
#define NV_SPIN_LOCK_INIT(lock) spin_lock_init(lock)
|
||||
#define NV_SPIN_LOCK_IRQ(lock) spin_lock_irq(lock)
|
||||
#define NV_SPIN_UNLOCK_IRQ(lock) spin_unlock_irq(lock)
|
||||
|
@ -44,12 +44,18 @@ typedef int vm_fault_t;
|
||||
|
||||
#include <linux/mm.h>
|
||||
#include <linux/sched.h>
|
||||
#if defined(NV_PIN_USER_PAGES_PRESENT)
|
||||
|
||||
/*
|
||||
* FreeBSD's pin_user_pages's conftest breaks since pin_user_pages is an inline
|
||||
* function. Because it simply maps to get_user_pages, we can just replace
|
||||
* NV_PIN_USER_PAGES with NV_GET_USER_PAGES on FreeBSD
|
||||
*/
|
||||
#if defined(NV_PIN_USER_PAGES_PRESENT) && !defined(NV_BSD)
|
||||
#if defined(NV_PIN_USER_PAGES_HAS_ARGS_VMAS)
|
||||
#define NV_PIN_USER_PAGES pin_user_pages
|
||||
#define NV_PIN_USER_PAGES(start, nr_pages, gup_flags, pages) \
|
||||
pin_user_pages(start, nr_pages, gup_flags, pages, NULL)
|
||||
#else
|
||||
#define NV_PIN_USER_PAGES(start, nr_pages, gup_flags, pages, vmas) \
|
||||
pin_user_pages(start, nr_pages, gup_flags, pages)
|
||||
#define NV_PIN_USER_PAGES pin_user_pages
|
||||
#endif // NV_PIN_USER_PAGES_HAS_ARGS_VMAS
|
||||
#define NV_UNPIN_USER_PAGE unpin_user_page
|
||||
#else
|
||||
@ -80,29 +86,28 @@ typedef int vm_fault_t;
|
||||
*/
|
||||
|
||||
#if defined(NV_GET_USER_PAGES_HAS_ARGS_FLAGS)
|
||||
#define NV_GET_USER_PAGES(start, nr_pages, flags, pages, vmas) \
|
||||
get_user_pages(start, nr_pages, flags, pages)
|
||||
#elif defined(NV_GET_USER_PAGES_HAS_ARGS_FLAGS_VMAS)
|
||||
#define NV_GET_USER_PAGES get_user_pages
|
||||
#elif defined(NV_GET_USER_PAGES_HAS_ARGS_FLAGS_VMAS)
|
||||
#define NV_GET_USER_PAGES(start, nr_pages, flags, pages) \
|
||||
get_user_pages(start, nr_pages, flags, pages, NULL)
|
||||
#elif defined(NV_GET_USER_PAGES_HAS_ARGS_TSK_FLAGS_VMAS)
|
||||
#define NV_GET_USER_PAGES(start, nr_pages, flags, pages, vmas) \
|
||||
get_user_pages(current, current->mm, start, nr_pages, flags, pages, vmas)
|
||||
#define NV_GET_USER_PAGES(start, nr_pages, flags, pages) \
|
||||
get_user_pages(current, current->mm, start, nr_pages, flags, pages, NULL)
|
||||
#else
|
||||
static inline long NV_GET_USER_PAGES(unsigned long start,
|
||||
unsigned long nr_pages,
|
||||
unsigned int flags,
|
||||
struct page **pages,
|
||||
struct vm_area_struct **vmas)
|
||||
struct page **pages)
|
||||
{
|
||||
int write = flags & FOLL_WRITE;
|
||||
int force = flags & FOLL_FORCE;
|
||||
|
||||
#if defined(NV_GET_USER_PAGES_HAS_ARGS_WRITE_FORCE_VMAS)
|
||||
return get_user_pages(start, nr_pages, write, force, pages, vmas);
|
||||
return get_user_pages(start, nr_pages, write, force, pages, NULL);
|
||||
#else
|
||||
// NV_GET_USER_PAGES_HAS_ARGS_TSK_WRITE_FORCE_VMAS
|
||||
return get_user_pages(current, current->mm, start, nr_pages, write,
|
||||
force, pages, vmas);
|
||||
force, pages, NULL);
|
||||
#endif // NV_GET_USER_PAGES_HAS_ARGS_WRITE_FORCE_VMAS
|
||||
}
|
||||
#endif // NV_GET_USER_PAGES_HAS_ARGS_FLAGS
|
||||
@ -124,13 +129,13 @@ typedef int vm_fault_t;
|
||||
|
||||
#if defined(NV_PIN_USER_PAGES_REMOTE_PRESENT)
|
||||
#if defined(NV_PIN_USER_PAGES_REMOTE_HAS_ARGS_TSK_VMAS)
|
||||
#define NV_PIN_USER_PAGES_REMOTE(mm, start, nr_pages, flags, pages, vmas, locked) \
|
||||
pin_user_pages_remote(NULL, mm, start, nr_pages, flags, pages, vmas, locked)
|
||||
#define NV_PIN_USER_PAGES_REMOTE(mm, start, nr_pages, flags, pages, locked) \
|
||||
pin_user_pages_remote(NULL, mm, start, nr_pages, flags, pages, NULL, locked)
|
||||
#elif defined(NV_PIN_USER_PAGES_REMOTE_HAS_ARGS_VMAS)
|
||||
#define NV_PIN_USER_PAGES_REMOTE pin_user_pages_remote
|
||||
#define NV_PIN_USER_PAGES_REMOTE(mm, start, nr_pages, flags, pages, locked) \
|
||||
pin_user_pages_remote(mm, start, nr_pages, flags, pages, NULL, locked)
|
||||
#else
|
||||
#define NV_PIN_USER_PAGES_REMOTE(mm, start, nr_pages, flags, pages, vmas, locked) \
|
||||
pin_user_pages_remote(mm, start, nr_pages, flags, pages, locked)
|
||||
#define NV_PIN_USER_PAGES_REMOTE pin_user_pages_remote
|
||||
#endif // NV_PIN_USER_PAGES_REMOTE_HAS_ARGS_TSK_VMAS
|
||||
#else
|
||||
#define NV_PIN_USER_PAGES_REMOTE NV_GET_USER_PAGES_REMOTE
|
||||
@ -166,19 +171,19 @@ typedef int vm_fault_t;
|
||||
|
||||
#if defined(NV_GET_USER_PAGES_REMOTE_PRESENT)
|
||||
#if defined(NV_GET_USER_PAGES_REMOTE_HAS_ARGS_FLAGS_LOCKED)
|
||||
#define NV_GET_USER_PAGES_REMOTE(mm, start, nr_pages, flags, pages, vmas, locked) \
|
||||
get_user_pages_remote(mm, start, nr_pages, flags, pages, locked)
|
||||
|
||||
#elif defined(NV_GET_USER_PAGES_REMOTE_HAS_ARGS_FLAGS_LOCKED_VMAS)
|
||||
#define NV_GET_USER_PAGES_REMOTE get_user_pages_remote
|
||||
|
||||
#elif defined(NV_GET_USER_PAGES_REMOTE_HAS_ARGS_FLAGS_LOCKED_VMAS)
|
||||
#define NV_GET_USER_PAGES_REMOTE(mm, start, nr_pages, flags, pages, locked) \
|
||||
get_user_pages_remote(mm, start, nr_pages, flags, pages, NULL, locked)
|
||||
|
||||
#elif defined(NV_GET_USER_PAGES_REMOTE_HAS_ARGS_TSK_FLAGS_LOCKED_VMAS)
|
||||
#define NV_GET_USER_PAGES_REMOTE(mm, start, nr_pages, flags, pages, vmas, locked) \
|
||||
get_user_pages_remote(NULL, mm, start, nr_pages, flags, pages, vmas, locked)
|
||||
#define NV_GET_USER_PAGES_REMOTE(mm, start, nr_pages, flags, pages, locked) \
|
||||
get_user_pages_remote(NULL, mm, start, nr_pages, flags, pages, NULL, locked)
|
||||
|
||||
#elif defined(NV_GET_USER_PAGES_REMOTE_HAS_ARGS_TSK_FLAGS_VMAS)
|
||||
#define NV_GET_USER_PAGES_REMOTE(mm, start, nr_pages, flags, pages, vmas, locked) \
|
||||
get_user_pages_remote(NULL, mm, start, nr_pages, flags, pages, vmas)
|
||||
#define NV_GET_USER_PAGES_REMOTE(mm, start, nr_pages, flags, pages, locked) \
|
||||
get_user_pages_remote(NULL, mm, start, nr_pages, flags, pages, NULL)
|
||||
|
||||
#else
|
||||
// NV_GET_USER_PAGES_REMOTE_HAS_ARGS_TSK_WRITE_FORCE_VMAS
|
||||
@ -187,14 +192,13 @@ typedef int vm_fault_t;
|
||||
unsigned long nr_pages,
|
||||
unsigned int flags,
|
||||
struct page **pages,
|
||||
struct vm_area_struct **vmas,
|
||||
int *locked)
|
||||
{
|
||||
int write = flags & FOLL_WRITE;
|
||||
int force = flags & FOLL_FORCE;
|
||||
|
||||
return get_user_pages_remote(NULL, mm, start, nr_pages, write, force,
|
||||
pages, vmas);
|
||||
pages, NULL);
|
||||
}
|
||||
#endif // NV_GET_USER_PAGES_REMOTE_HAS_ARGS_FLAGS_LOCKED
|
||||
#else
|
||||
@ -204,18 +208,17 @@ typedef int vm_fault_t;
|
||||
unsigned long nr_pages,
|
||||
unsigned int flags,
|
||||
struct page **pages,
|
||||
struct vm_area_struct **vmas,
|
||||
int *locked)
|
||||
{
|
||||
int write = flags & FOLL_WRITE;
|
||||
int force = flags & FOLL_FORCE;
|
||||
|
||||
return get_user_pages(NULL, mm, start, nr_pages, write, force, pages, vmas);
|
||||
return get_user_pages(NULL, mm, start, nr_pages, write, force, pages, NULL);
|
||||
}
|
||||
|
||||
#else
|
||||
#define NV_GET_USER_PAGES_REMOTE(mm, start, nr_pages, flags, pages, vmas, locked) \
|
||||
get_user_pages(NULL, mm, start, nr_pages, flags, pages, vmas)
|
||||
#define NV_GET_USER_PAGES_REMOTE(mm, start, nr_pages, flags, pages, locked) \
|
||||
get_user_pages(NULL, mm, start, nr_pages, flags, pages, NULL)
|
||||
#endif // NV_GET_USER_PAGES_HAS_ARGS_TSK_WRITE_FORCE_VMAS
|
||||
#endif // NV_GET_USER_PAGES_REMOTE_PRESENT
|
||||
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* SPDX-FileCopyrightText: Copyright (c) 2015 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* SPDX-FileCopyrightText: Copyright (c) 2015-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
@ -60,6 +60,7 @@ static inline pgprot_t pgprot_modify_writecombine(pgprot_t old_prot)
|
||||
#endif /* !defined(NV_VMWARE) */
|
||||
|
||||
#if defined(NVCPU_AARCH64)
|
||||
extern NvBool nvos_is_chipset_io_coherent(void);
|
||||
/*
|
||||
* Don't rely on the kernel's definition of pgprot_noncached(), as on 64-bit
|
||||
* ARM that's not for system memory, but device memory instead. For I/O cache
|
||||
|
@ -92,6 +92,24 @@ typedef struct file_operations nv_proc_ops_t;
|
||||
#endif
|
||||
|
||||
#define NV_DEFINE_SINGLE_PROCFS_FILE_HELPER(name, lock) \
|
||||
static ssize_t nv_procfs_read_lock_##name( \
|
||||
struct file *file, \
|
||||
char __user *buf, \
|
||||
size_t size, \
|
||||
loff_t *ppos \
|
||||
) \
|
||||
{ \
|
||||
int ret; \
|
||||
ret = nv_down_read_interruptible(&lock); \
|
||||
if (ret < 0) \
|
||||
{ \
|
||||
return ret; \
|
||||
} \
|
||||
size = seq_read(file, buf, size, ppos); \
|
||||
up_read(&lock); \
|
||||
return size; \
|
||||
} \
|
||||
\
|
||||
static int nv_procfs_open_##name( \
|
||||
struct inode *inode, \
|
||||
struct file *filep \
|
||||
@ -104,11 +122,6 @@ typedef struct file_operations nv_proc_ops_t;
|
||||
{ \
|
||||
return ret; \
|
||||
} \
|
||||
ret = nv_down_read_interruptible(&lock); \
|
||||
if (ret < 0) \
|
||||
{ \
|
||||
single_release(inode, filep); \
|
||||
} \
|
||||
return ret; \
|
||||
} \
|
||||
\
|
||||
@ -117,7 +130,6 @@ typedef struct file_operations nv_proc_ops_t;
|
||||
struct file *filep \
|
||||
) \
|
||||
{ \
|
||||
up_read(&lock); \
|
||||
return single_release(inode, filep); \
|
||||
}
|
||||
|
||||
@ -127,46 +139,7 @@ typedef struct file_operations nv_proc_ops_t;
|
||||
static const nv_proc_ops_t nv_procfs_##name##_fops = { \
|
||||
NV_PROC_OPS_SET_OWNER() \
|
||||
.NV_PROC_OPS_OPEN = nv_procfs_open_##name, \
|
||||
.NV_PROC_OPS_READ = seq_read, \
|
||||
.NV_PROC_OPS_LSEEK = seq_lseek, \
|
||||
.NV_PROC_OPS_RELEASE = nv_procfs_release_##name, \
|
||||
};
|
||||
|
||||
|
||||
#define NV_DEFINE_SINGLE_PROCFS_FILE_READ_WRITE(name, lock, \
|
||||
write_callback) \
|
||||
NV_DEFINE_SINGLE_PROCFS_FILE_HELPER(name, lock) \
|
||||
\
|
||||
static ssize_t nv_procfs_write_##name( \
|
||||
struct file *file, \
|
||||
const char __user *buf, \
|
||||
size_t size, \
|
||||
loff_t *ppos \
|
||||
) \
|
||||
{ \
|
||||
ssize_t ret; \
|
||||
struct seq_file *s; \
|
||||
\
|
||||
s = file->private_data; \
|
||||
if (s == NULL) \
|
||||
{ \
|
||||
return -EIO; \
|
||||
} \
|
||||
\
|
||||
ret = write_callback(s, buf + *ppos, size - *ppos); \
|
||||
if (ret == 0) \
|
||||
{ \
|
||||
/* avoid infinite loop */ \
|
||||
ret = -EIO; \
|
||||
} \
|
||||
return ret; \
|
||||
} \
|
||||
\
|
||||
static const nv_proc_ops_t nv_procfs_##name##_fops = { \
|
||||
NV_PROC_OPS_SET_OWNER() \
|
||||
.NV_PROC_OPS_OPEN = nv_procfs_open_##name, \
|
||||
.NV_PROC_OPS_READ = seq_read, \
|
||||
.NV_PROC_OPS_WRITE = nv_procfs_write_##name, \
|
||||
.NV_PROC_OPS_READ = nv_procfs_read_lock_##name, \
|
||||
.NV_PROC_OPS_LSEEK = seq_lseek, \
|
||||
.NV_PROC_OPS_RELEASE = nv_procfs_release_##name, \
|
||||
};
|
||||
|
@ -88,4 +88,7 @@ int nv_linux_add_device_locked(nv_linux_state_t *);
|
||||
void nv_linux_remove_device_locked(nv_linux_state_t *);
|
||||
NvBool nv_acpi_power_resource_method_present(struct pci_dev *);
|
||||
|
||||
int nv_linux_init_open_q(nv_linux_state_t *);
|
||||
void nv_linux_stop_open_q(nv_linux_state_t *);
|
||||
|
||||
#endif /* _NV_PROTO_H_ */
|
||||
|
@ -221,7 +221,6 @@ typedef struct
|
||||
#define NV_RM_PAGE_MASK (NV_RM_PAGE_SIZE - 1)
|
||||
|
||||
#define NV_RM_TO_OS_PAGE_SHIFT (os_page_shift - NV_RM_PAGE_SHIFT)
|
||||
#define NV_RM_PAGES_PER_OS_PAGE (1U << NV_RM_TO_OS_PAGE_SHIFT)
|
||||
#define NV_RM_PAGES_TO_OS_PAGES(count) \
|
||||
((((NvUPtr)(count)) >> NV_RM_TO_OS_PAGE_SHIFT) + \
|
||||
((((count) & ((1 << NV_RM_TO_OS_PAGE_SHIFT) - 1)) != 0) ? 1 : 0))
|
||||
@ -467,12 +466,6 @@ typedef struct nv_state_t
|
||||
NvHandle hDisp;
|
||||
} rmapi;
|
||||
|
||||
/* Bool to check if ISO iommu enabled */
|
||||
NvBool iso_iommu_present;
|
||||
|
||||
/* Bool to check if NISO iommu enabled */
|
||||
NvBool niso_iommu_present;
|
||||
|
||||
/* Bool to check if dma-buf is supported */
|
||||
NvBool dma_buf_supported;
|
||||
|
||||
@ -484,6 +477,22 @@ typedef struct nv_state_t
|
||||
|
||||
/* Bool to check if the GPU has a coherent sysmem link */
|
||||
NvBool coherent;
|
||||
|
||||
/*
|
||||
* NUMA node ID of the CPU to which the GPU is attached.
|
||||
* Holds NUMA_NO_NODE on platforms that don't support NUMA configuration.
|
||||
*/
|
||||
NvS32 cpu_numa_node_id;
|
||||
|
||||
struct {
|
||||
/* Bool to check if ISO iommu enabled */
|
||||
NvBool iso_iommu_present;
|
||||
/* Bool to check if NISO iommu enabled */
|
||||
NvBool niso_iommu_present;
|
||||
/* Display SMMU Stream IDs */
|
||||
NvU32 dispIsoStreamId;
|
||||
NvU32 dispNisoStreamId;
|
||||
} iommus;
|
||||
} nv_state_t;
|
||||
|
||||
// These defines need to be in sync with the defines in system.h
|
||||
@ -613,10 +622,10 @@ typedef enum
|
||||
(((nv)->flags & NV_FLAG_IN_SURPRISE_REMOVAL) != 0)
|
||||
|
||||
#define NV_SOC_IS_ISO_IOMMU_PRESENT(nv) \
|
||||
((nv)->iso_iommu_present)
|
||||
((nv)->iommus.iso_iommu_present)
|
||||
|
||||
#define NV_SOC_IS_NISO_IOMMU_PRESENT(nv) \
|
||||
((nv)->niso_iommu_present)
|
||||
((nv)->iommus.niso_iommu_present)
|
||||
/*
|
||||
* GPU add/remove events
|
||||
*/
|
||||
@ -779,8 +788,6 @@ NV_STATUS NV_API_CALL nv_register_phys_pages (nv_state_t *, NvU64 *, NvU64,
|
||||
void NV_API_CALL nv_unregister_phys_pages (nv_state_t *, void *);
|
||||
|
||||
NV_STATUS NV_API_CALL nv_dma_map_sgt (nv_dma_device_t *, NvU64, NvU64 *, NvU32, void **);
|
||||
NV_STATUS NV_API_CALL nv_dma_map_pages (nv_dma_device_t *, NvU64, NvU64 *, NvBool, NvU32, void **);
|
||||
NV_STATUS NV_API_CALL nv_dma_unmap_pages (nv_dma_device_t *, NvU64, NvU64 *, void **);
|
||||
|
||||
NV_STATUS NV_API_CALL nv_dma_map_alloc (nv_dma_device_t *, NvU64, NvU64 *, NvBool, void **);
|
||||
NV_STATUS NV_API_CALL nv_dma_unmap_alloc (nv_dma_device_t *, NvU64, NvU64 *, void **);
|
||||
@ -830,7 +837,7 @@ void NV_API_CALL nv_put_firmware(const void *);
|
||||
nv_file_private_t* NV_API_CALL nv_get_file_private(NvS32, NvBool, void **);
|
||||
void NV_API_CALL nv_put_file_private(void *);
|
||||
|
||||
NV_STATUS NV_API_CALL nv_get_device_memory_config(nv_state_t *, NvU64 *, NvU64 *, NvU32 *, NvS32 *);
|
||||
NV_STATUS NV_API_CALL nv_get_device_memory_config(nv_state_t *, NvU64 *, NvU64 *, NvU64 *, NvU32 *, NvS32 *);
|
||||
NV_STATUS NV_API_CALL nv_get_egm_info(nv_state_t *, NvU64 *, NvU64 *, NvS32 *);
|
||||
|
||||
NV_STATUS NV_API_CALL nv_get_ibmnpu_genreg_info(nv_state_t *, NvU64 *, NvU64 *, void**);
|
||||
@ -877,9 +884,9 @@ struct drm_gem_object;
|
||||
|
||||
NV_STATUS NV_API_CALL nv_dma_import_sgt (nv_dma_device_t *, struct sg_table *, struct drm_gem_object *);
|
||||
void NV_API_CALL nv_dma_release_sgt(struct sg_table *, struct drm_gem_object *);
|
||||
NV_STATUS NV_API_CALL nv_dma_import_dma_buf (nv_dma_device_t *, struct dma_buf *, NvU32 *, void **, struct sg_table **, nv_dma_buf_t **);
|
||||
NV_STATUS NV_API_CALL nv_dma_import_from_fd (nv_dma_device_t *, NvS32, NvU32 *, void **, struct sg_table **, nv_dma_buf_t **);
|
||||
void NV_API_CALL nv_dma_release_dma_buf (void *, nv_dma_buf_t *);
|
||||
NV_STATUS NV_API_CALL nv_dma_import_dma_buf (nv_dma_device_t *, struct dma_buf *, NvU32 *, struct sg_table **, nv_dma_buf_t **);
|
||||
NV_STATUS NV_API_CALL nv_dma_import_from_fd (nv_dma_device_t *, NvS32, NvU32 *, struct sg_table **, nv_dma_buf_t **);
|
||||
void NV_API_CALL nv_dma_release_dma_buf (nv_dma_buf_t *);
|
||||
|
||||
void NV_API_CALL nv_schedule_uvm_isr (nv_state_t *);
|
||||
|
||||
@ -895,6 +902,8 @@ typedef void (*nvTegraDceClientIpcCallback)(NvU32, NvU32, NvU32, void *, void *)
|
||||
NV_STATUS NV_API_CALL nv_get_num_phys_pages (void *, NvU32 *);
|
||||
NV_STATUS NV_API_CALL nv_get_phys_pages (void *, void *, NvU32 *);
|
||||
|
||||
void NV_API_CALL nv_get_disp_smmu_stream_ids (nv_state_t *, NvU32 *, NvU32 *);
|
||||
|
||||
/*
|
||||
* ---------------------------------------------------------------------------
|
||||
*
|
||||
@ -921,6 +930,7 @@ NV_STATUS NV_API_CALL rm_ioctl (nvidia_stack_t *, nv_state_t *
|
||||
NvBool NV_API_CALL rm_isr (nvidia_stack_t *, nv_state_t *, NvU32 *);
|
||||
void NV_API_CALL rm_isr_bh (nvidia_stack_t *, nv_state_t *);
|
||||
void NV_API_CALL rm_isr_bh_unlocked (nvidia_stack_t *, nv_state_t *);
|
||||
NvBool NV_API_CALL rm_is_msix_allowed (nvidia_stack_t *, nv_state_t *);
|
||||
NV_STATUS NV_API_CALL rm_power_management (nvidia_stack_t *, nv_state_t *, nv_pm_action_t);
|
||||
NV_STATUS NV_API_CALL rm_stop_user_channels (nvidia_stack_t *, nv_state_t *);
|
||||
NV_STATUS NV_API_CALL rm_restart_user_channels (nvidia_stack_t *, nv_state_t *);
|
||||
@ -940,6 +950,7 @@ void NV_API_CALL rm_parse_option_string (nvidia_stack_t *, const char *
|
||||
char* NV_API_CALL rm_remove_spaces (const char *);
|
||||
char* NV_API_CALL rm_string_token (char **, const char);
|
||||
void NV_API_CALL rm_vgpu_vfio_set_driver_vm(nvidia_stack_t *, NvBool);
|
||||
NV_STATUS NV_API_CALL rm_get_adapter_status_external(nvidia_stack_t *, nv_state_t *);
|
||||
|
||||
NV_STATUS NV_API_CALL rm_run_rc_callback (nvidia_stack_t *, nv_state_t *);
|
||||
void NV_API_CALL rm_execute_work_item (nvidia_stack_t *, void *);
|
||||
|
@ -62,10 +62,10 @@ typedef struct
|
||||
/*******************************************************************************
|
||||
nvUvmInterfaceRegisterGpu
|
||||
|
||||
Registers the GPU with the provided UUID for use. A GPU must be registered
|
||||
before its UUID can be used with any other API. This call is ref-counted so
|
||||
every nvUvmInterfaceRegisterGpu must be paired with a corresponding
|
||||
nvUvmInterfaceUnregisterGpu.
|
||||
Registers the GPU with the provided physical UUID for use. A GPU must be
|
||||
registered before its UUID can be used with any other API. This call is
|
||||
ref-counted so every nvUvmInterfaceRegisterGpu must be paired with a
|
||||
corresponding nvUvmInterfaceUnregisterGpu.
|
||||
|
||||
You don't need to call nvUvmInterfaceSessionCreate before calling this.
|
||||
|
||||
@ -79,12 +79,13 @@ NV_STATUS nvUvmInterfaceRegisterGpu(const NvProcessorUuid *gpuUuid, UvmGpuPlatfo
|
||||
/*******************************************************************************
|
||||
nvUvmInterfaceUnregisterGpu
|
||||
|
||||
Unregisters the GPU with the provided UUID. This drops the ref count from
|
||||
nvUvmInterfaceRegisterGpu. Once the reference count goes to 0 the device may
|
||||
no longer be accessible until the next nvUvmInterfaceRegisterGpu call. No
|
||||
automatic resource freeing is performed, so only make the last unregister
|
||||
call after destroying all your allocations associated with that UUID (such
|
||||
as those from nvUvmInterfaceAddressSpaceCreate).
|
||||
Unregisters the GPU with the provided physical UUID. This drops the ref
|
||||
count from nvUvmInterfaceRegisterGpu. Once the reference count goes to 0
|
||||
the device may no longer be accessible until the next
|
||||
nvUvmInterfaceRegisterGpu call. No automatic resource freeing is performed,
|
||||
so only make the last unregister call after destroying all your allocations
|
||||
associated with that UUID (such as those from
|
||||
nvUvmInterfaceAddressSpaceCreate).
|
||||
|
||||
If the UUID is not found, no operation is performed.
|
||||
*/
|
||||
@ -121,10 +122,10 @@ NV_STATUS nvUvmInterfaceSessionDestroy(uvmGpuSessionHandle session);
|
||||
nvUvmInterfaceDeviceCreate
|
||||
|
||||
Creates a device object under the given session for the GPU with the given
|
||||
UUID. Also creates a partition object for the device iff bCreateSmcPartition
|
||||
is true and pGpuInfo->smcEnabled is true. pGpuInfo->smcUserClientInfo will
|
||||
be used to determine the SMC partition in this case. A device handle is
|
||||
returned in the device output parameter.
|
||||
physical UUID. Also creates a partition object for the device iff
|
||||
bCreateSmcPartition is true and pGpuInfo->smcEnabled is true.
|
||||
pGpuInfo->smcUserClientInfo will be used to determine the SMC partition in
|
||||
this case. A device handle is returned in the device output parameter.
|
||||
|
||||
Error codes:
|
||||
NV_ERR_GENERIC
|
||||
@ -161,6 +162,7 @@ void nvUvmInterfaceDeviceDestroy(uvmGpuDeviceHandle device);
|
||||
NV_STATUS nvUvmInterfaceAddressSpaceCreate(uvmGpuDeviceHandle device,
|
||||
unsigned long long vaBase,
|
||||
unsigned long long vaSize,
|
||||
NvBool enableAts,
|
||||
uvmGpuAddressSpaceHandle *vaSpace,
|
||||
UvmGpuAddressSpaceInfo *vaSpaceInfo);
|
||||
|
||||
@ -422,33 +424,6 @@ NV_STATUS nvUvmInterfacePmaPinPages(void *pPma,
|
||||
NvU64 pageSize,
|
||||
NvU32 flags);
|
||||
|
||||
/*******************************************************************************
|
||||
nvUvmInterfacePmaUnpinPages
|
||||
|
||||
This function will unpin the physical memory allocated using PMA. The pages
|
||||
passed as input must be already pinned, else this function will return an
|
||||
error and rollback any change if any page is not previously marked "pinned".
|
||||
Behaviour is undefined if any blacklisted pages are unpinned.
|
||||
|
||||
Arguments:
|
||||
pPma[IN] - Pointer to PMA object.
|
||||
pPages[IN] - Array of pointers, containing the PA base
|
||||
address of each page to be unpinned.
|
||||
pageCount [IN] - Number of pages required to be unpinned.
|
||||
pageSize [IN] - Page size of each page to be unpinned.
|
||||
|
||||
Error codes:
|
||||
NV_ERR_INVALID_ARGUMENT - Invalid input arguments.
|
||||
NV_ERR_GENERIC - Unexpected error. We try hard to avoid
|
||||
returning this error code as is not very
|
||||
informative.
|
||||
NV_ERR_NOT_SUPPORTED - Operation not supported on broken FB
|
||||
*/
|
||||
NV_STATUS nvUvmInterfacePmaUnpinPages(void *pPma,
|
||||
NvU64 *pPages,
|
||||
NvLength pageCount,
|
||||
NvU64 pageSize);
|
||||
|
||||
/*******************************************************************************
|
||||
nvUvmInterfaceMemoryFree
|
||||
|
||||
@ -638,6 +613,8 @@ NV_STATUS nvUvmInterfaceQueryCopyEnginesCaps(uvmGpuDeviceHandle device,
|
||||
nvUvmInterfaceGetGpuInfo
|
||||
|
||||
Return various gpu info, refer to the UvmGpuInfo struct for details.
|
||||
The input UUID is for the physical GPU and the pGpuClientInfo identifies
|
||||
the SMC partition if SMC is enabled and the partition exists.
|
||||
If no gpu matching the uuid is found, an error will be returned.
|
||||
|
||||
On Ampere+ GPUs, pGpuClientInfo contains SMC information provided by the
|
||||
@ -645,6 +622,9 @@ NV_STATUS nvUvmInterfaceQueryCopyEnginesCaps(uvmGpuDeviceHandle device,
|
||||
|
||||
Error codes:
|
||||
NV_ERR_GENERIC
|
||||
NV_ERR_NO_MEMORY
|
||||
NV_ERR_GPU_UUID_NOT_FOUND
|
||||
NV_ERR_INSUFFICIENT_PERMISSIONS
|
||||
NV_ERR_INSUFFICIENT_RESOURCES
|
||||
*/
|
||||
NV_STATUS nvUvmInterfaceGetGpuInfo(const NvProcessorUuid *gpuUuid,
|
||||
@ -857,7 +837,7 @@ NV_STATUS nvUvmInterfaceGetEccInfo(uvmGpuDeviceHandle device,
|
||||
UVM GPU UNLOCK
|
||||
|
||||
Arguments:
|
||||
gpuUuid[IN] - UUID of the GPU to operate on
|
||||
device[IN] - Device handle associated with the gpu
|
||||
bOwnInterrupts - Set to NV_TRUE for UVM to take ownership of the
|
||||
replayable page fault interrupts. Set to NV_FALSE
|
||||
to return ownership of the page fault interrupts
|
||||
@ -973,6 +953,7 @@ NV_STATUS nvUvmInterfaceGetNonReplayableFaults(UvmGpuFaultInfo *pFaultInfo,
|
||||
NOTES:
|
||||
- This function DOES NOT acquire the RM API or GPU locks. That is because
|
||||
it is called during fault servicing, which could produce deadlocks.
|
||||
- This function should not be called when interrupts are disabled.
|
||||
|
||||
Arguments:
|
||||
device[IN] - Device handle associated with the gpu
|
||||
@ -982,6 +963,27 @@ NV_STATUS nvUvmInterfaceGetNonReplayableFaults(UvmGpuFaultInfo *pFaultInfo,
|
||||
*/
|
||||
NV_STATUS nvUvmInterfaceFlushReplayableFaultBuffer(uvmGpuDeviceHandle device);
|
||||
|
||||
/*******************************************************************************
|
||||
nvUvmInterfaceTogglePrefetchFaults
|
||||
|
||||
This function sends an RPC to GSP in order to toggle the prefetch fault PRI.
|
||||
|
||||
NOTES:
|
||||
- This function DOES NOT acquire the RM API or GPU locks. That is because
|
||||
it is called during fault servicing, which could produce deadlocks.
|
||||
- This function should not be called when interrupts are disabled.
|
||||
|
||||
Arguments:
|
||||
pFaultInfo[IN] - Information provided by RM for fault handling.
|
||||
Used for obtaining the device handle without locks.
|
||||
bEnable[IN] - Instructs RM whether to toggle generating faults on
|
||||
prefetch on/off.
|
||||
|
||||
Error codes:
|
||||
NV_ERR_INVALID_ARGUMENT
|
||||
*/
|
||||
NV_STATUS nvUvmInterfaceTogglePrefetchFaults(UvmGpuFaultInfo *pFaultInfo, NvBool bEnable);
|
||||
|
||||
/*******************************************************************************
|
||||
nvUvmInterfaceInitAccessCntrInfo
|
||||
|
||||
@ -1087,7 +1089,8 @@ void nvUvmInterfaceDeRegisterUvmOps(void);
|
||||
|
||||
Error codes:
|
||||
NV_ERR_INVALID_ARGUMENT
|
||||
NV_ERR_OBJECT_NOT_FOUND : If device object associated with the uuids aren't found.
|
||||
NV_ERR_OBJECT_NOT_FOUND : If device object associated with the device
|
||||
handles isn't found.
|
||||
*/
|
||||
NV_STATUS nvUvmInterfaceP2pObjectCreate(uvmGpuDeviceHandle device1,
|
||||
uvmGpuDeviceHandle device2,
|
||||
@ -1140,6 +1143,8 @@ void nvUvmInterfaceP2pObjectDestroy(uvmGpuSessionHandle session,
|
||||
NV_ERR_NOT_READY - Returned when querying the PTEs requires a deferred setup
|
||||
which has not yet completed. It is expected that the caller
|
||||
will reattempt the call until a different code is returned.
|
||||
As an example, multi-node systems which require querying
|
||||
PTEs from the Fabric Manager may return this code.
|
||||
*/
|
||||
NV_STATUS nvUvmInterfaceGetExternalAllocPtes(uvmGpuAddressSpaceHandle vaSpace,
|
||||
NvHandle hMemory,
|
||||
@ -1449,18 +1454,7 @@ NV_STATUS nvUvmInterfacePagingChannelPushStream(UvmGpuPagingChannelHandle channe
|
||||
NvU32 methodStreamSize);
|
||||
|
||||
/*******************************************************************************
|
||||
CSL Interface and Locking
|
||||
|
||||
The following functions do not acquire the RM API or GPU locks and must not be called
|
||||
concurrently with the same UvmCslContext parameter in different threads. The caller must
|
||||
guarantee this exclusion.
|
||||
|
||||
* nvUvmInterfaceCslRotateIv
|
||||
* nvUvmInterfaceCslEncrypt
|
||||
* nvUvmInterfaceCslDecrypt
|
||||
* nvUvmInterfaceCslSign
|
||||
* nvUvmInterfaceCslQueryMessagePool
|
||||
* nvUvmInterfaceCslIncrementIv
|
||||
Cryptography Services Library (CSL) Interface
|
||||
*/
|
||||
|
||||
/*******************************************************************************
|
||||
@ -1471,8 +1465,11 @@ NV_STATUS nvUvmInterfacePagingChannelPushStream(UvmGpuPagingChannelHandle channe
|
||||
The lifetime of the context is the same as the lifetime of the secure channel
|
||||
it is paired with.
|
||||
|
||||
Locking: This function acquires an API lock.
|
||||
Memory : This function dynamically allocates memory.
|
||||
|
||||
Arguments:
|
||||
uvmCslContext[IN/OUT] - The CSL context.
|
||||
uvmCslContext[IN/OUT] - The CSL context associated with a channel.
|
||||
channel[IN] - Handle to a secure channel.
|
||||
|
||||
Error codes:
|
||||
@ -1490,11 +1487,33 @@ NV_STATUS nvUvmInterfaceCslInitContext(UvmCslContext *uvmCslContext,
|
||||
|
||||
If context is already deinitialized then function returns immediately.
|
||||
|
||||
Locking: This function does not acquire an API or GPU lock.
|
||||
Memory : This function may free memory.
|
||||
|
||||
Arguments:
|
||||
uvmCslContext[IN] - The CSL context.
|
||||
uvmCslContext[IN] - The CSL context associated with a channel.
|
||||
*/
|
||||
void nvUvmInterfaceDeinitCslContext(UvmCslContext *uvmCslContext);
|
||||
|
||||
/*******************************************************************************
|
||||
nvUvmInterfaceCslUpdateContext
|
||||
|
||||
Updates a context after a key rotation event and can only be called once per
|
||||
key rotation event. Following a key rotation event, and before
|
||||
nvUvmInterfaceCslUpdateContext is called, data encrypted by the GPU with the
|
||||
previous key can be decrypted with nvUvmInterfaceCslDecrypt.
|
||||
|
||||
Locking: This function acquires an API lock.
|
||||
Memory : This function does not dynamically allocate memory.
|
||||
|
||||
Arguments:
|
||||
uvmCslContext[IN] - The CSL context associated with a channel.
|
||||
|
||||
Error codes:
|
||||
NV_ERR_INVALID_ARGUMENT - The CSL context is not associated with a channel.
|
||||
*/
|
||||
NV_STATUS nvUvmInterfaceCslUpdateContext(UvmCslContext *uvmCslContext);
|
||||
|
||||
/*******************************************************************************
|
||||
nvUvmInterfaceCslRotateIv
|
||||
|
||||
@ -1509,11 +1528,13 @@ void nvUvmInterfaceDeinitCslContext(UvmCslContext *uvmCslContext);
|
||||
the channel must be idle before calling this function. This function can be
|
||||
called regardless of the value of the IV's message counter.
|
||||
|
||||
See "CSL Interface and Locking" for locking requirements.
|
||||
This function does not perform dynamic memory allocation.
|
||||
Locking: This function attempts to acquire the GPU lock.
|
||||
In case of failure to acquire it, the return code
|
||||
is NV_ERR_STATE_IN_USE.
|
||||
Memory : This function does not dynamically allocate memory.
|
||||
|
||||
Arguments:
|
||||
uvmCslContext[IN/OUT] - The CSL context.
|
||||
uvmCslContext[IN/OUT] - The CSL context associated with a channel.
|
||||
operation[IN] - Either
|
||||
- UVM_CSL_OPERATION_ENCRYPT
|
||||
- UVM_CSL_OPERATION_DECRYPT
|
||||
@ -1521,7 +1542,11 @@ Arguments:
|
||||
Error codes:
|
||||
NV_ERR_INSUFFICIENT_RESOURCES - The rotate operation would cause a counter
|
||||
to overflow.
|
||||
NV_ERR_STATE_IN_USE - Unable to acquire lock / resource. Caller
|
||||
can retry at a later time.
|
||||
NV_ERR_INVALID_ARGUMENT - Invalid value for operation.
|
||||
NV_ERR_GENERIC - A failure other than _STATE_IN_USE occurred
|
||||
when attempting to acquire a lock.
|
||||
*/
|
||||
NV_STATUS nvUvmInterfaceCslRotateIv(UvmCslContext *uvmCslContext,
|
||||
UvmCslOperation operation);
|
||||
@ -1538,11 +1563,13 @@ NV_STATUS nvUvmInterfaceCslRotateIv(UvmCslContext *uvmCslContext,
|
||||
The encryptIV can be obtained from nvUvmInterfaceCslIncrementIv.
|
||||
However, it is optional. If it is NULL, the next IV in line will be used.
|
||||
|
||||
See "CSL Interface and Locking" for locking requirements.
|
||||
This function does not perform dynamic memory allocation.
|
||||
Locking: This function does not acquire an API or GPU lock.
|
||||
If called concurrently in different threads with the same UvmCslContext
|
||||
the caller must guarantee exclusion.
|
||||
Memory : This function does not dynamically allocate memory.
|
||||
|
||||
Arguments:
|
||||
uvmCslContext[IN/OUT] - The CSL context.
|
||||
uvmCslContext[IN/OUT] - The CSL context associated with a channel.
|
||||
bufferSize[IN] - Size of the input and output buffers in
|
||||
units of bytes. Value can range from 1 byte
|
||||
to (2^32) - 1 bytes.
|
||||
@ -1553,8 +1580,9 @@ Arguments:
|
||||
Its size is UVM_CSL_CRYPT_AUTH_TAG_SIZE_BYTES.
|
||||
|
||||
Error codes:
|
||||
NV_ERR_INVALID_ARGUMENT - The size of the data is 0 bytes.
|
||||
- The encryptIv has already been used.
|
||||
NV_ERR_INVALID_ARGUMENT - The CSL context is not associated with a channel.
|
||||
- The size of the data is 0 bytes.
|
||||
- The encryptIv has already been used.
|
||||
*/
|
||||
NV_STATUS nvUvmInterfaceCslEncrypt(UvmCslContext *uvmCslContext,
|
||||
NvU32 bufferSize,
|
||||
@ -1573,8 +1601,10 @@ NV_STATUS nvUvmInterfaceCslEncrypt(UvmCslContext *uvmCslContext,
|
||||
maximized when the input and output buffers are 16-byte aligned. This is
|
||||
natural alignment for AES block.
|
||||
|
||||
See "CSL Interface and Locking" for locking requirements.
|
||||
This function does not perform dynamic memory allocation.
|
||||
Locking: This function does not acquire an API or GPU lock.
|
||||
If called concurrently in different threads with the same UvmCslContext
|
||||
the caller must guarantee exclusion.
|
||||
Memory : This function does not dynamically allocate memory.
|
||||
|
||||
Arguments:
|
||||
uvmCslContext[IN/OUT] - The CSL context.
|
||||
@ -1616,11 +1646,13 @@ NV_STATUS nvUvmInterfaceCslDecrypt(UvmCslContext *uvmCslContext,
|
||||
Auth and input buffers must not overlap. If they do then calling this function produces
|
||||
undefined behavior.
|
||||
|
||||
See "CSL Interface and Locking" for locking requirements.
|
||||
This function does not perform dynamic memory allocation.
|
||||
Locking: This function does not acquire an API or GPU lock.
|
||||
If called concurrently in different threads with the same UvmCslContext
|
||||
the caller must guarantee exclusion.
|
||||
Memory : This function does not dynamically allocate memory.
|
||||
|
||||
Arguments:
|
||||
uvmCslContext[IN/OUT] - The CSL context.
|
||||
uvmCslContext[IN/OUT] - The CSL context associated with a channel.
|
||||
bufferSize[IN] - Size of the input buffer in units of bytes.
|
||||
Value can range from 1 byte to (2^32) - 1 bytes.
|
||||
inputBuffer[IN] - Address of plaintext input buffer.
|
||||
@ -1629,7 +1661,8 @@ NV_STATUS nvUvmInterfaceCslDecrypt(UvmCslContext *uvmCslContext,
|
||||
|
||||
Error codes:
|
||||
NV_ERR_INSUFFICIENT_RESOURCES - The signing operation would cause a counter overflow to occur.
|
||||
NV_ERR_INVALID_ARGUMENT - The size of the data is 0 bytes.
|
||||
NV_ERR_INVALID_ARGUMENT - The CSL context is not associated with a channel.
|
||||
- The size of the data is 0 bytes.
|
||||
*/
|
||||
NV_STATUS nvUvmInterfaceCslSign(UvmCslContext *uvmCslContext,
|
||||
NvU32 bufferSize,
|
||||
@ -1641,8 +1674,10 @@ NV_STATUS nvUvmInterfaceCslSign(UvmCslContext *uvmCslContext,
|
||||
|
||||
Returns the number of messages that can be encrypted before the message counter will overflow.
|
||||
|
||||
See "CSL Interface and Locking" for locking requirements.
|
||||
This function does not perform dynamic memory allocation.
|
||||
Locking: This function does not acquire an API or GPU lock.
|
||||
Memory : This function does not dynamically allocate memory.
|
||||
If called concurrently in different threads with the same UvmCslContext
|
||||
the caller must guarantee exclusion.
|
||||
|
||||
Arguments:
|
||||
uvmCslContext[IN/OUT] - The CSL context.
|
||||
@ -1666,8 +1701,10 @@ NV_STATUS nvUvmInterfaceCslQueryMessagePool(UvmCslContext *uvmCslContext,
|
||||
can be used in nvUvmInterfaceCslEncrypt. If operation is UVM_CSL_OPERATION_DECRYPT then
|
||||
the returned IV can be used in nvUvmInterfaceCslDecrypt.
|
||||
|
||||
See "CSL Interface and Locking" for locking requirements.
|
||||
This function does not perform dynamic memory allocation.
|
||||
Locking: This function does not acquire an API or GPU lock.
|
||||
If called concurrently in different threads with the same UvmCslContext
|
||||
the caller must guarantee exclusion.
|
||||
Memory : This function does not dynamically allocate memory.
|
||||
|
||||
Arguments:
|
||||
uvmCslContext[IN/OUT] - The CSL context.
|
||||
@ -1675,7 +1712,7 @@ Arguments:
|
||||
- UVM_CSL_OPERATION_ENCRYPT
|
||||
- UVM_CSL_OPERATION_DECRYPT
|
||||
increment[IN] - The amount by which the IV is incremented. Can be 0.
|
||||
iv[out] - If non-NULL, a buffer to store the incremented IV.
|
||||
iv[OUT] - If non-NULL, a buffer to store the incremented IV.
|
||||
|
||||
Error codes:
|
||||
NV_ERR_INVALID_ARGUMENT - The value of the operation parameter is illegal.
|
||||
@ -1687,4 +1724,29 @@ NV_STATUS nvUvmInterfaceCslIncrementIv(UvmCslContext *uvmCslContext,
|
||||
NvU64 increment,
|
||||
UvmCslIv *iv);
|
||||
|
||||
/*******************************************************************************
|
||||
nvUvmInterfaceCslLogExternalEncryption
|
||||
|
||||
Checks and logs information about non-CSL encryptions, such as those that
|
||||
originate from the GPU.
|
||||
|
||||
This function does not modify elements of the UvmCslContext.
|
||||
|
||||
Locking: This function does not acquire an API or GPU lock.
|
||||
Memory : This function does not dynamically allocate memory.
|
||||
If called concurrently in different threads with the same UvmCslContext
|
||||
the caller must guarantee exclusion.
|
||||
|
||||
Arguments:
|
||||
uvmCslContext[IN/OUT] - The CSL context.
|
||||
bufferSize[IN] - The size of the buffer encrypted by the
|
||||
external entity in units of bytes.
|
||||
|
||||
Error codes:
|
||||
NV_ERR_INSUFFICIENT_RESOURCES - The device encryption would cause a counter
|
||||
to overflow.
|
||||
*/
|
||||
NV_STATUS nvUvmInterfaceCslLogExternalEncryption(UvmCslContext *uvmCslContext,
|
||||
NvU32 bufferSize);
|
||||
|
||||
#endif // _NV_UVM_INTERFACE_H_
|
||||
|
@ -131,6 +131,8 @@ typedef struct UvmGpuMemoryInfo_tag
|
||||
// This is only valid if deviceDescendant is NV_TRUE.
|
||||
// When egm is NV_TRUE, this is also the UUID of the GPU
|
||||
// for which EGM is local.
|
||||
// If the GPU has SMC enabled, the UUID is the GI UUID.
|
||||
// Otherwise, it is the UUID for the physical GPU.
|
||||
// Note: If the allocation is owned by a device in
|
||||
// an SLI group and the allocation is broadcast
|
||||
// across the SLI group, this UUID will be any one
|
||||
@ -544,6 +546,10 @@ typedef struct UvmGpuP2PCapsParams_tag
|
||||
// the GPUs are direct peers.
|
||||
NvU32 peerIds[2];
|
||||
|
||||
// Out: egmPeerIds[i] contains gpu[i]'s EGM peer id of gpu[1 - i]. Only defined
|
||||
// if the GPUs are direct peers and EGM is enabled in the system.
|
||||
NvU32 egmPeerIds[2];
|
||||
|
||||
// Out: UVM_LINK_TYPE
|
||||
NvU32 p2pLink;
|
||||
|
||||
@ -572,8 +578,11 @@ typedef struct UvmPlatformInfo_tag
|
||||
// Out: ATS (Address Translation Services) is supported
|
||||
NvBool atsSupported;
|
||||
|
||||
// Out: AMD SEV (Secure Encrypted Virtualization) is enabled
|
||||
NvBool sevEnabled;
|
||||
// Out: True if HW trusted execution, such as AMD's SEV-SNP or Intel's TDX,
|
||||
// is enabled in the VM, indicating that Confidential Computing must also be
|
||||
// also enabled in the GPU(s); these two security features are either both
|
||||
// enabled, or both disabled.
|
||||
NvBool confComputingEnabled;
|
||||
} UvmPlatformInfo;
|
||||
|
||||
typedef struct UvmGpuClientInfo_tag
|
||||
@ -604,7 +613,8 @@ typedef struct UvmGpuInfo_tag
|
||||
// Printable gpu name
|
||||
char name[UVM_GPU_NAME_LENGTH];
|
||||
|
||||
// Uuid of this gpu
|
||||
// Uuid of the physical GPU or GI UUID if nvUvmInterfaceGetGpuInfo()
|
||||
// requested information for a valid SMC partition.
|
||||
NvProcessorUuid uuid;
|
||||
|
||||
// Gpu architecture; NV2080_CTRL_MC_ARCH_INFO_ARCHITECTURE_*
|
||||
@ -688,8 +698,12 @@ typedef struct UvmGpuInfo_tag
|
||||
NvU64 nvswitchMemoryWindowStart;
|
||||
|
||||
// local EGM properties
|
||||
// NV_TRUE if EGM is enabled
|
||||
NvBool egmEnabled;
|
||||
// Peer ID to reach local EGM when EGM is enabled
|
||||
NvU8 egmPeerId;
|
||||
// EGM base address to offset in the GMMU PTE entry for EGM mappings
|
||||
NvU64 egmBaseAddr;
|
||||
} UvmGpuInfo;
|
||||
|
||||
typedef struct UvmGpuFbInfo_tag
|
||||
@ -778,14 +792,14 @@ typedef NV_STATUS (*uvmEventResume_t) (void);
|
||||
/*******************************************************************************
|
||||
uvmEventStartDevice
|
||||
This function will be called by the GPU driver once it has finished its
|
||||
initialization to tell the UVM driver that this GPU has come up.
|
||||
initialization to tell the UVM driver that this physical GPU has come up.
|
||||
*/
|
||||
typedef NV_STATUS (*uvmEventStartDevice_t) (const NvProcessorUuid *pGpuUuidStruct);
|
||||
|
||||
/*******************************************************************************
|
||||
uvmEventStopDevice
|
||||
This function will be called by the GPU driver to let UVM know that a GPU
|
||||
is going down.
|
||||
This function will be called by the GPU driver to let UVM know that a
|
||||
physical GPU is going down.
|
||||
*/
|
||||
typedef NV_STATUS (*uvmEventStopDevice_t) (const NvProcessorUuid *pGpuUuidStruct);
|
||||
|
||||
@ -816,7 +830,7 @@ typedef NV_STATUS (*uvmEventServiceInterrupt_t) (void *pDeviceObject,
|
||||
/*******************************************************************************
|
||||
uvmEventIsrTopHalf_t
|
||||
This function will be called by the GPU driver to let UVM know
|
||||
that an interrupt has occurred.
|
||||
that an interrupt has occurred on the given physical GPU.
|
||||
|
||||
Returns:
|
||||
NV_OK if the UVM driver handled the interrupt
|
||||
@ -923,11 +937,6 @@ typedef struct UvmGpuFaultInfo_tag
|
||||
// CSL context used for performing decryption of replayable faults when
|
||||
// Confidential Computing is enabled.
|
||||
UvmCslContext cslCtx;
|
||||
|
||||
// Indicates whether UVM owns the replayable fault buffer.
|
||||
// The value of this field is always NV_TRUE When Confidential Computing
|
||||
// is disabled.
|
||||
NvBool bUvmOwnsHwFaultBuffer;
|
||||
} replayable;
|
||||
struct
|
||||
{
|
||||
|
@ -58,6 +58,7 @@ typedef NvU32 NvKmsFrameLockHandle;
|
||||
typedef NvU32 NvKmsDeferredRequestFifoHandle;
|
||||
typedef NvU32 NvKmsSwapGroupHandle;
|
||||
typedef NvU32 NvKmsVblankSyncObjectHandle;
|
||||
typedef NvU32 NvKmsVblankSemControlHandle;
|
||||
|
||||
struct NvKmsSize {
|
||||
NvU16 width;
|
||||
|
@ -490,6 +490,8 @@ typedef enum NvKmsKapiRegisterWaiterResultRec {
|
||||
NVKMS_KAPI_REG_WAITER_ALREADY_SIGNALLED,
|
||||
} NvKmsKapiRegisterWaiterResult;
|
||||
|
||||
typedef void NvKmsKapiSuspendResumeCallbackFunc(NvBool suspend);
|
||||
|
||||
struct NvKmsKapiFunctionsTable {
|
||||
|
||||
/*!
|
||||
@ -1399,6 +1401,15 @@ struct NvKmsKapiFunctionsTable {
|
||||
NvU64 index,
|
||||
NvU64 new_value
|
||||
);
|
||||
|
||||
/*!
|
||||
* Set the callback function for suspending and resuming the display system.
|
||||
*/
|
||||
void
|
||||
(*setSuspendResumeCallback)
|
||||
(
|
||||
NvKmsKapiSuspendResumeCallbackFunc *function
|
||||
);
|
||||
};
|
||||
|
||||
/** @} */
|
||||
|
@ -919,6 +919,9 @@ static NV_FORCEINLINE void *NV_NVUPTR_TO_PTR(NvUPtr address)
|
||||
//
|
||||
#define NV_BIT_SET_128(b, lo, hi) { nvAssert( (b) < 128 ); if ( (b) < 64 ) (lo) |= NVBIT64(b); else (hi) |= NVBIT64( b & 0x3F ); }
|
||||
|
||||
// Get the number of elements in the specified fixed-size array
|
||||
#define NV_ARRAY_ELEMENTS(x) ((sizeof(x)/sizeof((x)[0])))
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif //__cplusplus
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* SPDX-FileCopyrightText: Copyright (c) 2014-2020 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* SPDX-FileCopyrightText: Copyright (c) 2014-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
@ -150,6 +150,7 @@ NV_STATUS_CODE(NV_ERR_NVLINK_CONFIGURATION_ERROR, 0x00000078, "Nvlink Confi
|
||||
NV_STATUS_CODE(NV_ERR_RISCV_ERROR, 0x00000079, "Generic RISC-V assert or halt")
|
||||
NV_STATUS_CODE(NV_ERR_FABRIC_MANAGER_NOT_PRESENT, 0x0000007A, "Fabric Manager is not loaded")
|
||||
NV_STATUS_CODE(NV_ERR_ALREADY_SIGNALLED, 0x0000007B, "Semaphore Surface value already >= requested wait value")
|
||||
NV_STATUS_CODE(NV_ERR_QUEUE_TASK_SLOT_NOT_AVAILABLE, 0x0000007C, "PMU RPC error due to no queue slot available for this event")
|
||||
|
||||
// Warnings:
|
||||
NV_STATUS_CODE(NV_WARN_HOT_SWITCH, 0x00010001, "WARNING Hot switch")
|
||||
|
@ -145,7 +145,12 @@ typedef signed short NvS16; /* -32768 to 32767 */
|
||||
#endif
|
||||
|
||||
// Macro to build an NvU32 from four bytes, listed from msb to lsb
|
||||
#define NvU32_BUILD(a, b, c, d) (((a) << 24) | ((b) << 16) | ((c) << 8) | (d))
|
||||
#define NvU32_BUILD(a, b, c, d) \
|
||||
((NvU32)( \
|
||||
(((NvU32)(a) & 0xff) << 24) | \
|
||||
(((NvU32)(b) & 0xff) << 16) | \
|
||||
(((NvU32)(c) & 0xff) << 8) | \
|
||||
(((NvU32)(d) & 0xff))))
|
||||
|
||||
#if NVTYPES_USE_STDINT
|
||||
typedef uint32_t NvV32; /* "void": enumerated or multiple fields */
|
||||
|
@ -67,7 +67,6 @@ typedef struct os_wait_queue os_wait_queue;
|
||||
* ---------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
NvU64 NV_API_CALL os_get_num_phys_pages (void);
|
||||
NV_STATUS NV_API_CALL os_alloc_mem (void **, NvU64);
|
||||
void NV_API_CALL os_free_mem (void *);
|
||||
NV_STATUS NV_API_CALL os_get_current_time (NvU32 *, NvU32 *);
|
||||
@ -105,7 +104,6 @@ void* NV_API_CALL os_map_kernel_space (NvU64, NvU64, NvU32);
|
||||
void NV_API_CALL os_unmap_kernel_space (void *, NvU64);
|
||||
void* NV_API_CALL os_map_user_space (NvU64, NvU64, NvU32, NvU32, void **);
|
||||
void NV_API_CALL os_unmap_user_space (void *, NvU64, void *);
|
||||
NV_STATUS NV_API_CALL os_flush_cpu_cache (void);
|
||||
NV_STATUS NV_API_CALL os_flush_cpu_cache_all (void);
|
||||
NV_STATUS NV_API_CALL os_flush_user_cache (void);
|
||||
void NV_API_CALL os_flush_cpu_write_combine_buffer(void);
|
||||
@ -230,14 +228,12 @@ extern NvBool os_dma_buf_enabled;
|
||||
* ---------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
#define NV_DBG_INFO 0x1
|
||||
#define NV_DBG_SETUP 0x2
|
||||
#define NV_DBG_INFO 0x0
|
||||
#define NV_DBG_SETUP 0x1
|
||||
#define NV_DBG_USERERRORS 0x2
|
||||
#define NV_DBG_WARNINGS 0x3
|
||||
#define NV_DBG_ERRORS 0x4
|
||||
#define NV_DBG_HW_ERRORS 0x5
|
||||
#define NV_DBG_FATAL 0x6
|
||||
|
||||
#define NV_DBG_FORCE_LEVEL(level) ((level) | (1 << 8))
|
||||
|
||||
void NV_API_CALL out_string(const char *str);
|
||||
int NV_API_CALL nv_printf(NvU32 debuglevel, const char *printf_format, ...);
|
||||
|
@ -37,7 +37,7 @@ NV_STATUS NV_API_CALL rm_gpu_ops_create_session (nvidia_stack_t *, nvgpuSessio
|
||||
NV_STATUS NV_API_CALL rm_gpu_ops_destroy_session (nvidia_stack_t *, nvgpuSessionHandle_t);
|
||||
NV_STATUS NV_API_CALL rm_gpu_ops_device_create (nvidia_stack_t *, nvgpuSessionHandle_t, const nvgpuInfo_t *, const NvProcessorUuid *, nvgpuDeviceHandle_t *, NvBool);
|
||||
NV_STATUS NV_API_CALL rm_gpu_ops_device_destroy (nvidia_stack_t *, nvgpuDeviceHandle_t);
|
||||
NV_STATUS NV_API_CALL rm_gpu_ops_address_space_create(nvidia_stack_t *, nvgpuDeviceHandle_t, unsigned long long, unsigned long long, nvgpuAddressSpaceHandle_t *, nvgpuAddressSpaceInfo_t);
|
||||
NV_STATUS NV_API_CALL rm_gpu_ops_address_space_create(nvidia_stack_t *, nvgpuDeviceHandle_t, unsigned long long, unsigned long long, NvBool, nvgpuAddressSpaceHandle_t *, nvgpuAddressSpaceInfo_t);
|
||||
NV_STATUS NV_API_CALL rm_gpu_ops_dup_address_space(nvidia_stack_t *, nvgpuDeviceHandle_t, NvHandle, NvHandle, nvgpuAddressSpaceHandle_t *, nvgpuAddressSpaceInfo_t);
|
||||
NV_STATUS NV_API_CALL rm_gpu_ops_address_space_destroy(nvidia_stack_t *, nvgpuAddressSpaceHandle_t);
|
||||
NV_STATUS NV_API_CALL rm_gpu_ops_memory_alloc_fb(nvidia_stack_t *, nvgpuAddressSpaceHandle_t, NvLength, NvU64 *, nvgpuAllocInfo_t);
|
||||
@ -45,7 +45,6 @@ NV_STATUS NV_API_CALL rm_gpu_ops_memory_alloc_fb(nvidia_stack_t *, nvgpuAddres
|
||||
NV_STATUS NV_API_CALL rm_gpu_ops_pma_alloc_pages(nvidia_stack_t *, void *, NvLength, NvU32 , nvgpuPmaAllocationOptions_t, NvU64 *);
|
||||
NV_STATUS NV_API_CALL rm_gpu_ops_pma_free_pages(nvidia_stack_t *, void *, NvU64 *, NvLength , NvU32, NvU32);
|
||||
NV_STATUS NV_API_CALL rm_gpu_ops_pma_pin_pages(nvidia_stack_t *, void *, NvU64 *, NvLength , NvU32, NvU32);
|
||||
NV_STATUS NV_API_CALL rm_gpu_ops_pma_unpin_pages(nvidia_stack_t *, void *, NvU64 *, NvLength , NvU32);
|
||||
NV_STATUS NV_API_CALL rm_gpu_ops_get_pma_object(nvidia_stack_t *, nvgpuDeviceHandle_t, void **, const nvgpuPmaStatistics_t *);
|
||||
NV_STATUS NV_API_CALL rm_gpu_ops_pma_register_callbacks(nvidia_stack_t *sp, void *, nvPmaEvictPagesCallback, nvPmaEvictRangeCallback, void *);
|
||||
void NV_API_CALL rm_gpu_ops_pma_unregister_callbacks(nvidia_stack_t *sp, void *);
|
||||
@ -77,6 +76,7 @@ NV_STATUS NV_API_CALL rm_gpu_ops_init_fault_info(nvidia_stack_t *, nvgpuDeviceH
|
||||
NV_STATUS NV_API_CALL rm_gpu_ops_destroy_fault_info(nvidia_stack_t *, nvgpuDeviceHandle_t, nvgpuFaultInfo_t);
|
||||
NV_STATUS NV_API_CALL rm_gpu_ops_get_non_replayable_faults(nvidia_stack_t *, nvgpuFaultInfo_t, void *, NvU32 *);
|
||||
NV_STATUS NV_API_CALL rm_gpu_ops_flush_replayable_fault_buffer(nvidia_stack_t *, nvgpuDeviceHandle_t);
|
||||
NV_STATUS NV_API_CALL rm_gpu_ops_toggle_prefetch_faults(nvidia_stack_t *, nvgpuFaultInfo_t, NvBool);
|
||||
NV_STATUS NV_API_CALL rm_gpu_ops_has_pending_non_replayable_faults(nvidia_stack_t *, nvgpuFaultInfo_t, NvBool *);
|
||||
NV_STATUS NV_API_CALL rm_gpu_ops_init_access_cntr_info(nvidia_stack_t *, nvgpuDeviceHandle_t, nvgpuAccessCntrInfo_t, NvU32);
|
||||
NV_STATUS NV_API_CALL rm_gpu_ops_destroy_access_cntr_info(nvidia_stack_t *, nvgpuDeviceHandle_t, nvgpuAccessCntrInfo_t);
|
||||
@ -103,6 +103,7 @@ NV_STATUS NV_API_CALL rm_gpu_ops_paging_channel_push_stream(nvidia_stack_t *, n
|
||||
|
||||
NV_STATUS NV_API_CALL rm_gpu_ops_ccsl_context_init(nvidia_stack_t *, struct ccslContext_t **, nvgpuChannelHandle_t);
|
||||
NV_STATUS NV_API_CALL rm_gpu_ops_ccsl_context_clear(nvidia_stack_t *, struct ccslContext_t *);
|
||||
NV_STATUS NV_API_CALL rm_gpu_ops_ccsl_context_update(nvidia_stack_t *, struct ccslContext_t *);
|
||||
NV_STATUS NV_API_CALL rm_gpu_ops_ccsl_rotate_iv(nvidia_stack_t *, struct ccslContext_t *, NvU8);
|
||||
NV_STATUS NV_API_CALL rm_gpu_ops_ccsl_encrypt(nvidia_stack_t *, struct ccslContext_t *, NvU32, NvU8 const *, NvU8 *, NvU8 *);
|
||||
NV_STATUS NV_API_CALL rm_gpu_ops_ccsl_encrypt_with_iv(nvidia_stack_t *, struct ccslContext_t *, NvU32, NvU8 const *, NvU8*, NvU8 *, NvU8 *);
|
||||
@ -110,5 +111,6 @@ NV_STATUS NV_API_CALL rm_gpu_ops_ccsl_decrypt(nvidia_stack_t *, struct ccslCont
|
||||
NV_STATUS NV_API_CALL rm_gpu_ops_ccsl_sign(nvidia_stack_t *, struct ccslContext_t *, NvU32, NvU8 const *, NvU8 *);
|
||||
NV_STATUS NV_API_CALL rm_gpu_ops_ccsl_query_message_pool(nvidia_stack_t *, struct ccslContext_t *, NvU8, NvU64 *);
|
||||
NV_STATUS NV_API_CALL rm_gpu_ops_ccsl_increment_iv(nvidia_stack_t *, struct ccslContext_t *, NvU8, NvU64, NvU8 *);
|
||||
NV_STATUS NV_API_CALL rm_gpu_ops_ccsl_log_device_encryption(nvidia_stack_t *, struct ccslContext_t *, NvU32);
|
||||
|
||||
#endif
|
||||
|
@ -14,6 +14,13 @@ OUTPUT=$4
|
||||
XEN_PRESENT=1
|
||||
PREEMPT_RT_PRESENT=0
|
||||
|
||||
# We also use conftest.sh on FreeBSD to check for which symbols are provided
|
||||
# by the linux kernel programming interface (linuxkpi) when compiling nvidia-drm.ko
|
||||
OS_FREEBSD=0
|
||||
if [ "$OS" = "FreeBSD" ] ; then
|
||||
OS_FREEBSD=1
|
||||
fi
|
||||
|
||||
# VGX_BUILD parameter defined only for VGX builds (vGPU Host driver)
|
||||
# VGX_KVM_BUILD parameter defined only for vGPU builds on KVM hypervisor
|
||||
# GRID_BUILD parameter defined only for GRID builds (GRID Guest driver)
|
||||
@ -205,11 +212,6 @@ CONFTEST_PREAMBLE="#include \"conftest/headers.h\"
|
||||
#if defined(NV_LINUX_KCONFIG_H_PRESENT)
|
||||
#include <linux/kconfig.h>
|
||||
#endif
|
||||
#if defined(NV_GENERATED_AUTOCONF_H_PRESENT)
|
||||
#include <generated/autoconf.h>
|
||||
#else
|
||||
#include <linux/autoconf.h>
|
||||
#endif
|
||||
#if defined(CONFIG_XEN) && \
|
||||
defined(CONFIG_XEN_INTERFACE_VERSION) && !defined(__XEN_INTERFACE_VERSION__)
|
||||
#define __XEN_INTERFACE_VERSION__ CONFIG_XEN_INTERFACE_VERSION
|
||||
@ -222,6 +224,17 @@ CONFTEST_PREAMBLE="#include \"conftest/headers.h\"
|
||||
#endif
|
||||
#endif"
|
||||
|
||||
# FreeBSD's Linux compatibility does not have autoconf.h defined
|
||||
# anywhere yet, only add this part on Linux
|
||||
if [ ${OS_FREEBSD} -ne 1 ] ; then
|
||||
CONFTEST_PREAMBLE="${CONFTEST_PREAMBLE}
|
||||
#if defined(NV_GENERATED_AUTOCONF_H_PRESENT)
|
||||
#include <generated/autoconf.h>
|
||||
#else
|
||||
#include <linux/autoconf.h>
|
||||
#endif"
|
||||
fi
|
||||
|
||||
test_configuration_option() {
|
||||
#
|
||||
# Check to see if the given configuration option is defined
|
||||
@ -308,16 +321,57 @@ compile_check_conftest() {
|
||||
fi
|
||||
}
|
||||
|
||||
export_symbol_present_conftest() {
|
||||
#
|
||||
# Check Module.symvers to see whether the given symbol is present.
|
||||
#
|
||||
check_symbol_exists() {
|
||||
# Check that the given symbol is available
|
||||
|
||||
SYMBOL="$1"
|
||||
TAB=' '
|
||||
|
||||
if grep -e "${TAB}${SYMBOL}${TAB}.*${TAB}EXPORT_SYMBOL\(_GPL\)\?\s*\$" \
|
||||
"$OUTPUT/Module.symvers" >/dev/null 2>&1; then
|
||||
if [ ${OS_FREEBSD} -ne 1 ] ; then
|
||||
# Linux:
|
||||
# ------
|
||||
#
|
||||
# Check Module.symvers to see whether the given symbol is present.
|
||||
#
|
||||
if grep -e "${TAB}${SYMBOL}${TAB}.*${TAB}EXPORT_SYMBOL.*\$" \
|
||||
"$OUTPUT/Module.symvers" >/dev/null 2>&1; then
|
||||
return 0
|
||||
fi
|
||||
else
|
||||
# FreeBSD:
|
||||
# ------
|
||||
#
|
||||
# Check if any of the linuxkpi or drm kernel module files contain
|
||||
# references to this symbol.
|
||||
|
||||
# Get the /boot/kernel/ and /boot/modules paths, convert the list to a
|
||||
# space separated list instead of semicolon separated so we can iterate
|
||||
# over it.
|
||||
if [ -z "${CONFTEST_BSD_KMODPATHS}" ] ; then
|
||||
KMODPATHS=`sysctl -n kern.module_path | sed -e "s/;/ /g"`
|
||||
else
|
||||
KMODPATHS="${CONFTEST_BSD_KMODPATHS}"
|
||||
fi
|
||||
|
||||
for KMOD in linuxkpi.ko linuxkpi_gplv2.ko drm.ko dmabuf.ko ; do
|
||||
for KMODPATH in $KMODPATHS; do
|
||||
if [ -e "$KMODPATH/$KMOD" ] ; then
|
||||
if nm "$KMODPATH/$KMOD" | grep "$SYMBOL" >/dev/null 2>&1 ; then
|
||||
return 0
|
||||
fi
|
||||
fi
|
||||
done
|
||||
done
|
||||
fi
|
||||
|
||||
return 1
|
||||
}
|
||||
|
||||
export_symbol_present_conftest() {
|
||||
|
||||
SYMBOL="$1"
|
||||
|
||||
if check_symbol_exists $SYMBOL; then
|
||||
echo "#define NV_IS_EXPORT_SYMBOL_PRESENT_$SYMBOL 1" |
|
||||
append_conftest "symbols"
|
||||
else
|
||||
@ -1206,6 +1260,36 @@ compile_test() {
|
||||
compile_check_conftest "$CODE" "NV_VFIO_DEVICE_OPS_HAS_BIND_IOMMUFD" "" "types"
|
||||
;;
|
||||
|
||||
vfio_device_ops_has_detach_ioas)
|
||||
#
|
||||
# Determine if 'vfio_device_ops' struct has 'detach_ioas' field.
|
||||
#
|
||||
# Added by commit 9048c7341c4df9cae04c154a8b0f556dbe913358 ("vfio-iommufd: Add detach_ioas
|
||||
# support for physical VFIO devices")
|
||||
#
|
||||
CODE="
|
||||
#include <linux/pci.h>
|
||||
#include <linux/vfio.h>
|
||||
int conftest_vfio_device_ops_has_detach_ioas(void) {
|
||||
return offsetof(struct vfio_device_ops, detach_ioas);
|
||||
}"
|
||||
|
||||
compile_check_conftest "$CODE" "NV_VFIO_DEVICE_OPS_HAS_DETACH_IOAS" "" "types"
|
||||
;;
|
||||
|
||||
pfn_address_space)
|
||||
#
|
||||
# Determine if 'struct pfn_address_space' structure is present or not.
|
||||
#
|
||||
CODE="
|
||||
#include <linux/memory-failure.h>
|
||||
void conftest_pfn_address_space() {
|
||||
struct pfn_address_space pfn_address_space;
|
||||
}"
|
||||
|
||||
compile_check_conftest "$CODE" "NV_PFN_ADDRESS_SPACE_STRUCT_PRESENT" "" "types"
|
||||
;;
|
||||
|
||||
pci_irq_vector_helpers)
|
||||
#
|
||||
# Determine if pci_alloc_irq_vectors(), pci_free_irq_vectors()
|
||||
@ -1343,7 +1427,7 @@ compile_test() {
|
||||
#include <drm/drm_drv.h>
|
||||
#endif
|
||||
|
||||
#if !defined(CONFIG_DRM) && !defined(CONFIG_DRM_MODULE)
|
||||
#if !defined(CONFIG_DRM) && !defined(CONFIG_DRM_MODULE) && !defined(__FreeBSD__)
|
||||
#error DRM not enabled
|
||||
#endif
|
||||
|
||||
@ -1807,7 +1891,7 @@ compile_test() {
|
||||
#include <drm/drmP.h>
|
||||
#endif
|
||||
#include <drm/drm_atomic.h>
|
||||
#if !defined(CONFIG_DRM) && !defined(CONFIG_DRM_MODULE)
|
||||
#if !defined(CONFIG_DRM) && !defined(CONFIG_DRM_MODULE) && !defined(__FreeBSD__)
|
||||
#error DRM not enabled
|
||||
#endif
|
||||
void conftest_drm_atomic_modeset_available(void) {
|
||||
@ -5203,10 +5287,16 @@ compile_test() {
|
||||
# Added by commit 7b7b27214bba ("mm/memory_hotplug: introduce
|
||||
# add_memory_driver_managed()") in v5.8.
|
||||
#
|
||||
# Before commit 3a0aaefe4134 ("mm/memory_hotplug: guard more
|
||||
# declarations by CONFIG_MEMORY_HOTPLUG") in v5.10, the
|
||||
# add_memory_driver_managed() was not guarded.
|
||||
#
|
||||
CODE="
|
||||
#include <linux/memory_hotplug.h>
|
||||
void conftest_add_memory_driver_managed() {
|
||||
#if defined(CONFIG_MEMORY_HOTPLUG)
|
||||
add_memory_driver_managed();
|
||||
#endif
|
||||
}"
|
||||
|
||||
compile_check_conftest "$CODE" "NV_ADD_MEMORY_DRIVER_MANAGED_PRESENT" "" "functions"
|
||||
@ -5669,22 +5759,6 @@ compile_test() {
|
||||
compile_check_conftest "$CODE" "NV_GPIO_TO_IRQ_PRESENT" "" "functions"
|
||||
;;
|
||||
|
||||
migrate_vma_setup)
|
||||
#
|
||||
# Determine if migrate_vma_setup() function is present
|
||||
#
|
||||
# Added by commit a7d1f22bb74f ("mm: turn migrate_vma upside
|
||||
# down") in v5.4.
|
||||
#
|
||||
CODE="
|
||||
#include <linux/migrate.h>
|
||||
int conftest_migrate_vma_setup(void) {
|
||||
migrate_vma_setup();
|
||||
}"
|
||||
|
||||
compile_check_conftest "$CODE" "NV_MIGRATE_VMA_SETUP_PRESENT" "" "functions"
|
||||
;;
|
||||
|
||||
migrate_vma_added_flags)
|
||||
#
|
||||
# Determine if migrate_vma structure has flags
|
||||
@ -5795,6 +5869,24 @@ compile_test() {
|
||||
compile_check_conftest "$CODE" "NV_MM_PASID_DROP_PRESENT" "" "functions"
|
||||
;;
|
||||
|
||||
iommu_is_dma_domain)
|
||||
#
|
||||
# Determine if iommu_is_dma_domain() function is present
|
||||
# this also assumes that iommu_get_domain_for_dev() function is
|
||||
# present.
|
||||
#
|
||||
# Added by commit bf3aed4660c6 ("iommu: Introduce explicit type
|
||||
# for non-strict DMA domains") in v5.15
|
||||
#
|
||||
CODE="
|
||||
#include <linux/iommu.h>
|
||||
void conftest_iommu_is_dma_domain(void) {
|
||||
iommu_is_dma_domain();
|
||||
}"
|
||||
|
||||
compile_check_conftest "$CODE" "NV_IOMMU_IS_DMA_DOMAIN_PRESENT" "" "functions"
|
||||
;;
|
||||
|
||||
drm_crtc_state_has_no_vblank)
|
||||
#
|
||||
# Determine if the 'drm_crtc_state' structure has 'no_vblank'.
|
||||
@ -6483,6 +6575,21 @@ compile_test() {
|
||||
compile_check_conftest "$CODE" "NV_FIND_NEXT_BIT_WRAP_PRESENT" "" "functions"
|
||||
;;
|
||||
|
||||
crypto_tfm_ctx_aligned)
|
||||
# Determine if 'crypto_tfm_ctx_aligned' is defined.
|
||||
#
|
||||
# Removed by commit 25c74a39e0f6 ("crypto: hmac - remove unnecessary
|
||||
# alignment logic") in v6.7.
|
||||
#
|
||||
CODE="
|
||||
#include <crypto/algapi.h>
|
||||
void conftest_crypto_tfm_ctx_aligned(void) {
|
||||
(void)crypto_tfm_ctx_aligned();
|
||||
}"
|
||||
|
||||
compile_check_conftest "$CODE" "NV_CRYPTO_TFM_CTX_ALIGNED_PRESENT" "" "functions"
|
||||
;;
|
||||
|
||||
crypto)
|
||||
#
|
||||
# Determine if we support various crypto functions.
|
||||
@ -6604,9 +6711,9 @@ compile_test() {
|
||||
# 'supported_colorspaces' argument.
|
||||
#
|
||||
# The 'u32 supported_colorspaces' argument was added to
|
||||
# drm_mode_create_dp_colorspace_property() by linux-next commit
|
||||
# drm_mode_create_dp_colorspace_property() by commit
|
||||
# c265f340eaa8 ("drm/connector: Allow drivers to pass list of
|
||||
# supported colorspaces").
|
||||
# supported colorspaces") in v6.5.
|
||||
#
|
||||
# To test if drm_mode_create_dp_colorspace_property() has the
|
||||
# 'supported_colorspaces' argument, declare a function prototype
|
||||
@ -6634,6 +6741,27 @@ compile_test() {
|
||||
compile_check_conftest "$CODE" "NV_DRM_MODE_CREATE_DP_COLORSPACE_PROPERTY_HAS_SUPPORTED_COLORSPACES_ARG" "" "types"
|
||||
;;
|
||||
|
||||
drm_unlocked_ioctl_flag_present)
|
||||
# Determine if DRM_UNLOCKED IOCTL flag is present.
|
||||
#
|
||||
# DRM_UNLOCKED was removed by commit 2798ffcc1d6a ("drm: Remove
|
||||
# locking for legacy ioctls and DRM_UNLOCKED") in Linux
|
||||
# next-20231208.
|
||||
#
|
||||
# DRM_UNLOCKED definition was moved from drmP.h to drm_ioctl.h by
|
||||
# commit 2640981f3600 ("drm: document drm_ioctl.[hc]") in v4.12.
|
||||
CODE="
|
||||
#if defined(NV_DRM_DRM_IOCTL_H_PRESENT)
|
||||
#include <drm/drm_ioctl.h>
|
||||
#endif
|
||||
#if defined(NV_DRM_DRMP_H_PRESENT)
|
||||
#include <drm/drmP.h>
|
||||
#endif
|
||||
int flags = DRM_UNLOCKED;"
|
||||
|
||||
compile_check_conftest "$CODE" "NV_DRM_UNLOCKED_IOCTL_FLAG_PRESENT" "" "types"
|
||||
;;
|
||||
|
||||
# When adding a new conftest entry, please use the correct format for
|
||||
# specifying the relevant upstream Linux kernel commit. Please
|
||||
# avoid specifying -rc kernels, and only use SHAs that actually exist
|
||||
@ -6935,10 +7063,12 @@ case "$5" in
|
||||
#
|
||||
VERBOSE=$6
|
||||
iommu=CONFIG_VFIO_IOMMU_TYPE1
|
||||
iommufd_vfio_container=CONFIG_IOMMUFD_VFIO_CONTAINER
|
||||
mdev=CONFIG_VFIO_MDEV
|
||||
kvm=CONFIG_KVM_VFIO
|
||||
vfio_pci_core=CONFIG_VFIO_PCI_CORE
|
||||
VFIO_IOMMU_PRESENT=0
|
||||
VFIO_IOMMUFD_VFIO_CONTAINER_PRESENT=0
|
||||
VFIO_MDEV_PRESENT=0
|
||||
KVM_PRESENT=0
|
||||
VFIO_PCI_CORE_PRESENT=0
|
||||
@ -6948,6 +7078,10 @@ case "$5" in
|
||||
VFIO_IOMMU_PRESENT=1
|
||||
fi
|
||||
|
||||
if (test_configuration_option ${iommufd_vfio_container} || test_configuration_option ${iommufd_vfio_container}_MODULE); then
|
||||
VFIO_IOMMUFD_VFIO_CONTAINER_PRESENT=1
|
||||
fi
|
||||
|
||||
if (test_configuration_option ${mdev} || test_configuration_option ${mdev}_MODULE); then
|
||||
VFIO_MDEV_PRESENT=1
|
||||
fi
|
||||
@ -6960,36 +7094,23 @@ case "$5" in
|
||||
VFIO_PCI_CORE_PRESENT=1
|
||||
fi
|
||||
|
||||
# When this sanity check is run via nvidia-installer, it sets ARCH as aarch64.
|
||||
# But, when it is run via Kbuild, ARCH is set as arm64
|
||||
if [ "$ARCH" = "aarch64" ]; then
|
||||
ARCH="arm64"
|
||||
fi
|
||||
|
||||
if [ "$VFIO_IOMMU_PRESENT" != "0" ] && [ "$KVM_PRESENT" != "0" ] ; then
|
||||
|
||||
# On x86_64, vGPU requires MDEV framework to be present.
|
||||
# On aarch64, vGPU requires MDEV or vfio-pci-core framework to be present.
|
||||
if ([ "$ARCH" = "arm64" ] && ([ "$VFIO_MDEV_PRESENT" != "0" ] || [ "$VFIO_PCI_CORE_PRESENT" != "0" ])) ||
|
||||
([ "$ARCH" = "x86_64" ] && [ "$VFIO_MDEV_PRESENT" != "0" ];) then
|
||||
if ([ "$VFIO_IOMMU_PRESENT" != "0" ] || [ "$VFIO_IOMMUFD_VFIO_CONTAINER_PRESENT" != "0" ])&& [ "$KVM_PRESENT" != "0" ] ; then
|
||||
# vGPU requires either MDEV or vfio-pci-core framework to be present.
|
||||
if [ "$VFIO_MDEV_PRESENT" != "0" ] || [ "$VFIO_PCI_CORE_PRESENT" != "0" ]; then
|
||||
exit 0
|
||||
fi
|
||||
fi
|
||||
|
||||
echo "Below CONFIG options are missing on the kernel for installing";
|
||||
echo "NVIDIA vGPU driver on KVM host";
|
||||
if [ "$VFIO_IOMMU_PRESENT" = "0" ]; then
|
||||
echo "CONFIG_VFIO_IOMMU_TYPE1";
|
||||
if [ "$VFIO_IOMMU_PRESENT" = "0" ] && [ "$VFIO_IOMMUFD_VFIO_CONTAINER_PRESENT" = "0" ]; then
|
||||
echo "either CONFIG_VFIO_IOMMU_TYPE1 or CONFIG_IOMMUFD_VFIO_CONTAINER";
|
||||
fi
|
||||
|
||||
if [ "$ARCH" = "arm64" ] && [ "$VFIO_MDEV_PRESENT" = "0" ] && [ "$VFIO_PCI_CORE_PRESENT" = "0" ]; then
|
||||
if [ "$VFIO_MDEV_PRESENT" = "0" ] && [ "$VFIO_PCI_CORE_PRESENT" = "0" ]; then
|
||||
echo "either CONFIG_VFIO_MDEV or CONFIG_VFIO_PCI_CORE";
|
||||
fi
|
||||
|
||||
if [ "$ARCH" = "x86_64" ] && [ "$VFIO_MDEV_PRESENT" = "0" ]; then
|
||||
echo "CONFIG_VFIO_MDEV";
|
||||
fi
|
||||
|
||||
if [ "$KVM_PRESENT" = "0" ]; then
|
||||
echo "CONFIG_KVM";
|
||||
fi
|
||||
|
100
kernel-open/header-presence-tests.mk
Normal file
100
kernel-open/header-presence-tests.mk
Normal file
@ -0,0 +1,100 @@
|
||||
# Each of these headers is checked for presence with a test #include; a
|
||||
# corresponding #define will be generated in conftest/headers.h.
|
||||
NV_HEADER_PRESENCE_TESTS = \
|
||||
asm/system.h \
|
||||
drm/drmP.h \
|
||||
drm/drm_aperture.h \
|
||||
drm/drm_auth.h \
|
||||
drm/drm_gem.h \
|
||||
drm/drm_crtc.h \
|
||||
drm/drm_color_mgmt.h \
|
||||
drm/drm_atomic.h \
|
||||
drm/drm_atomic_helper.h \
|
||||
drm/drm_atomic_state_helper.h \
|
||||
drm/drm_encoder.h \
|
||||
drm/drm_atomic_uapi.h \
|
||||
drm/drm_drv.h \
|
||||
drm/drm_fbdev_generic.h \
|
||||
drm/drm_framebuffer.h \
|
||||
drm/drm_connector.h \
|
||||
drm/drm_probe_helper.h \
|
||||
drm/drm_blend.h \
|
||||
drm/drm_fourcc.h \
|
||||
drm/drm_prime.h \
|
||||
drm/drm_plane.h \
|
||||
drm/drm_vblank.h \
|
||||
drm/drm_file.h \
|
||||
drm/drm_ioctl.h \
|
||||
drm/drm_device.h \
|
||||
drm/drm_mode_config.h \
|
||||
drm/drm_modeset_lock.h \
|
||||
dt-bindings/interconnect/tegra_icc_id.h \
|
||||
generated/autoconf.h \
|
||||
generated/compile.h \
|
||||
generated/utsrelease.h \
|
||||
linux/efi.h \
|
||||
linux/kconfig.h \
|
||||
linux/platform/tegra/mc_utils.h \
|
||||
linux/printk.h \
|
||||
linux/ratelimit.h \
|
||||
linux/prio_tree.h \
|
||||
linux/log2.h \
|
||||
linux/of.h \
|
||||
linux/bug.h \
|
||||
linux/sched.h \
|
||||
linux/sched/mm.h \
|
||||
linux/sched/signal.h \
|
||||
linux/sched/task.h \
|
||||
linux/sched/task_stack.h \
|
||||
xen/ioemu.h \
|
||||
linux/fence.h \
|
||||
linux/dma-fence.h \
|
||||
linux/dma-resv.h \
|
||||
soc/tegra/chip-id.h \
|
||||
soc/tegra/fuse.h \
|
||||
soc/tegra/tegra_bpmp.h \
|
||||
video/nv_internal.h \
|
||||
linux/platform/tegra/dce/dce-client-ipc.h \
|
||||
linux/nvhost.h \
|
||||
linux/nvhost_t194.h \
|
||||
linux/host1x-next.h \
|
||||
asm/book3s/64/hash-64k.h \
|
||||
asm/set_memory.h \
|
||||
asm/prom.h \
|
||||
asm/powernv.h \
|
||||
linux/atomic.h \
|
||||
asm/barrier.h \
|
||||
asm/opal-api.h \
|
||||
sound/hdaudio.h \
|
||||
asm/pgtable_types.h \
|
||||
asm/page.h \
|
||||
linux/stringhash.h \
|
||||
linux/dma-map-ops.h \
|
||||
rdma/peer_mem.h \
|
||||
sound/hda_codec.h \
|
||||
linux/dma-buf.h \
|
||||
linux/time.h \
|
||||
linux/platform_device.h \
|
||||
linux/mutex.h \
|
||||
linux/reset.h \
|
||||
linux/of_platform.h \
|
||||
linux/of_device.h \
|
||||
linux/of_gpio.h \
|
||||
linux/gpio.h \
|
||||
linux/gpio/consumer.h \
|
||||
linux/interconnect.h \
|
||||
linux/pm_runtime.h \
|
||||
linux/clk.h \
|
||||
linux/clk-provider.h \
|
||||
linux/ioasid.h \
|
||||
linux/stdarg.h \
|
||||
linux/iosys-map.h \
|
||||
asm/coco.h \
|
||||
linux/vfio_pci_core.h \
|
||||
linux/mdev.h \
|
||||
soc/tegra/bpmp-abi.h \
|
||||
soc/tegra/bpmp.h \
|
||||
linux/sync_file.h \
|
||||
linux/cc_platform.h \
|
||||
asm/cpufeature.h
|
||||
|
@ -25,6 +25,15 @@
|
||||
#include <linux/module.h>
|
||||
|
||||
#include "nv-pci-table.h"
|
||||
#include "cpuopsys.h"
|
||||
|
||||
#if defined(NV_BSD)
|
||||
/* Define the PCI vendor and class IDs that FreeBSD's linuxkpi is missing */
|
||||
#define PCI_VENDOR_ID_NVIDIA 0x10de
|
||||
#define PCI_CLASS_DISPLAY_VGA 0x0300
|
||||
#define PCI_CLASS_DISPLAY_3D 0x0302
|
||||
#define PCI_CLASS_BRIDGE_OTHER 0x0680
|
||||
#endif
|
||||
|
||||
/* Devices supported by RM */
|
||||
struct pci_device_id nv_pci_table[] = {
|
||||
@ -48,7 +57,7 @@ struct pci_device_id nv_pci_table[] = {
|
||||
};
|
||||
|
||||
/* Devices supported by all drivers in nvidia.ko */
|
||||
struct pci_device_id nv_module_device_table[] = {
|
||||
struct pci_device_id nv_module_device_table[4] = {
|
||||
{
|
||||
.vendor = PCI_VENDOR_ID_NVIDIA,
|
||||
.device = PCI_ANY_ID,
|
||||
@ -76,4 +85,6 @@ struct pci_device_id nv_module_device_table[] = {
|
||||
{ }
|
||||
};
|
||||
|
||||
#if defined(NV_LINUX)
|
||||
MODULE_DEVICE_TABLE(pci, nv_module_device_table);
|
||||
#endif
|
||||
|
@ -27,5 +27,6 @@
|
||||
#include <linux/pci.h>
|
||||
|
||||
extern struct pci_device_id nv_pci_table[];
|
||||
extern struct pci_device_id nv_module_device_table[4];
|
||||
|
||||
#endif /* _NV_PCI_TABLE_H_ */
|
||||
|
@ -24,6 +24,7 @@
|
||||
#define __NVIDIA_DRM_CONFTEST_H__
|
||||
|
||||
#include "conftest.h"
|
||||
#include "nvtypes.h"
|
||||
|
||||
/*
|
||||
* NOTE: This file is expected to get included at the top before including any
|
||||
@ -72,4 +73,121 @@
|
||||
#undef NV_DRM_COLOR_MGMT_AVAILABLE
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Adapt to quirks in FreeBSD's Linux kernel compatibility layer.
|
||||
*/
|
||||
#if defined(NV_BSD)
|
||||
|
||||
#include <linux/rwsem.h>
|
||||
#include <sys/param.h>
|
||||
#include <sys/lock.h>
|
||||
#include <sys/sx.h>
|
||||
|
||||
/* For nv_drm_gem_prime_force_fence_signal */
|
||||
#ifndef spin_is_locked
|
||||
#define spin_is_locked(lock) mtx_owned(lock.m)
|
||||
#endif
|
||||
|
||||
#ifndef rwsem_is_locked
|
||||
#define rwsem_is_locked(sem) (((sem)->sx.sx_lock & (SX_LOCK_SHARED)) \
|
||||
|| ((sem)->sx.sx_lock & ~(SX_LOCK_FLAGMASK & ~SX_LOCK_SHARED)))
|
||||
#endif
|
||||
|
||||
/*
|
||||
* FreeBSD does not define vm_flags_t in its linuxkpi, since there is already
|
||||
* a FreeBSD vm_flags_t (of a different size) and they don't want the names to
|
||||
* collide. Temporarily redefine it when including nv-mm.h
|
||||
*/
|
||||
#define vm_flags_t unsigned long
|
||||
#include "nv-mm.h"
|
||||
#undef vm_flags_t
|
||||
|
||||
/*
|
||||
* sys/nv.h and nvidia/nv.h have the same header guard
|
||||
* we need to clear it for nvlist_t to get loaded
|
||||
*/
|
||||
#undef _NV_H_
|
||||
#include <sys/nv.h>
|
||||
|
||||
/*
|
||||
* For now just use set_page_dirty as the lock variant
|
||||
* is not ported for FreeBSD. (in progress). This calls
|
||||
* vm_page_dirty. Used in nv-mm.h
|
||||
*/
|
||||
#define set_page_dirty_lock set_page_dirty
|
||||
|
||||
/*
|
||||
* FreeBSD does not implement drm_atomic_state_free, simply
|
||||
* default to drm_atomic_state_put
|
||||
*/
|
||||
#define drm_atomic_state_free drm_atomic_state_put
|
||||
|
||||
#if __FreeBSD_version < 1300000
|
||||
/* redefine LIST_HEAD_INIT to the linux version */
|
||||
#include <linux/list.h>
|
||||
#define LIST_HEAD_INIT(name) LINUX_LIST_HEAD_INIT(name)
|
||||
#endif
|
||||
|
||||
/*
|
||||
* FreeBSD currently has only vmf_insert_pfn_prot defined, and it has a
|
||||
* static assert warning not to use it since all of DRM's usages are in
|
||||
* loops with the vm obj lock(s) held. Instead we should use the lkpi
|
||||
* function itself directly. For us none of this applies so we can just
|
||||
* wrap it in our own definition of vmf_insert_pfn
|
||||
*/
|
||||
#ifndef NV_VMF_INSERT_PFN_PRESENT
|
||||
#define NV_VMF_INSERT_PFN_PRESENT 1
|
||||
|
||||
#if __FreeBSD_version < 1300000
|
||||
#define VM_SHARED (1 << 17)
|
||||
|
||||
/* Not present in 12.2 */
|
||||
static inline vm_fault_t
|
||||
lkpi_vmf_insert_pfn_prot_locked(struct vm_area_struct *vma, unsigned long addr,
|
||||
unsigned long pfn, pgprot_t prot)
|
||||
{
|
||||
vm_object_t vm_obj = vma->vm_obj;
|
||||
vm_page_t page;
|
||||
vm_pindex_t pindex;
|
||||
|
||||
VM_OBJECT_ASSERT_WLOCKED(vm_obj);
|
||||
pindex = OFF_TO_IDX(addr - vma->vm_start);
|
||||
if (vma->vm_pfn_count == 0)
|
||||
vma->vm_pfn_first = pindex;
|
||||
MPASS(pindex <= OFF_TO_IDX(vma->vm_end));
|
||||
|
||||
page = vm_page_grab(vm_obj, pindex, VM_ALLOC_NORMAL);
|
||||
if (page == NULL) {
|
||||
page = PHYS_TO_VM_PAGE(IDX_TO_OFF(pfn));
|
||||
vm_page_xbusy(page);
|
||||
if (vm_page_insert(page, vm_obj, pindex)) {
|
||||
vm_page_xunbusy(page);
|
||||
return (VM_FAULT_OOM);
|
||||
}
|
||||
page->valid = VM_PAGE_BITS_ALL;
|
||||
}
|
||||
pmap_page_set_memattr(page, pgprot2cachemode(prot));
|
||||
vma->vm_pfn_count++;
|
||||
|
||||
return (VM_FAULT_NOPAGE);
|
||||
}
|
||||
#endif
|
||||
|
||||
static inline vm_fault_t
|
||||
vmf_insert_pfn(struct vm_area_struct *vma, unsigned long addr,
|
||||
unsigned long pfn)
|
||||
{
|
||||
vm_fault_t ret;
|
||||
|
||||
VM_OBJECT_WLOCK(vma->vm_obj);
|
||||
ret = lkpi_vmf_insert_pfn_prot_locked(vma, addr, pfn, vma->vm_page_prot);
|
||||
VM_OBJECT_WUNLOCK(vma->vm_obj);
|
||||
|
||||
return (ret);
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#endif /* defined(NV_BSD) */
|
||||
|
||||
#endif /* defined(__NVIDIA_DRM_CONFTEST_H__) */
|
||||
|
@ -92,11 +92,22 @@ static void nv_drm_plane_destroy(struct drm_plane *plane)
|
||||
nv_drm_free(nv_plane);
|
||||
}
|
||||
|
||||
static inline void
|
||||
plane_config_clear(struct NvKmsKapiLayerConfig *layerConfig)
|
||||
{
|
||||
if (layerConfig == NULL) {
|
||||
return;
|
||||
}
|
||||
|
||||
memset(layerConfig, 0, sizeof(*layerConfig));
|
||||
layerConfig->csc = NVKMS_IDENTITY_CSC_MATRIX;
|
||||
}
|
||||
|
||||
static inline void
|
||||
plane_req_config_disable(struct NvKmsKapiLayerRequestedConfig *req_config)
|
||||
{
|
||||
/* Clear layer config */
|
||||
memset(&req_config->config, 0, sizeof(req_config->config));
|
||||
plane_config_clear(&req_config->config);
|
||||
|
||||
/* Set flags to get cleared layer config applied */
|
||||
req_config->flags.surfaceChanged = NV_TRUE;
|
||||
@ -113,6 +124,45 @@ cursor_req_config_disable(struct NvKmsKapiCursorRequestedConfig *req_config)
|
||||
req_config->flags.surfaceChanged = NV_TRUE;
|
||||
}
|
||||
|
||||
#if defined(NV_DRM_COLOR_MGMT_AVAILABLE)
|
||||
static void color_mgmt_config_ctm_to_csc(struct NvKmsCscMatrix *nvkms_csc,
|
||||
struct drm_color_ctm *drm_ctm)
|
||||
{
|
||||
int y;
|
||||
|
||||
/* CTM is a 3x3 matrix while ours is 3x4. Zero out the last column. */
|
||||
nvkms_csc->m[0][3] = nvkms_csc->m[1][3] = nvkms_csc->m[2][3] = 0;
|
||||
|
||||
for (y = 0; y < 3; y++) {
|
||||
int x;
|
||||
|
||||
for (x = 0; x < 3; x++) {
|
||||
/*
|
||||
* Values in the CTM are encoded in S31.32 sign-magnitude fixed-
|
||||
* point format, while NvKms CSC values are signed 2's-complement
|
||||
* S15.16 (Ssign-extend12-3.16?) fixed-point format.
|
||||
*/
|
||||
NvU64 ctmVal = drm_ctm->matrix[y*3 + x];
|
||||
NvU64 signBit = ctmVal & (1ULL << 63);
|
||||
NvU64 magnitude = ctmVal & ~signBit;
|
||||
|
||||
/*
|
||||
* Drop the low 16 bits of the fractional part and the high 17 bits
|
||||
* of the integral part. Drop 17 bits to avoid corner cases where
|
||||
* the highest resulting bit is a 1, causing the `cscVal = -cscVal`
|
||||
* line to result in a positive number.
|
||||
*/
|
||||
NvS32 cscVal = (magnitude >> 16) & ((1ULL << 31) - 1);
|
||||
if (signBit) {
|
||||
cscVal = -cscVal;
|
||||
}
|
||||
|
||||
nvkms_csc->m[y][x] = cscVal;
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif /* NV_DRM_COLOR_MGMT_AVAILABLE */
|
||||
|
||||
static void
|
||||
cursor_plane_req_config_update(struct drm_plane *plane,
|
||||
struct drm_plane_state *plane_state,
|
||||
@ -239,6 +289,8 @@ plane_req_config_update(struct drm_plane *plane,
|
||||
.dstY = plane_state->crtc_y,
|
||||
.dstWidth = plane_state->crtc_w,
|
||||
.dstHeight = plane_state->crtc_h,
|
||||
|
||||
.csc = old_config.csc
|
||||
},
|
||||
};
|
||||
|
||||
@ -578,6 +630,24 @@ static int nv_drm_plane_atomic_check(struct drm_plane *plane,
|
||||
return ret;
|
||||
}
|
||||
|
||||
#if defined(NV_DRM_COLOR_MGMT_AVAILABLE)
|
||||
if (crtc_state->color_mgmt_changed) {
|
||||
/*
|
||||
* According to the comment in the Linux kernel's
|
||||
* drivers/gpu/drm/drm_color_mgmt.c, if this property is NULL,
|
||||
* the CTM needs to be changed to the identity matrix
|
||||
*/
|
||||
if (crtc_state->ctm) {
|
||||
color_mgmt_config_ctm_to_csc(&plane_requested_config->config.csc,
|
||||
(struct drm_color_ctm *)crtc_state->ctm->data);
|
||||
} else {
|
||||
plane_requested_config->config.csc = NVKMS_IDENTITY_CSC_MATRIX;
|
||||
}
|
||||
plane_requested_config->config.cscUseMain = NV_FALSE;
|
||||
plane_requested_config->flags.cscChanged = NV_TRUE;
|
||||
}
|
||||
#endif /* NV_DRM_COLOR_MGMT_AVAILABLE */
|
||||
|
||||
if (__is_async_flip_requested(plane, crtc_state)) {
|
||||
/*
|
||||
* Async flip requests that the flip happen 'as soon as
|
||||
@ -668,6 +738,38 @@ static int nv_drm_plane_atomic_get_property(
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
/**
|
||||
* nv_drm_plane_atomic_reset - plane state reset hook
|
||||
* @plane: DRM plane
|
||||
*
|
||||
* Allocate an empty DRM plane state.
|
||||
*/
|
||||
static void nv_drm_plane_atomic_reset(struct drm_plane *plane)
|
||||
{
|
||||
struct nv_drm_plane_state *nv_plane_state =
|
||||
nv_drm_calloc(1, sizeof(*nv_plane_state));
|
||||
|
||||
if (!nv_plane_state) {
|
||||
return;
|
||||
}
|
||||
|
||||
drm_atomic_helper_plane_reset(plane);
|
||||
|
||||
/*
|
||||
* The drm atomic helper function allocates a state object that is the wrong
|
||||
* size. Copy its contents into the one we allocated above and replace the
|
||||
* pointer.
|
||||
*/
|
||||
if (plane->state) {
|
||||
nv_plane_state->base = *plane->state;
|
||||
kfree(plane->state);
|
||||
plane->state = &nv_plane_state->base;
|
||||
} else {
|
||||
kfree(nv_plane_state);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
static struct drm_plane_state *
|
||||
nv_drm_plane_atomic_duplicate_state(struct drm_plane *plane)
|
||||
{
|
||||
@ -727,7 +829,7 @@ static const struct drm_plane_funcs nv_plane_funcs = {
|
||||
.update_plane = drm_atomic_helper_update_plane,
|
||||
.disable_plane = drm_atomic_helper_disable_plane,
|
||||
.destroy = nv_drm_plane_destroy,
|
||||
.reset = drm_atomic_helper_plane_reset,
|
||||
.reset = nv_drm_plane_atomic_reset,
|
||||
.atomic_get_property = nv_drm_plane_atomic_get_property,
|
||||
.atomic_set_property = nv_drm_plane_atomic_set_property,
|
||||
.atomic_duplicate_state = nv_drm_plane_atomic_duplicate_state,
|
||||
@ -784,6 +886,52 @@ static inline void nv_drm_crtc_duplicate_req_head_modeset_config(
|
||||
}
|
||||
}
|
||||
|
||||
static inline struct nv_drm_crtc_state *nv_drm_crtc_state_alloc(void)
|
||||
{
|
||||
struct nv_drm_crtc_state *nv_state = nv_drm_calloc(1, sizeof(*nv_state));
|
||||
int i;
|
||||
|
||||
if (nv_state == NULL) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
for (i = 0; i < ARRAY_SIZE(nv_state->req_config.layerRequestedConfig); i++) {
|
||||
plane_config_clear(&nv_state->req_config.layerRequestedConfig[i].config);
|
||||
}
|
||||
return nv_state;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* nv_drm_atomic_crtc_reset - crtc state reset hook
|
||||
* @crtc: DRM crtc
|
||||
*
|
||||
* Allocate an empty DRM crtc state.
|
||||
*/
|
||||
static void nv_drm_atomic_crtc_reset(struct drm_crtc *crtc)
|
||||
{
|
||||
struct nv_drm_crtc_state *nv_state = nv_drm_crtc_state_alloc();
|
||||
|
||||
if (!nv_state) {
|
||||
return;
|
||||
}
|
||||
|
||||
drm_atomic_helper_crtc_reset(crtc);
|
||||
|
||||
/*
|
||||
* The drm atomic helper function allocates a state object that is the wrong
|
||||
* size. Copy its contents into the one we allocated above and replace the
|
||||
* pointer.
|
||||
*/
|
||||
if (crtc->state) {
|
||||
nv_state->base = *crtc->state;
|
||||
kfree(crtc->state);
|
||||
crtc->state = &nv_state->base;
|
||||
} else {
|
||||
kfree(nv_state);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* nv_drm_atomic_crtc_duplicate_state - crtc state duplicate hook
|
||||
* @crtc: DRM crtc
|
||||
@ -795,7 +943,7 @@ static inline void nv_drm_crtc_duplicate_req_head_modeset_config(
|
||||
static struct drm_crtc_state*
|
||||
nv_drm_atomic_crtc_duplicate_state(struct drm_crtc *crtc)
|
||||
{
|
||||
struct nv_drm_crtc_state *nv_state = nv_drm_calloc(1, sizeof(*nv_state));
|
||||
struct nv_drm_crtc_state *nv_state = nv_drm_crtc_state_alloc();
|
||||
|
||||
if (nv_state == NULL) {
|
||||
return NULL;
|
||||
@ -851,7 +999,7 @@ static void nv_drm_atomic_crtc_destroy_state(struct drm_crtc *crtc,
|
||||
static struct drm_crtc_funcs nv_crtc_funcs = {
|
||||
.set_config = drm_atomic_helper_set_config,
|
||||
.page_flip = drm_atomic_helper_page_flip,
|
||||
.reset = drm_atomic_helper_crtc_reset,
|
||||
.reset = nv_drm_atomic_crtc_reset,
|
||||
.destroy = nv_drm_crtc_destroy,
|
||||
.atomic_duplicate_state = nv_drm_atomic_crtc_duplicate_state,
|
||||
.atomic_destroy_state = nv_drm_atomic_crtc_destroy_state,
|
||||
@ -914,70 +1062,25 @@ static int color_mgmt_config_copy_lut(struct NvKmsLutRamps *nvkms_lut,
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void color_mgmt_config_ctm_to_csc(struct NvKmsCscMatrix *nvkms_csc,
|
||||
struct drm_color_ctm *drm_ctm)
|
||||
{
|
||||
int y;
|
||||
|
||||
/* CTM is a 3x3 matrix while ours is 3x4. Zero out the last column. */
|
||||
nvkms_csc->m[0][3] = nvkms_csc->m[1][3] = nvkms_csc->m[2][3] = 0;
|
||||
|
||||
for (y = 0; y < 3; y++) {
|
||||
int x;
|
||||
|
||||
for (x = 0; x < 3; x++) {
|
||||
/*
|
||||
* Values in the CTM are encoded in S31.32 sign-magnitude fixed-
|
||||
* point format, while NvKms CSC values are signed 2's-complement
|
||||
* S15.16 (Ssign-extend12-3.16?) fixed-point format.
|
||||
*/
|
||||
NvU64 ctmVal = drm_ctm->matrix[y*3 + x];
|
||||
NvU64 signBit = ctmVal & (1ULL << 63);
|
||||
NvU64 magnitude = ctmVal & ~signBit;
|
||||
|
||||
/*
|
||||
* Drop the low 16 bits of the fractional part and the high 17 bits
|
||||
* of the integral part. Drop 17 bits to avoid corner cases where
|
||||
* the highest resulting bit is a 1, causing the `cscVal = -cscVal`
|
||||
* line to result in a positive number.
|
||||
*/
|
||||
NvS32 cscVal = (magnitude >> 16) & ((1ULL << 31) - 1);
|
||||
if (signBit) {
|
||||
cscVal = -cscVal;
|
||||
}
|
||||
|
||||
nvkms_csc->m[y][x] = cscVal;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static int color_mgmt_config_set(struct nv_drm_crtc_state *nv_crtc_state,
|
||||
struct NvKmsKapiHeadRequestedConfig *req_config)
|
||||
static int color_mgmt_config_set_luts(struct nv_drm_crtc_state *nv_crtc_state,
|
||||
struct NvKmsKapiHeadRequestedConfig *req_config)
|
||||
{
|
||||
struct NvKmsKapiHeadModeSetConfig *modeset_config =
|
||||
&req_config->modeSetConfig;
|
||||
struct drm_crtc_state *crtc_state = &nv_crtc_state->base;
|
||||
int ret = 0;
|
||||
|
||||
struct drm_color_lut *degamma_lut = NULL;
|
||||
struct drm_color_ctm *ctm = NULL;
|
||||
struct drm_color_lut *gamma_lut = NULL;
|
||||
uint64_t degamma_len = 0;
|
||||
uint64_t gamma_len = 0;
|
||||
|
||||
int i;
|
||||
struct drm_plane *plane;
|
||||
struct drm_plane_state *plane_state;
|
||||
|
||||
/*
|
||||
* According to the comment in the Linux kernel's
|
||||
* drivers/gpu/drm/drm_color_mgmt.c, if any of these properties are NULL,
|
||||
* that LUT or CTM needs to be changed to a linear LUT or identity matrix
|
||||
* respectively.
|
||||
* drivers/gpu/drm/drm_color_mgmt.c, if either property is NULL, that LUT
|
||||
* needs to be changed to a linear LUT
|
||||
*/
|
||||
|
||||
req_config->flags.lutChanged = NV_TRUE;
|
||||
if (crtc_state->degamma_lut) {
|
||||
struct drm_color_lut *degamma_lut = NULL;
|
||||
uint64_t degamma_len = 0;
|
||||
|
||||
nv_crtc_state->ilut_ramps = nv_drm_calloc(1, sizeof(*nv_crtc_state->ilut_ramps));
|
||||
if (!nv_crtc_state->ilut_ramps) {
|
||||
ret = -ENOMEM;
|
||||
@ -1007,34 +1110,13 @@ static int color_mgmt_config_set(struct nv_drm_crtc_state *nv_crtc_state,
|
||||
modeset_config->lut.input.start = 0;
|
||||
modeset_config->lut.input.end = 0;
|
||||
modeset_config->lut.input.pRamps = NULL;
|
||||
}
|
||||
|
||||
nv_drm_for_each_new_plane_in_state(crtc_state->state, plane,
|
||||
plane_state, i) {
|
||||
struct nv_drm_plane *nv_plane = to_nv_plane(plane);
|
||||
uint32_t layer = nv_plane->layer_idx;
|
||||
struct NvKmsKapiLayerRequestedConfig *layer_config;
|
||||
|
||||
if (layer == NVKMS_KAPI_LAYER_INVALID_IDX || plane_state->crtc != crtc_state->crtc) {
|
||||
continue;
|
||||
}
|
||||
layer_config = &req_config->layerRequestedConfig[layer];
|
||||
|
||||
if (layer == NVKMS_KAPI_LAYER_PRIMARY_IDX && crtc_state->ctm) {
|
||||
ctm = (struct drm_color_ctm *)crtc_state->ctm->data;
|
||||
|
||||
color_mgmt_config_ctm_to_csc(&layer_config->config.csc, ctm);
|
||||
layer_config->config.cscUseMain = NV_FALSE;
|
||||
} else {
|
||||
/* When crtc_state->ctm is unset, this also sets the main layer to
|
||||
* the identity matrix.
|
||||
*/
|
||||
layer_config->config.csc = NVKMS_IDENTITY_CSC_MATRIX;
|
||||
}
|
||||
layer_config->flags.cscChanged = NV_TRUE;
|
||||
}
|
||||
|
||||
if (crtc_state->gamma_lut) {
|
||||
struct drm_color_lut *gamma_lut = NULL;
|
||||
uint64_t gamma_len = 0;
|
||||
|
||||
nv_crtc_state->olut_ramps = nv_drm_calloc(1, sizeof(*nv_crtc_state->olut_ramps));
|
||||
if (!nv_crtc_state->olut_ramps) {
|
||||
ret = -ENOMEM;
|
||||
@ -1158,7 +1240,7 @@ static int nv_drm_crtc_atomic_check(struct drm_crtc *crtc,
|
||||
crtc_state->color_mgmt_changed = NV_TRUE;
|
||||
}
|
||||
if (crtc_state->color_mgmt_changed) {
|
||||
if ((ret = color_mgmt_config_set(nv_crtc_state, req_config)) != 0) {
|
||||
if ((ret = color_mgmt_config_set_luts(nv_crtc_state, req_config)) != 0) {
|
||||
return ret;
|
||||
}
|
||||
}
|
||||
@ -1428,7 +1510,7 @@ static struct drm_crtc *__nv_drm_crtc_create(struct nv_drm_device *nv_dev,
|
||||
goto failed;
|
||||
}
|
||||
|
||||
nv_state = nv_drm_calloc(1, sizeof(*nv_state));
|
||||
nv_state = nv_drm_crtc_state_alloc();
|
||||
if (nv_state == NULL) {
|
||||
goto failed_state_alloc;
|
||||
}
|
||||
|
@ -74,6 +74,7 @@
|
||||
#endif
|
||||
|
||||
#include <linux/pci.h>
|
||||
#include <linux/workqueue.h>
|
||||
|
||||
/*
|
||||
* Commit fcd70cd36b9b ("drm: Split out drm_probe_helper.h")
|
||||
@ -405,6 +406,27 @@ static int nv_drm_create_properties(struct nv_drm_device *nv_dev)
|
||||
return 0;
|
||||
}
|
||||
|
||||
#if defined(NV_DRM_ATOMIC_MODESET_AVAILABLE)
|
||||
/*
|
||||
* We can't just call drm_kms_helper_hotplug_event directly because
|
||||
* fbdev_generic may attempt to set a mode from inside the hotplug event
|
||||
* handler. Because kapi event handling runs on nvkms_kthread_q, this blocks
|
||||
* other event processing including the flip completion notifier expected by
|
||||
* nv_drm_atomic_commit.
|
||||
*
|
||||
* Defer hotplug event handling to a work item so that nvkms_kthread_q can
|
||||
* continue processing events while a DRM modeset is in progress.
|
||||
*/
|
||||
static void nv_drm_handle_hotplug_event(struct work_struct *work)
|
||||
{
|
||||
struct delayed_work *dwork = to_delayed_work(work);
|
||||
struct nv_drm_device *nv_dev =
|
||||
container_of(dwork, struct nv_drm_device, hotplug_event_work);
|
||||
|
||||
drm_kms_helper_hotplug_event(nv_dev->dev);
|
||||
}
|
||||
#endif
|
||||
|
||||
static int nv_drm_load(struct drm_device *dev, unsigned long flags)
|
||||
{
|
||||
#if defined(NV_DRM_ATOMIC_MODESET_AVAILABLE)
|
||||
@ -540,6 +562,7 @@ static int nv_drm_load(struct drm_device *dev, unsigned long flags)
|
||||
|
||||
/* Enable event handling */
|
||||
|
||||
INIT_DELAYED_WORK(&nv_dev->hotplug_event_work, nv_drm_handle_hotplug_event);
|
||||
atomic_set(&nv_dev->enable_event_handling, true);
|
||||
|
||||
init_waitqueue_head(&nv_dev->flip_event_wq);
|
||||
@ -567,6 +590,7 @@ static void __nv_drm_unload(struct drm_device *dev)
|
||||
return;
|
||||
}
|
||||
|
||||
cancel_delayed_work_sync(&nv_dev->hotplug_event_work);
|
||||
mutex_lock(&nv_dev->lock);
|
||||
|
||||
WARN_ON(nv_dev->subOwnershipGranted);
|
||||
@ -1523,9 +1547,21 @@ static const struct drm_ioctl_desc nv_drm_ioctls[] = {
|
||||
DRM_RENDER_ALLOW|DRM_UNLOCKED),
|
||||
#endif
|
||||
|
||||
/*
|
||||
* DRM_UNLOCKED is implicit for all non-legacy DRM driver IOCTLs since Linux
|
||||
* v4.10 commit fa5386459f06 "drm: Used DRM_LEGACY for all legacy functions"
|
||||
* (Linux v4.4 commit ea487835e887 "drm: Enforce unlocked ioctl operation
|
||||
* for kms driver ioctls" previously did it only for drivers that set the
|
||||
* DRM_MODESET flag), so this will race with SET_CLIENT_CAP. Linux v4.11
|
||||
* commit dcf727ab5d17 "drm: setclientcap doesn't need the drm BKL" also
|
||||
* removed locking from SET_CLIENT_CAP so there is no use attempting to lock
|
||||
* manually. The latter commit acknowledges that this can expose userspace
|
||||
* to inconsistent behavior when racing with itself, but accepts that risk.
|
||||
*/
|
||||
DRM_IOCTL_DEF_DRV(NVIDIA_GET_CLIENT_CAPABILITY,
|
||||
nv_drm_get_client_capability_ioctl,
|
||||
0),
|
||||
|
||||
#if defined(NV_DRM_ATOMIC_MODESET_AVAILABLE)
|
||||
DRM_IOCTL_DEF_DRV(NVIDIA_GET_CRTC_CRC32,
|
||||
nv_drm_get_crtc_crc32_ioctl,
|
||||
@ -1647,7 +1683,7 @@ static struct drm_driver nv_drm_driver = {
|
||||
* kernel supports atomic modeset and the 'modeset' kernel module
|
||||
* parameter is true.
|
||||
*/
|
||||
static void nv_drm_update_drm_driver_features(void)
|
||||
void nv_drm_update_drm_driver_features(void)
|
||||
{
|
||||
#if defined(NV_DRM_ATOMIC_MODESET_AVAILABLE)
|
||||
|
||||
@ -1673,7 +1709,7 @@ static void nv_drm_update_drm_driver_features(void)
|
||||
/*
|
||||
* Helper function for allocate/register DRM device for given NVIDIA GPU ID.
|
||||
*/
|
||||
static void nv_drm_register_drm_device(const nv_gpu_info_t *gpu_info)
|
||||
void nv_drm_register_drm_device(const nv_gpu_info_t *gpu_info)
|
||||
{
|
||||
struct nv_drm_device *nv_dev = NULL;
|
||||
struct drm_device *dev = NULL;
|
||||
@ -1711,8 +1747,15 @@ static void nv_drm_register_drm_device(const nv_gpu_info_t *gpu_info)
|
||||
dev->dev_private = nv_dev;
|
||||
nv_dev->dev = dev;
|
||||
|
||||
bool bus_is_pci =
|
||||
#if defined(NV_LINUX)
|
||||
device->bus == &pci_bus_type;
|
||||
#elif defined(NV_BSD)
|
||||
devclass_find("pci");
|
||||
#endif
|
||||
|
||||
#if defined(NV_DRM_DEVICE_HAS_PDEV)
|
||||
if (device->bus == &pci_bus_type) {
|
||||
if (bus_is_pci) {
|
||||
dev->pdev = to_pci_dev(device);
|
||||
}
|
||||
#endif
|
||||
@ -1733,7 +1776,7 @@ static void nv_drm_register_drm_device(const nv_gpu_info_t *gpu_info)
|
||||
goto failed_grab_ownership;
|
||||
}
|
||||
|
||||
if (device->bus == &pci_bus_type) {
|
||||
if (bus_is_pci) {
|
||||
struct pci_dev *pdev = to_pci_dev(device);
|
||||
|
||||
#if defined(NV_DRM_APERTURE_REMOVE_CONFLICTING_PCI_FRAMEBUFFERS_HAS_DRIVER_ARG)
|
||||
@ -1773,6 +1816,7 @@ failed_drm_alloc:
|
||||
/*
|
||||
* Enumerate NVIDIA GPUs and allocate/register DRM device for each of them.
|
||||
*/
|
||||
#if defined(NV_LINUX)
|
||||
int nv_drm_probe_devices(void)
|
||||
{
|
||||
nv_gpu_info_t *gpu_info = NULL;
|
||||
@ -1815,6 +1859,7 @@ done:
|
||||
|
||||
return ret;
|
||||
}
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Unregister all NVIDIA DRM devices.
|
||||
@ -1840,4 +1885,51 @@ void nv_drm_remove_devices(void)
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Handle system suspend and resume.
|
||||
*
|
||||
* Normally, a DRM driver would use drm_mode_config_helper_suspend() to save the
|
||||
* current state on suspend and drm_mode_config_helper_resume() to restore it
|
||||
* after resume. This works for upstream drivers because user-mode tasks are
|
||||
* frozen before the suspend hook is called.
|
||||
*
|
||||
* In the case of nvidia-drm, the suspend hook is also called when 'suspend' is
|
||||
* written to /proc/driver/nvidia/suspend, before user-mode tasks are frozen.
|
||||
* However, we don't actually need to save and restore the display state because
|
||||
* the driver requires a VT switch to an unused VT before suspending and a
|
||||
* switch back to the application (or fbdev console) on resume. The DRM client
|
||||
* (or fbdev helper functions) will restore the appropriate mode on resume.
|
||||
*
|
||||
*/
|
||||
void nv_drm_suspend_resume(NvBool suspend)
|
||||
{
|
||||
#if defined(NV_DRM_ATOMIC_MODESET_AVAILABLE)
|
||||
struct nv_drm_device *nv_dev = dev_list;
|
||||
|
||||
/*
|
||||
* NVKMS shuts down all heads on suspend. Update DRM state accordingly.
|
||||
*/
|
||||
for (nv_dev = dev_list; nv_dev; nv_dev = nv_dev->next) {
|
||||
struct drm_device *dev = nv_dev->dev;
|
||||
|
||||
if (!drm_core_check_feature(dev, DRIVER_MODESET)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if (suspend) {
|
||||
drm_kms_helper_poll_disable(dev);
|
||||
#if defined(NV_DRM_FBDEV_GENERIC_AVAILABLE)
|
||||
drm_fb_helper_set_suspend_unlocked(dev->fb_helper, 1);
|
||||
#endif
|
||||
drm_mode_config_reset(dev);
|
||||
} else {
|
||||
#if defined(NV_DRM_FBDEV_GENERIC_AVAILABLE)
|
||||
drm_fb_helper_set_suspend_unlocked(dev->fb_helper, 0);
|
||||
#endif
|
||||
drm_kms_helper_poll_enable(dev);
|
||||
}
|
||||
}
|
||||
#endif /* NV_DRM_ATOMIC_MODESET_AVAILABLE */
|
||||
}
|
||||
|
||||
#endif /* NV_DRM_AVAILABLE */
|
||||
|
@ -31,6 +31,12 @@ int nv_drm_probe_devices(void);
|
||||
|
||||
void nv_drm_remove_devices(void);
|
||||
|
||||
void nv_drm_suspend_resume(NvBool suspend);
|
||||
|
||||
void nv_drm_register_drm_device(const nv_gpu_info_t *);
|
||||
|
||||
void nv_drm_update_drm_driver_features(void);
|
||||
|
||||
#endif /* defined(NV_DRM_AVAILABLE) */
|
||||
|
||||
#endif /* __NVIDIA_DRM_DRV_H__ */
|
||||
|
@ -300,7 +300,7 @@ void nv_drm_handle_display_change(struct nv_drm_device *nv_dev,
|
||||
|
||||
nv_drm_connector_mark_connection_status_dirty(nv_encoder->nv_connector);
|
||||
|
||||
drm_kms_helper_hotplug_event(dev);
|
||||
schedule_delayed_work(&nv_dev->hotplug_event_work, 0);
|
||||
}
|
||||
|
||||
void nv_drm_handle_dynamic_display_connected(struct nv_drm_device *nv_dev,
|
||||
@ -347,6 +347,6 @@ void nv_drm_handle_dynamic_display_connected(struct nv_drm_device *nv_dev,
|
||||
drm_reinit_primary_mode_group(dev);
|
||||
#endif
|
||||
|
||||
drm_kms_helper_hotplug_event(dev);
|
||||
schedule_delayed_work(&nv_dev->hotplug_event_work, 0);
|
||||
}
|
||||
#endif
|
||||
|
@ -240,7 +240,7 @@ struct drm_framebuffer *nv_drm_internal_framebuffer_create(
|
||||
if (nv_dev->modifiers[i] == DRM_FORMAT_MOD_INVALID) {
|
||||
NV_DRM_DEV_DEBUG_DRIVER(
|
||||
nv_dev,
|
||||
"Invalid format modifier for framebuffer object: 0x%016llx",
|
||||
"Invalid format modifier for framebuffer object: 0x%016" NvU64_fmtx,
|
||||
modifier);
|
||||
return ERR_PTR(-EINVAL);
|
||||
}
|
||||
|
@ -1638,7 +1638,7 @@ int nv_drm_semsurf_fence_wait_ioctl(struct drm_device *dev,
|
||||
if (p->pre_wait_value >= p->post_wait_value) {
|
||||
NV_DRM_DEV_LOG_ERR(
|
||||
nv_dev,
|
||||
"Non-monotonic wait values specified to fence wait: 0x%llu, 0x%llu",
|
||||
"Non-monotonic wait values specified to fence wait: 0x%" NvU64_fmtu ", 0x%" NvU64_fmtu,
|
||||
p->pre_wait_value, p->post_wait_value);
|
||||
goto done;
|
||||
}
|
||||
|
@ -71,12 +71,42 @@ static int __nv_drm_gem_dma_buf_create_mmap_offset(
|
||||
static int __nv_drm_gem_dma_buf_mmap(struct nv_drm_gem_object *nv_gem,
|
||||
struct vm_area_struct *vma)
|
||||
{
|
||||
#if defined(NV_LINUX)
|
||||
struct dma_buf_attachment *attach = nv_gem->base.import_attach;
|
||||
struct dma_buf *dma_buf = attach->dmabuf;
|
||||
#endif
|
||||
struct file *old_file;
|
||||
int ret;
|
||||
|
||||
/* check if buffer supports mmap */
|
||||
#if defined(NV_BSD)
|
||||
/*
|
||||
* Most of the FreeBSD DRM code refers to struct file*, which is actually
|
||||
* a struct linux_file*. The dmabuf code in FreeBSD is not actually plumbed
|
||||
* through the same linuxkpi bits it seems (probably so it can be used
|
||||
* elsewhere), so dma_buf->file really is a native FreeBSD struct file...
|
||||
*/
|
||||
if (!nv_gem->base.filp->f_op->mmap)
|
||||
return -EINVAL;
|
||||
|
||||
/* readjust the vma */
|
||||
get_file(nv_gem->base.filp);
|
||||
old_file = vma->vm_file;
|
||||
vma->vm_file = nv_gem->base.filp;
|
||||
vma->vm_pgoff -= drm_vma_node_start(&nv_gem->base.vma_node);
|
||||
|
||||
ret = nv_gem->base.filp->f_op->mmap(nv_gem->base.filp, vma);
|
||||
|
||||
if (ret) {
|
||||
/* restore old parameters on failure */
|
||||
vma->vm_file = old_file;
|
||||
vma->vm_pgoff += drm_vma_node_start(&nv_gem->base.vma_node);
|
||||
fput(nv_gem->base.filp);
|
||||
} else {
|
||||
if (old_file)
|
||||
fput(old_file);
|
||||
}
|
||||
#else
|
||||
if (!dma_buf->file->f_op->mmap)
|
||||
return -EINVAL;
|
||||
|
||||
@ -84,18 +114,20 @@ static int __nv_drm_gem_dma_buf_mmap(struct nv_drm_gem_object *nv_gem,
|
||||
get_file(dma_buf->file);
|
||||
old_file = vma->vm_file;
|
||||
vma->vm_file = dma_buf->file;
|
||||
vma->vm_pgoff -= drm_vma_node_start(&nv_gem->base.vma_node);;
|
||||
vma->vm_pgoff -= drm_vma_node_start(&nv_gem->base.vma_node);
|
||||
|
||||
ret = dma_buf->file->f_op->mmap(dma_buf->file, vma);
|
||||
|
||||
if (ret) {
|
||||
/* restore old parameters on failure */
|
||||
vma->vm_file = old_file;
|
||||
vma->vm_pgoff += drm_vma_node_start(&nv_gem->base.vma_node);
|
||||
fput(dma_buf->file);
|
||||
} else {
|
||||
if (old_file)
|
||||
fput(old_file);
|
||||
}
|
||||
#endif
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
@ -37,6 +37,9 @@
|
||||
#endif
|
||||
|
||||
#include <linux/io.h>
|
||||
#if defined(NV_BSD)
|
||||
#include <vm/vm_pageout.h>
|
||||
#endif
|
||||
|
||||
#include "nv-mm.h"
|
||||
|
||||
@ -93,7 +96,17 @@ static vm_fault_t __nv_drm_gem_nvkms_handle_vma_fault(
|
||||
if (nv_nvkms_memory->pages_count == 0) {
|
||||
pfn = (unsigned long)(uintptr_t)nv_nvkms_memory->pPhysicalAddress;
|
||||
pfn >>= PAGE_SHIFT;
|
||||
#if defined(NV_LINUX)
|
||||
/*
|
||||
* FreeBSD doesn't set pgoff. We instead have pfn be the base physical
|
||||
* address, and we will calculate the index pidx from the virtual address.
|
||||
*
|
||||
* This only works because linux_cdev_pager_populate passes the pidx as
|
||||
* vmf->virtual_address. Then we turn the virtual address
|
||||
* into a physical page number.
|
||||
*/
|
||||
pfn += page_offset;
|
||||
#endif
|
||||
} else {
|
||||
BUG_ON(page_offset >= nv_nvkms_memory->pages_count);
|
||||
pfn = page_to_pfn(nv_nvkms_memory->pages[page_offset]);
|
||||
@ -243,6 +256,15 @@ static int __nv_drm_nvkms_gem_obj_init(
|
||||
NvU64 *pages = NULL;
|
||||
NvU32 numPages = 0;
|
||||
|
||||
if ((size % PAGE_SIZE) != 0) {
|
||||
NV_DRM_DEV_LOG_ERR(
|
||||
nv_dev,
|
||||
"NvKmsKapiMemory 0x%p size should be in a multiple of page size to "
|
||||
"create a gem object",
|
||||
pMemory);
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
nv_nvkms_memory->pPhysicalAddress = NULL;
|
||||
nv_nvkms_memory->pWriteCombinedIORemapAddress = NULL;
|
||||
nv_nvkms_memory->physically_mapped = false;
|
||||
@ -314,7 +336,7 @@ int nv_drm_dumb_create(
|
||||
ret = -ENOMEM;
|
||||
NV_DRM_DEV_LOG_ERR(
|
||||
nv_dev,
|
||||
"Failed to allocate NvKmsKapiMemory for dumb object of size %llu",
|
||||
"Failed to allocate NvKmsKapiMemory for dumb object of size %" NvU64_fmtu,
|
||||
args->size);
|
||||
goto nvkms_alloc_memory_failed;
|
||||
}
|
||||
|
@ -36,6 +36,10 @@
|
||||
#include "linux/mm.h"
|
||||
#include "nv-mm.h"
|
||||
|
||||
#if defined(NV_BSD)
|
||||
#include <vm/vm_pageout.h>
|
||||
#endif
|
||||
|
||||
static inline
|
||||
void __nv_drm_gem_user_memory_free(struct nv_drm_gem_object *nv_gem)
|
||||
{
|
||||
@ -113,6 +117,10 @@ static vm_fault_t __nv_drm_gem_user_memory_handle_vma_fault(
|
||||
page_offset = vmf->pgoff - drm_vma_node_start(&gem->vma_node);
|
||||
|
||||
BUG_ON(page_offset >= nv_user_memory->pages_count);
|
||||
|
||||
#if !defined(NV_LINUX)
|
||||
ret = vmf_insert_pfn(vma, address, page_to_pfn(nv_user_memory->pages[page_offset]));
|
||||
#else /* !defined(NV_LINUX) */
|
||||
ret = vm_insert_page(vma, address, nv_user_memory->pages[page_offset]);
|
||||
switch (ret) {
|
||||
case 0:
|
||||
@ -131,6 +139,7 @@ static vm_fault_t __nv_drm_gem_user_memory_handle_vma_fault(
|
||||
ret = VM_FAULT_SIGBUS;
|
||||
break;
|
||||
}
|
||||
#endif /* !defined(NV_LINUX) */
|
||||
|
||||
return ret;
|
||||
}
|
||||
@ -170,7 +179,7 @@ int nv_drm_gem_import_userspace_memory_ioctl(struct drm_device *dev,
|
||||
if ((params->size % PAGE_SIZE) != 0) {
|
||||
NV_DRM_DEV_LOG_ERR(
|
||||
nv_dev,
|
||||
"Userspace memory 0x%llx size should be in a multiple of page "
|
||||
"Userspace memory 0x%" NvU64_fmtx " size should be in a multiple of page "
|
||||
"size to create a gem object",
|
||||
params->address);
|
||||
return -EINVAL;
|
||||
@ -183,7 +192,7 @@ int nv_drm_gem_import_userspace_memory_ioctl(struct drm_device *dev,
|
||||
if (ret != 0) {
|
||||
NV_DRM_DEV_LOG_ERR(
|
||||
nv_dev,
|
||||
"Failed to lock user pages for address 0x%llx: %d",
|
||||
"Failed to lock user pages for address 0x%" NvU64_fmtx ": %d",
|
||||
params->address, ret);
|
||||
return ret;
|
||||
}
|
||||
|
@ -612,6 +612,19 @@ static inline int nv_drm_format_num_planes(uint32_t format)
|
||||
|
||||
#endif /* defined(NV_DRM_FORMAT_MODIFIERS_PRESENT) */
|
||||
|
||||
/*
|
||||
* DRM_UNLOCKED was removed with linux-next commit 2798ffcc1d6a ("drm: Remove
|
||||
* locking for legacy ioctls and DRM_UNLOCKED"), but it was previously made
|
||||
* implicit for all non-legacy DRM driver IOCTLs since Linux v4.10 commit
|
||||
* fa5386459f06 "drm: Used DRM_LEGACY for all legacy functions" (Linux v4.4
|
||||
* commit ea487835e887 "drm: Enforce unlocked ioctl operation for kms driver
|
||||
* ioctls" previously did it only for drivers that set the DRM_MODESET flag), so
|
||||
* it was effectively a no-op anyway.
|
||||
*/
|
||||
#if !defined(NV_DRM_UNLOCKED_IOCTL_FLAG_PRESENT)
|
||||
#define DRM_UNLOCKED 0
|
||||
#endif
|
||||
|
||||
/*
|
||||
* drm_vma_offset_exact_lookup_locked() was added
|
||||
* by kernel commit 2225cfe46bcc which was Signed-off-by:
|
||||
|
@ -71,7 +71,7 @@
|
||||
*
|
||||
* 'warning: suggest parentheses around arithmetic in operand of |'
|
||||
*/
|
||||
#if defined(NV_LINUX)
|
||||
#if defined(NV_LINUX) || defined(NV_BSD)
|
||||
#define DRM_IOCTL_NVIDIA_FENCE_SUPPORTED \
|
||||
DRM_IO(DRM_COMMAND_BASE + DRM_NVIDIA_FENCE_SUPPORTED)
|
||||
#define DRM_IOCTL_NVIDIA_DMABUF_SUPPORTED \
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2015, NVIDIA CORPORATION. All rights reserved.
|
||||
* Copyright (c) 2015-2023, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
@ -21,8 +21,6 @@
|
||||
*/
|
||||
|
||||
#include <linux/module.h>
|
||||
#include <linux/slab.h>
|
||||
#include <linux/err.h>
|
||||
|
||||
#include "nvidia-drm-os-interface.h"
|
||||
#include "nvidia-drm.h"
|
||||
@ -31,261 +29,18 @@
|
||||
|
||||
#if defined(NV_DRM_AVAILABLE)
|
||||
|
||||
#if defined(NV_DRM_DRMP_H_PRESENT)
|
||||
#include <drm/drmP.h>
|
||||
#endif
|
||||
|
||||
#if defined(NV_LINUX_SYNC_FILE_H_PRESENT)
|
||||
#include <linux/file.h>
|
||||
#include <linux/sync_file.h>
|
||||
#endif
|
||||
|
||||
#include <linux/vmalloc.h>
|
||||
#include <linux/sched.h>
|
||||
|
||||
#include "nv-mm.h"
|
||||
|
||||
MODULE_PARM_DESC(
|
||||
modeset,
|
||||
"Enable atomic kernel modesetting (1 = enable, 0 = disable (default))");
|
||||
bool nv_drm_modeset_module_param = false;
|
||||
module_param_named(modeset, nv_drm_modeset_module_param, bool, 0400);
|
||||
|
||||
#if defined(NV_DRM_FBDEV_GENERIC_AVAILABLE)
|
||||
MODULE_PARM_DESC(
|
||||
fbdev,
|
||||
"Create a framebuffer device (1 = enable, 0 = disable (default)) (EXPERIMENTAL)");
|
||||
bool nv_drm_fbdev_module_param = false;
|
||||
module_param_named(fbdev, nv_drm_fbdev_module_param, bool, 0400);
|
||||
#endif
|
||||
|
||||
/*
 * Allocate a zero-filled buffer large enough for nmemb elements of size bytes
 * each, using kzalloc() with GFP_KERNEL.
 *
 * Returns NULL if nmemb * size overflows size_t; otherwise returns whatever
 * kzalloc() returns.
 */
void *nv_drm_calloc(size_t nmemb, size_t size)
{
    const size_t total_size = nmemb * size;

    /* The product wrapped if dividing it back does not recover 'size'. */
    if (nmemb != 0 && total_size / nmemb != size) {
        return NULL;
    }

    return kzalloc(total_size, GFP_KERNEL);
}
|
||||
|
||||
/*
 * Release memory with kfree(). Pointers for which IS_ERR() is true are
 * ignored, so error-encoded pointers may be passed safely.
 */
void nv_drm_free(void *ptr)
{
    if (!IS_ERR(ptr)) {
        kfree(ptr);
    }
}
|
||||
|
||||
char *nv_drm_asprintf(const char *fmt, ...)
|
||||
{
|
||||
va_list ap;
|
||||
char *p;
|
||||
|
||||
va_start(ap, fmt);
|
||||
p = kvasprintf(GFP_KERNEL, fmt, ap);
|
||||
va_end(ap);
|
||||
|
||||
return p;
|
||||
}
|
||||
|
||||
/*
 * WRITE_COMBINE_FLUSH() expands to an architecture-specific instruction with a
 * "memory" clobber: "sfence" on x86/x86-64, "sync" on ppc64le, and the
 * generic mb() everywhere else.
 */
#if defined(NVCPU_X86) || defined(NVCPU_X86_64)
#define WRITE_COMBINE_FLUSH() __asm__ __volatile__("sfence" : : : "memory")
#elif defined(NVCPU_PPC64LE)
#define WRITE_COMBINE_FLUSH() __asm__ __volatile__("sync" : : : "memory")
#else
#define WRITE_COMBINE_FLUSH() mb()
#endif

/* Function wrapper around WRITE_COMBINE_FLUSH(). */
void nv_drm_write_combine_flush(void)
{
    WRITE_COMBINE_FLUSH();
}
|
||||
|
||||
int nv_drm_lock_user_pages(unsigned long address,
|
||||
unsigned long pages_count, struct page ***pages)
|
||||
{
|
||||
struct mm_struct *mm = current->mm;
|
||||
struct page **user_pages;
|
||||
int pages_pinned;
|
||||
|
||||
user_pages = nv_drm_calloc(pages_count, sizeof(*user_pages));
|
||||
|
||||
if (user_pages == NULL) {
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
nv_mmap_read_lock(mm);
|
||||
|
||||
pages_pinned = NV_PIN_USER_PAGES(address, pages_count, FOLL_WRITE,
|
||||
user_pages, NULL);
|
||||
nv_mmap_read_unlock(mm);
|
||||
|
||||
if (pages_pinned < 0 || (unsigned)pages_pinned < pages_count) {
|
||||
goto failed;
|
||||
}
|
||||
|
||||
*pages = user_pages;
|
||||
|
||||
return 0;
|
||||
|
||||
failed:
|
||||
|
||||
if (pages_pinned > 0) {
|
||||
int i;
|
||||
|
||||
for (i = 0; i < pages_pinned; i++) {
|
||||
NV_UNPIN_USER_PAGE(user_pages[i]);
|
||||
}
|
||||
}
|
||||
|
||||
nv_drm_free(user_pages);
|
||||
|
||||
return (pages_pinned < 0) ? pages_pinned : -EINVAL;
|
||||
}
|
||||
|
||||
/*
 * Undo nv_drm_lock_user_pages(): mark each of the pages_count pages dirty,
 * unpin it, and then free the page array itself with nv_drm_free().
 */
void nv_drm_unlock_user_pages(unsigned long pages_count, struct page **pages)
{
    unsigned long idx = 0;

    while (idx < pages_count) {
        struct page *page = pages[idx];

        set_page_dirty_lock(page);
        NV_UNPIN_USER_PAGE(page);
        idx++;
    }

    nv_drm_free(pages);
}
|
||||
|
||||
void *nv_drm_vmap(struct page **pages, unsigned long pages_count)
|
||||
{
|
||||
return vmap(pages, pages_count, VM_USERMAP, PAGE_KERNEL);
|
||||
}
|
||||
|
||||
void nv_drm_vunmap(void *address)
|
||||
{
|
||||
vunmap(address);
|
||||
}
|
||||
|
||||
/*
 * Initialize a worker: clear its shutting_down flag, start its nv_kthread_q
 * under the given name, and initialize its spinlock.
 *
 * Returns false if nv_kthread_q_init() reports a non-zero status (the
 * spinlock is not initialized in that case), true otherwise.
 */
bool nv_drm_workthread_init(nv_drm_workthread *worker, const char *name)
{
    worker->shutting_down = false;

    if (nv_kthread_q_init(&worker->q, name) != 0) {
        return false;
    }

    spin_lock_init(&worker->lock);
    return true;
}
|
||||
|
||||
/*
 * Shut a worker down: set shutting_down while holding the worker lock, so
 * nv_drm_workthread_add_work() stops queueing new items, then stop the queue.
 */
void nv_drm_workthread_shutdown(nv_drm_workthread *worker)
{
    unsigned long irq_flags;

    spin_lock_irqsave(&worker->lock, irq_flags);
    worker->shutting_down = true;
    spin_unlock_irqrestore(&worker->lock, irq_flags);

    nv_kthread_q_stop(&worker->q);
}
|
||||
|
||||
/*
 * Prepare a work item: forwards the item, its callback, and the callback
 * argument to nv_kthread_q_item_init().
 */
void nv_drm_workthread_work_init(nv_drm_work *work,
                                 void (*callback)(void *),
                                 void *arg)
{
    nv_kthread_q_item_init(work, callback, arg);
}
|
||||
|
||||
/*
 * Queue a work item on the worker unless it is shutting down.
 *
 * The shutting_down check and the scheduling call both happen under the
 * worker lock. Returns the result of nv_kthread_q_schedule_q_item(), or 0 if
 * the worker is shutting down and nothing was queued.
 */
int nv_drm_workthread_add_work(nv_drm_workthread *worker, nv_drm_work *work)
{
    unsigned long irq_flags;
    int status = 0;

    spin_lock_irqsave(&worker->lock, irq_flags);
    if (!worker->shutting_down) {
        status = nv_kthread_q_schedule_q_item(&worker->q, work);
    }
    spin_unlock_irqrestore(&worker->lock, irq_flags);

    return status;
}
|
||||
|
||||
void nv_drm_timer_setup(nv_drm_timer *timer, void (*callback)(nv_drm_timer *nv_drm_timer))
|
||||
{
|
||||
nv_timer_setup(timer, callback);
|
||||
}
|
||||
|
||||
void nv_drm_mod_timer(nv_drm_timer *timer, unsigned long timeout_native)
|
||||
{
|
||||
mod_timer(&timer->kernel_timer, timeout_native);
|
||||
}
|
||||
|
||||
unsigned long nv_drm_timer_now(void)
|
||||
{
|
||||
return jiffies;
|
||||
}
|
||||
|
||||
unsigned long nv_drm_timeout_from_ms(NvU64 relative_timeout_ms)
|
||||
{
|
||||
return jiffies + msecs_to_jiffies(relative_timeout_ms);
|
||||
}
|
||||
|
||||
/*
 * Cancel the timer's kernel_timer with del_timer_sync().
 *
 * Returns true when del_timer_sync() returns non-zero, false otherwise.
 */
bool nv_drm_del_timer_sync(nv_drm_timer *timer)
{
    return del_timer_sync(&timer->kernel_timer) != 0;
}
|
||||
|
||||
#if defined(NV_DRM_FENCE_AVAILABLE)
|
||||
/*
 * Wrap a fence in a sync_file and return a new O_CLOEXEC file descriptor
 * referring to it.
 *
 * Returns the file descriptor on success. On failure returns a negative
 * value: the error from get_unused_fd_flags(), -ENOMEM if sync_file_create()
 * returns NULL, or the error encoded in an error pointer returned by
 * sync_file_create(). The reserved descriptor is released on every failure
 * path. Returns -EINVAL when NV_LINUX_SYNC_FILE_H_PRESENT is not defined.
 */
int nv_drm_create_sync_file(nv_dma_fence_t *fence)
{
#if defined(NV_LINUX_SYNC_FILE_H_PRESENT)
    struct sync_file *sync;
    int fd = get_unused_fd_flags(O_CLOEXEC);

    if (fd < 0) {
        return fd;
    }

    /* sync_file_create() generates its own reference to the fence */
    sync = sync_file_create(fence);

    /*
     * sync_file_create() signals failure by returning NULL, which IS_ERR()
     * does not catch. Check for NULL explicitly so a failed allocation is
     * reported instead of dereferencing a NULL sync_file below.
     */
    if (!sync) {
        put_unused_fd(fd);
        return -ENOMEM;
    }

    /* Kept in case an implementation reports failure via an error pointer. */
    if (IS_ERR(sync)) {
        put_unused_fd(fd);
        return PTR_ERR(sync);
    }

    fd_install(fd, sync->file);

    return fd;
#else /* defined(NV_LINUX_SYNC_FILE_H_PRESENT) */
    return -EINVAL;
#endif /* defined(NV_LINUX_SYNC_FILE_H_PRESENT) */
}
|
||||
|
||||
/*
 * Look up the fence behind a sync_file descriptor.
 *
 * Returns the result of sync_file_get_fence(fd) when
 * NV_SYNC_FILE_GET_FENCE_PRESENT is defined, and NULL otherwise.
 */
nv_dma_fence_t *nv_drm_sync_file_get_fence(int fd)
{
#if !defined(NV_SYNC_FILE_GET_FENCE_PRESENT)
    return NULL;
#else /* !defined(NV_SYNC_FILE_GET_FENCE_PRESENT) */
    return sync_file_get_fence(fd);
#endif /* !defined(NV_SYNC_FILE_GET_FENCE_PRESENT) */
}
|
||||
#endif /* defined(NV_DRM_FENCE_AVAILABLE) */
|
||||
|
||||
void nv_drm_yield(void)
|
||||
{
|
||||
set_current_state(TASK_INTERRUPTIBLE);
|
||||
schedule_timeout(1);
|
||||
}
|
||||
|
||||
#endif /* NV_DRM_AVAILABLE */
|
||||
|
||||
/*************************************************************************
|
||||
|
@ -321,6 +321,24 @@ int nv_drm_atomic_check(struct drm_device *dev,
|
||||
{
|
||||
int ret = 0;
|
||||
|
||||
#if defined(NV_DRM_COLOR_MGMT_AVAILABLE)
|
||||
struct drm_crtc *crtc;
|
||||
struct drm_crtc_state *crtc_state;
|
||||
int i;
|
||||
|
||||
nv_drm_for_each_crtc_in_state(state, crtc, crtc_state, i) {
|
||||
/*
|
||||
* if the color management changed on the crtc, we need to update the
|
||||
* crtc's plane's CSC matrices, so add the crtc's planes to the commit
|
||||
*/
|
||||
if (crtc_state->color_mgmt_changed) {
|
||||
if ((ret = drm_atomic_add_affected_planes(state, crtc)) != 0) {
|
||||
goto done;
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif /* NV_DRM_COLOR_MGMT_AVAILABLE */
|
||||
|
||||
if ((ret = drm_atomic_helper_check(dev, state)) != 0) {
|
||||
goto done;
|
||||
}
|
||||
|
285
kernel-open/nvidia-drm/nvidia-drm-os-interface.c
Normal file
285
kernel-open/nvidia-drm/nvidia-drm-os-interface.c
Normal file
@ -0,0 +1,285 @@
|
||||
/*
|
||||
* Copyright (c) 2015-2023, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <linux/slab.h>
|
||||
|
||||
#include "nvidia-drm-os-interface.h"
|
||||
|
||||
#if defined(NV_DRM_AVAILABLE)
|
||||
|
||||
#if defined(NV_LINUX_SYNC_FILE_H_PRESENT)
|
||||
#include <linux/file.h>
|
||||
#include <linux/sync_file.h>
|
||||
#endif
|
||||
|
||||
#include <linux/vmalloc.h>
|
||||
#include <linux/sched.h>
|
||||
#include <linux/device.h>
|
||||
|
||||
#include "nv-mm.h"
|
||||
|
||||
#if defined(NV_DRM_DRMP_H_PRESENT)
|
||||
#include <drm/drmP.h>
|
||||
#endif
|
||||
|
||||
bool nv_drm_modeset_module_param = false;
|
||||
bool nv_drm_fbdev_module_param = false;
|
||||
|
||||
/*
 * Allocate zero-filled memory for an array of nmemb elements of size bytes
 * each, using kzalloc() with GFP_KERNEL.
 *
 * Returns NULL if nmemb * size overflows size_t; otherwise returns whatever
 * kzalloc() returns.
 */
void *nv_drm_calloc(size_t nmemb, size_t size)
{
    const size_t bytes = nmemb * size;

    /* A wrapped product no longer divides back to the element size */
    if ((nmemb != 0) && ((bytes / nmemb) != size)) {
        return NULL;
    }

    return kzalloc(bytes, GFP_KERNEL);
}
|
||||
|
||||
/*
 * Release memory with kfree().  Pointers for which IS_ERR() is true are
 * ignored rather than passed to kfree().
 */
void nv_drm_free(void *ptr)
{
    if (!IS_ERR(ptr)) {
        kfree(ptr);
    }
}
|
||||
|
||||
char *nv_drm_asprintf(const char *fmt, ...)
|
||||
{
|
||||
va_list ap;
|
||||
char *p;
|
||||
|
||||
va_start(ap, fmt);
|
||||
p = kvasprintf(GFP_KERNEL, fmt, ap);
|
||||
va_end(ap);
|
||||
|
||||
return p;
|
||||
}
|
||||
|
||||
/*
 * WRITE_COMBINE_FLUSH() expands to a per-architecture barrier:
 *   x86 / x86-64 : "sfence"
 *   ppc64le      : "sync"
 *   anything else: mb()
 */
#if defined(NVCPU_X86) || defined(NVCPU_X86_64)
#define WRITE_COMBINE_FLUSH() asm volatile("sfence":::"memory")
#elif defined(NVCPU_PPC64LE)
#define WRITE_COMBINE_FLUSH() asm volatile("sync":::"memory")
#else
#define WRITE_COMBINE_FLUSH() mb()
#endif

/* Function wrapper around the WRITE_COMBINE_FLUSH() barrier */
void nv_drm_write_combine_flush(void)
{
    WRITE_COMBINE_FLUSH();
}
|
||||
|
||||
int nv_drm_lock_user_pages(unsigned long address,
|
||||
unsigned long pages_count, struct page ***pages)
|
||||
{
|
||||
struct mm_struct *mm = current->mm;
|
||||
struct page **user_pages;
|
||||
int pages_pinned;
|
||||
|
||||
user_pages = nv_drm_calloc(pages_count, sizeof(*user_pages));
|
||||
|
||||
if (user_pages == NULL) {
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
nv_mmap_read_lock(mm);
|
||||
|
||||
pages_pinned = NV_PIN_USER_PAGES(address, pages_count, FOLL_WRITE,
|
||||
user_pages);
|
||||
nv_mmap_read_unlock(mm);
|
||||
|
||||
if (pages_pinned < 0 || (unsigned)pages_pinned < pages_count) {
|
||||
goto failed;
|
||||
}
|
||||
|
||||
*pages = user_pages;
|
||||
|
||||
return 0;
|
||||
|
||||
failed:
|
||||
|
||||
if (pages_pinned > 0) {
|
||||
int i;
|
||||
|
||||
for (i = 0; i < pages_pinned; i++) {
|
||||
NV_UNPIN_USER_PAGE(user_pages[i]);
|
||||
}
|
||||
}
|
||||
|
||||
nv_drm_free(user_pages);
|
||||
|
||||
return (pages_pinned < 0) ? pages_pinned : -EINVAL;
|
||||
}
|
||||
|
||||
/*
 * Mark each of the pages_count pages dirty, unpin it, and then free the
 * pages array itself with nv_drm_free().
 */
void nv_drm_unlock_user_pages(unsigned long pages_count, struct page **pages)
{
    unsigned long idx = 0;

    while (idx < pages_count) {
        struct page *page = pages[idx++];

        set_page_dirty_lock(page);
        NV_UNPIN_USER_PAGE(page);
    }

    nv_drm_free(pages);
}
|
||||
|
||||
/*
|
||||
* linuxkpi vmap doesn't use the flags argument as it
|
||||
* doesn't seem to be needed. Define VM_USERMAP to 0
|
||||
* to make errors go away
|
||||
*
|
||||
* vmap: sys/compat/linuxkpi/common/src/linux_compat.c
|
||||
*/
|
||||
#if defined(NV_BSD)
|
||||
#define VM_USERMAP 0
|
||||
#endif
|
||||
|
||||
void *nv_drm_vmap(struct page **pages, unsigned long pages_count)
|
||||
{
|
||||
return vmap(pages, pages_count, VM_USERMAP, PAGE_KERNEL);
|
||||
}
|
||||
|
||||
/* Pass address straight through to vunmap(). */
void nv_drm_vunmap(void *address)
{
    vunmap(address);
}
|
||||
|
||||
/*
 * Prepare a worker: clear its shutting_down flag, initialise its
 * nv_kthread_q queue under the given name, then initialise its spinlock.
 *
 * Returns false if nv_kthread_q_init() reports a non-zero result (the
 * spinlock is left uninitialised in that case), true otherwise.
 */
bool nv_drm_workthread_init(nv_drm_workthread *worker, const char *name)
{
    worker->shutting_down = false;

    if (nv_kthread_q_init(&worker->q, name) != 0) {
        return false;
    }

    spin_lock_init(&worker->lock);

    return true;
}
|
||||
|
||||
/*
 * Shut a worker down: set shutting_down under the worker lock so that
 * nv_drm_workthread_add_work() stops queueing, then stop the queue.
 */
void nv_drm_workthread_shutdown(nv_drm_workthread *worker)
{
    unsigned long irq_flags;

    spin_lock_irqsave(&worker->lock, irq_flags);
    worker->shutting_down = true;
    spin_unlock_irqrestore(&worker->lock, irq_flags);

    /* Called only after the lock has been released */
    nv_kthread_q_stop(&worker->q);
}
|
||||
|
||||
/*
 * Initialise a work item with its callback and the argument handed to that
 * callback; forwards to nv_kthread_q_item_init().
 */
void nv_drm_workthread_work_init(nv_drm_work *work,
                                 void (*callback)(void *),
                                 void *arg)
{
    nv_kthread_q_item_init(work, callback, arg);
}
|
||||
|
||||
/*
 * Queue a work item on the worker unless the worker is shutting down.
 *
 * Returns the result of nv_kthread_q_schedule_q_item(), or 0 when the
 * worker's shutting_down flag is set and nothing was queued.
 */
int nv_drm_workthread_add_work(nv_drm_workthread *worker, nv_drm_work *work)
{
    unsigned long irq_flags;
    int scheduled;

    /* The flag check and the queueing happen under the same lock hold */
    spin_lock_irqsave(&worker->lock, irq_flags);
    scheduled = worker->shutting_down
        ? 0
        : nv_kthread_q_schedule_q_item(&worker->q, work);
    spin_unlock_irqrestore(&worker->lock, irq_flags);

    return scheduled;
}
|
||||
|
||||
/* Attach callback to timer; forwards to nv_timer_setup(). */
void nv_drm_timer_setup(nv_drm_timer *timer,
                        void (*callback)(nv_drm_timer *nv_drm_timer))
{
    nv_timer_setup(timer, callback);
}
|
||||
|
||||
/*
 * Call mod_timer() on the embedded kernel timer with timeout_native as the
 * expiry value.
 */
void nv_drm_mod_timer(nv_drm_timer *timer, unsigned long timeout_native)
{
    mod_timer(&timer->kernel_timer, timeout_native);
}
|
||||
|
||||
unsigned long nv_drm_timer_now(void)
|
||||
{
|
||||
return jiffies;
|
||||
}
|
||||
|
||||
/*
 * Convert a timeout relative to now, given in milliseconds, into an
 * absolute value in the units returned by nv_drm_timer_now() (jiffies).
 *
 * Requests larger than an unsigned int can hold are clamped to the largest
 * unsigned int value rather than truncated.
 */
unsigned long nv_drm_timeout_from_ms(NvU64 relative_timeout_ms)
{
    /*
     * Linux msecs_to_jiffies() takes an unsigned int.  Passing the 64-bit
     * value directly would silently drop the upper bits, so a very long
     * timeout could become a very short one.  Clamp before converting.
     */
    const NvU64 max_ms = (NvU64)~0U;
    const unsigned int clamped_ms = (relative_timeout_ms > max_ms)
        ? ~0U
        : (unsigned int)relative_timeout_ms;

    return jiffies + msecs_to_jiffies(clamped_ms);
}
|
||||
|
||||
/*
 * Call del_timer_sync() on the embedded kernel timer.
 *
 * Returns true when del_timer_sync() returns non-zero, false otherwise.
 */
bool nv_drm_del_timer_sync(nv_drm_timer *timer)
{
    return del_timer_sync(&timer->kernel_timer) != 0;
}
|
||||
|
||||
#if defined(NV_DRM_FENCE_AVAILABLE)
|
||||
/*
 * Wrap a fence in a sync_file and expose it through a new file descriptor.
 *
 * Returns the installed descriptor (>= 0) on success, or a negative errno
 * value on failure.  When NV_LINUX_SYNC_FILE_H_PRESENT is not defined the
 * function always returns -EINVAL.
 */
int nv_drm_create_sync_file(nv_dma_fence_t *fence)
{
#if defined(NV_LINUX_SYNC_FILE_H_PRESENT)
    struct sync_file *sync;
    int fd = get_unused_fd_flags(O_CLOEXEC);

    if (fd < 0) {
        return fd;
    }

    /* sync_file_create() generates its own reference to the fence */
    sync = sync_file_create(fence);

    /*
     * sync_file_create() reports failure by returning NULL.  The IS_ERR()
     * test is kept for any implementation that returns an ERR_PTR, but NULL
     * must be rejected too, otherwise sync->file below is a NULL dereference.
     */
    if (sync == NULL || IS_ERR(sync)) {
        /* Give back the descriptor reserved above; nothing was installed */
        put_unused_fd(fd);
        return (sync == NULL) ? -ENOMEM : PTR_ERR(sync);
    }

    fd_install(fd, sync->file);

    return fd;
#else /* defined(NV_LINUX_SYNC_FILE_H_PRESENT) */
    return -EINVAL;
#endif /* defined(NV_LINUX_SYNC_FILE_H_PRESENT) */
}
|
||||
|
||||
nv_dma_fence_t *nv_drm_sync_file_get_fence(int fd)
|
||||
{
|
||||
#if defined(NV_SYNC_FILE_GET_FENCE_PRESENT)
|
||||
return sync_file_get_fence(fd);
|
||||
#else /* defined(NV_SYNC_FILE_GET_FENCE_PRESENT) */
|
||||
return NULL;
|
||||
#endif /* defined(NV_SYNC_FILE_GET_FENCE_PRESENT) */
|
||||
}
|
||||
#endif /* defined(NV_DRM_FENCE_AVAILABLE) */
|
||||
|
||||
void nv_drm_yield(void)
|
||||
{
|
||||
set_current_state(TASK_INTERRUPTIBLE);
|
||||
schedule_timeout(1);
|
||||
}
|
||||
|
||||
#endif /* NV_DRM_AVAILABLE */
|
@ -33,7 +33,7 @@
|
||||
#include "nvidia-dma-fence-helper.h"
|
||||
#endif
|
||||
|
||||
#if defined(NV_LINUX)
|
||||
#if defined(NV_LINUX) || defined(NV_BSD)
|
||||
#include "nv-kthread-q.h"
|
||||
#include "linux/spinlock.h"
|
||||
|
||||
@ -45,18 +45,18 @@ typedef struct nv_drm_workthread {
|
||||
|
||||
typedef nv_kthread_q_item_t nv_drm_work;
|
||||
|
||||
#else /* defined(NV_LINUX) */
|
||||
#else
|
||||
#error "Need to define deferred work primitives for this OS"
|
||||
#endif /* else defined(NV_LINUX) */
|
||||
#endif
|
||||
|
||||
#if defined(NV_LINUX)
|
||||
#if defined(NV_LINUX) || defined(NV_BSD)
|
||||
#include "nv-timer.h"
|
||||
|
||||
typedef struct nv_timer nv_drm_timer;
|
||||
|
||||
#else /* defined(NV_LINUX) */
|
||||
#else
|
||||
#error "Need to define kernel timer callback primitives for this OS"
|
||||
#endif /* else defined(NV_LINUX) */
|
||||
#endif
|
||||
|
||||
#if defined(NV_DRM_FBDEV_GENERIC_SETUP_PRESENT) && defined(NV_DRM_APERTURE_REMOVE_CONFLICTING_PCI_FRAMEBUFFERS_PRESENT)
|
||||
#define NV_DRM_FBDEV_GENERIC_AVAILABLE
|
||||
|
@ -126,6 +126,7 @@ struct nv_drm_device {
|
||||
NvU64 modifiers[6 /* block linear */ + 1 /* linear */ + 1 /* terminator */];
|
||||
#endif
|
||||
|
||||
struct delayed_work hotplug_event_work;
|
||||
atomic_t enable_event_handling;
|
||||
|
||||
/**
|
||||
|
131
kernel-open/nvidia-drm/nvidia-drm-sources.mk
Normal file
131
kernel-open/nvidia-drm/nvidia-drm-sources.mk
Normal file
@ -0,0 +1,131 @@
|
||||
###########################################################################
|
||||
# Kbuild fragment for nvidia-drm.ko
|
||||
###########################################################################
|
||||
|
||||
#
|
||||
# Define NVIDIA_DRM_SOURCES
|
||||
#
|
||||
|
||||
NVIDIA_DRM_SOURCES =
|
||||
NVIDIA_DRM_SOURCES += nvidia-drm/nvidia-drm.c
|
||||
NVIDIA_DRM_SOURCES += nvidia-drm/nvidia-drm-drv.c
|
||||
NVIDIA_DRM_SOURCES += nvidia-drm/nvidia-drm-utils.c
|
||||
NVIDIA_DRM_SOURCES += nvidia-drm/nvidia-drm-crtc.c
|
||||
NVIDIA_DRM_SOURCES += nvidia-drm/nvidia-drm-encoder.c
|
||||
NVIDIA_DRM_SOURCES += nvidia-drm/nvidia-drm-connector.c
|
||||
NVIDIA_DRM_SOURCES += nvidia-drm/nvidia-drm-gem.c
|
||||
NVIDIA_DRM_SOURCES += nvidia-drm/nvidia-drm-fb.c
|
||||
NVIDIA_DRM_SOURCES += nvidia-drm/nvidia-drm-modeset.c
|
||||
NVIDIA_DRM_SOURCES += nvidia-drm/nvidia-drm-fence.c
|
||||
NVIDIA_DRM_SOURCES += nvidia-drm/nvidia-drm-helper.c
|
||||
NVIDIA_DRM_SOURCES += nvidia-drm/nv-kthread-q.c
|
||||
NVIDIA_DRM_SOURCES += nvidia-drm/nv-pci-table.c
|
||||
NVIDIA_DRM_SOURCES += nvidia-drm/nvidia-drm-gem-nvkms-memory.c
|
||||
NVIDIA_DRM_SOURCES += nvidia-drm/nvidia-drm-gem-user-memory.c
|
||||
NVIDIA_DRM_SOURCES += nvidia-drm/nvidia-drm-gem-dma-buf.c
|
||||
NVIDIA_DRM_SOURCES += nvidia-drm/nvidia-drm-format.c
|
||||
NVIDIA_DRM_SOURCES += nvidia-drm/nvidia-drm-os-interface.c
|
||||
|
||||
#
|
||||
# Register the conftests needed by nvidia-drm.ko
|
||||
#
|
||||
|
||||
NV_CONFTEST_GENERIC_COMPILE_TESTS += drm_available
|
||||
NV_CONFTEST_GENERIC_COMPILE_TESTS += drm_atomic_available
|
||||
NV_CONFTEST_GENERIC_COMPILE_TESTS += is_export_symbol_gpl_refcount_inc
|
||||
NV_CONFTEST_GENERIC_COMPILE_TESTS += is_export_symbol_gpl_refcount_dec_and_test
|
||||
NV_CONFTEST_GENERIC_COMPILE_TESTS += drm_alpha_blending_available
|
||||
NV_CONFTEST_GENERIC_COMPILE_TESTS += is_export_symbol_present_drm_gem_prime_fd_to_handle
|
||||
NV_CONFTEST_GENERIC_COMPILE_TESTS += is_export_symbol_present_drm_gem_prime_handle_to_fd
|
||||
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += drm_dev_unref
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += drm_reinit_primary_mode_group
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += get_user_pages_remote
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += get_user_pages
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += pin_user_pages_remote
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += pin_user_pages
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += drm_gem_object_lookup
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += drm_atomic_state_ref_counting
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += drm_driver_has_gem_prime_res_obj
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += drm_atomic_helper_connector_dpms
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += drm_connector_funcs_have_mode_in_name
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += drm_connector_has_vrr_capable_property
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += vmf_insert_pfn
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += drm_framebuffer_get
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += drm_gem_object_get
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += drm_dev_put
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += drm_format_num_planes
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += drm_connector_for_each_possible_encoder
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += drm_rotation_available
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += drm_vma_offset_exact_lookup_locked
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += drm_gem_object_put_unlocked
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += nvhost_dma_fence_unpack
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += list_is_first
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += timer_setup
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += dma_fence_set_error
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += fence_set_error
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += sync_file_get_fence
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += drm_aperture_remove_conflicting_pci_framebuffers
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += drm_fbdev_generic_setup
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += drm_connector_attach_hdr_output_metadata_property
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += drm_helper_crtc_enable_color_mgmt
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += drm_crtc_enable_color_mgmt
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += drm_atomic_helper_legacy_gamma_set
|
||||
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_bus_present
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_bus_has_bus_type
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_bus_has_get_irq
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_bus_has_get_name
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_driver_has_device_list
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_driver_has_legacy_dev_list
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_driver_has_set_busid
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_crtc_state_has_connectors_changed
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_init_function_args
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_helper_mode_fill_fb_struct
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_master_drop_has_from_release_arg
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_driver_unload_has_int_return_type
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += vm_fault_has_address
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += vm_ops_fault_removed_vma_arg
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_atomic_helper_crtc_destroy_state_has_crtc_arg
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_atomic_helper_plane_destroy_state_has_plane_arg
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_mode_object_find_has_file_priv_arg
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += dma_buf_owner
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_connector_list_iter
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_atomic_helper_swap_state_has_stall_arg
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_driver_prime_flag_present
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += vm_fault_t
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_gem_object_has_resv
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_crtc_state_has_async_flip
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_crtc_state_has_pageflip_flags
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_crtc_state_has_vrr_enabled
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_format_modifiers_present
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += mm_has_mmap_lock
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_vma_node_is_allowed_has_tag_arg
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_vma_offset_node_has_readonly
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_display_mode_has_vrefresh
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_driver_master_set_has_int_return_type
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_driver_has_gem_free_object
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_prime_pages_to_sg_has_drm_device_arg
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_driver_has_gem_prime_callbacks
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_crtc_atomic_check_has_atomic_state_arg
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_gem_object_vmap_has_map_arg
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_plane_atomic_check_has_atomic_state_arg
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_device_has_pdev
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_crtc_state_has_no_vblank
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_mode_config_has_allow_fb_modifiers
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_has_hdr_output_metadata
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += dma_resv_add_fence
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += dma_resv_reserve_fences
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += reservation_object_reserve_shared_has_num_fences_arg
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_connector_has_override_edid
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_master_has_leases
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_file_get_master
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_modeset_lock_all_end
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_connector_lookup
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_connector_put
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += vm_area_struct_has_const_vm_flags
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_driver_has_dumb_destroy
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += fence_ops_use_64bit_seqno
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_aperture_remove_conflicting_pci_framebuffers_has_driver_arg
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_mode_create_dp_colorspace_property_has_supported_colorspaces_arg
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_unlocked_ioctl_flag_present
|
@ -2,30 +2,16 @@
|
||||
# Kbuild fragment for nvidia-drm.ko
|
||||
###########################################################################
|
||||
|
||||
# Get our source file list and conftest list from the common file
|
||||
include $(src)/nvidia-drm/nvidia-drm-sources.mk
|
||||
|
||||
# Linux-specific sources
|
||||
NVIDIA_DRM_SOURCES += nvidia-drm/nvidia-drm-linux.c
|
||||
|
||||
#
|
||||
# Define NVIDIA_DRM_{SOURCES,OBJECTS}
|
||||
#
|
||||
|
||||
NVIDIA_DRM_SOURCES =
|
||||
NVIDIA_DRM_SOURCES += nvidia-drm/nvidia-drm.c
|
||||
NVIDIA_DRM_SOURCES += nvidia-drm/nvidia-drm-drv.c
|
||||
NVIDIA_DRM_SOURCES += nvidia-drm/nvidia-drm-utils.c
|
||||
NVIDIA_DRM_SOURCES += nvidia-drm/nvidia-drm-crtc.c
|
||||
NVIDIA_DRM_SOURCES += nvidia-drm/nvidia-drm-encoder.c
|
||||
NVIDIA_DRM_SOURCES += nvidia-drm/nvidia-drm-connector.c
|
||||
NVIDIA_DRM_SOURCES += nvidia-drm/nvidia-drm-gem.c
|
||||
NVIDIA_DRM_SOURCES += nvidia-drm/nvidia-drm-fb.c
|
||||
NVIDIA_DRM_SOURCES += nvidia-drm/nvidia-drm-modeset.c
|
||||
NVIDIA_DRM_SOURCES += nvidia-drm/nvidia-drm-fence.c
|
||||
NVIDIA_DRM_SOURCES += nvidia-drm/nvidia-drm-linux.c
|
||||
NVIDIA_DRM_SOURCES += nvidia-drm/nvidia-drm-helper.c
|
||||
NVIDIA_DRM_SOURCES += nvidia-drm/nv-kthread-q.c
|
||||
NVIDIA_DRM_SOURCES += nvidia-drm/nv-pci-table.c
|
||||
NVIDIA_DRM_SOURCES += nvidia-drm/nvidia-drm-gem-nvkms-memory.c
|
||||
NVIDIA_DRM_SOURCES += nvidia-drm/nvidia-drm-gem-user-memory.c
|
||||
NVIDIA_DRM_SOURCES += nvidia-drm/nvidia-drm-gem-dma-buf.c
|
||||
NVIDIA_DRM_SOURCES += nvidia-drm/nvidia-drm-format.c
|
||||
|
||||
NVIDIA_DRM_OBJECTS = $(patsubst %.c,%.o,$(NVIDIA_DRM_SOURCES))
|
||||
|
||||
obj-m += nvidia-drm.o
|
||||
@ -44,107 +30,4 @@ NVIDIA_DRM_CFLAGS += -UDEBUG -U_DEBUG -DNDEBUG -DNV_BUILD_MODULE_INSTANCES=0
|
||||
|
||||
$(call ASSIGN_PER_OBJ_CFLAGS, $(NVIDIA_DRM_OBJECTS), $(NVIDIA_DRM_CFLAGS))
|
||||
|
||||
#
|
||||
# Register the conftests needed by nvidia-drm.ko
|
||||
#
|
||||
|
||||
NV_OBJECTS_DEPEND_ON_CONFTEST += $(NVIDIA_DRM_OBJECTS)
|
||||
|
||||
NV_CONFTEST_GENERIC_COMPILE_TESTS += drm_available
|
||||
NV_CONFTEST_GENERIC_COMPILE_TESTS += drm_atomic_available
|
||||
NV_CONFTEST_GENERIC_COMPILE_TESTS += is_export_symbol_gpl_refcount_inc
|
||||
NV_CONFTEST_GENERIC_COMPILE_TESTS += is_export_symbol_gpl_refcount_dec_and_test
|
||||
NV_CONFTEST_GENERIC_COMPILE_TESTS += drm_alpha_blending_available
|
||||
NV_CONFTEST_GENERIC_COMPILE_TESTS += is_export_symbol_present_drm_gem_prime_fd_to_handle
|
||||
NV_CONFTEST_GENERIC_COMPILE_TESTS += is_export_symbol_present_drm_gem_prime_handle_to_fd
|
||||
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += drm_dev_unref
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += drm_reinit_primary_mode_group
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += get_user_pages_remote
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += get_user_pages
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += pin_user_pages_remote
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += pin_user_pages
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += drm_gem_object_lookup
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += drm_atomic_state_ref_counting
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += drm_driver_has_gem_prime_res_obj
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += drm_atomic_helper_connector_dpms
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += drm_connector_funcs_have_mode_in_name
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += drm_connector_has_vrr_capable_property
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += vmf_insert_pfn
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += drm_framebuffer_get
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += drm_gem_object_get
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += drm_dev_put
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += drm_format_num_planes
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += drm_connector_for_each_possible_encoder
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += drm_rotation_available
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += drm_vma_offset_exact_lookup_locked
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += drm_gem_object_put_unlocked
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += nvhost_dma_fence_unpack
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += list_is_first
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += timer_setup
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += dma_fence_set_error
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += fence_set_error
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += sync_file_get_fence
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += drm_aperture_remove_conflicting_pci_framebuffers
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += drm_fbdev_generic_setup
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += drm_connector_attach_hdr_output_metadata_property
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += drm_helper_crtc_enable_color_mgmt
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += drm_crtc_enable_color_mgmt
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += drm_atomic_helper_legacy_gamma_set
|
||||
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_bus_present
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_bus_has_bus_type
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_bus_has_get_irq
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_bus_has_get_name
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_driver_has_device_list
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_driver_has_legacy_dev_list
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_driver_has_set_busid
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_crtc_state_has_connectors_changed
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_init_function_args
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_helper_mode_fill_fb_struct
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_master_drop_has_from_release_arg
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_driver_unload_has_int_return_type
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += vm_fault_has_address
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += vm_ops_fault_removed_vma_arg
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_atomic_helper_crtc_destroy_state_has_crtc_arg
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_atomic_helper_plane_destroy_state_has_plane_arg
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_mode_object_find_has_file_priv_arg
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += dma_buf_owner
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_connector_list_iter
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_atomic_helper_swap_state_has_stall_arg
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_driver_prime_flag_present
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += vm_fault_t
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_gem_object_has_resv
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_crtc_state_has_async_flip
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_crtc_state_has_pageflip_flags
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_crtc_state_has_vrr_enabled
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_format_modifiers_present
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += mm_has_mmap_lock
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_vma_node_is_allowed_has_tag_arg
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_vma_offset_node_has_readonly
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_display_mode_has_vrefresh
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_driver_master_set_has_int_return_type
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_driver_has_gem_free_object
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_prime_pages_to_sg_has_drm_device_arg
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_driver_has_gem_prime_callbacks
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_crtc_atomic_check_has_atomic_state_arg
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_gem_object_vmap_has_map_arg
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_plane_atomic_check_has_atomic_state_arg
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_device_has_pdev
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_crtc_state_has_no_vblank
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_mode_config_has_allow_fb_modifiers
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_has_hdr_output_metadata
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += dma_resv_add_fence
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += dma_resv_reserve_fences
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += reservation_object_reserve_shared_has_num_fences_arg
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_connector_has_override_edid
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_master_has_leases
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_file_get_master
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_modeset_lock_all_end
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_connector_lookup
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_connector_put
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += vm_area_struct_has_const_vm_flags
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_driver_has_dumb_destroy
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += fence_ops_use_64bit_seqno
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_aperture_remove_conflicting_pci_framebuffers_has_driver_arg
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_mode_create_dp_colorspace_property_has_supported_colorspaces_arg
|
||||
|
@ -45,6 +45,7 @@ int nv_drm_init(void)
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
nvKms->setSuspendResumeCallback(nv_drm_suspend_resume);
|
||||
return nv_drm_probe_devices();
|
||||
#else
|
||||
return 0;
|
||||
@ -54,6 +55,7 @@ int nv_drm_init(void)
|
||||
/*
 * Module teardown.  When NV_DRM_AVAILABLE is defined, first clears the
 * suspend/resume callback held by nvKms and then removes the DRM devices;
 * otherwise this is a no-op.
 */
void nv_drm_exit(void)
{
#if defined(NV_DRM_AVAILABLE)
    /* Unhook the callback before the devices go away */
    nvKms->setSuspendResumeCallback(NULL);

    nv_drm_remove_devices();
#endif /* defined(NV_DRM_AVAILABLE) */
}
|
||||
|
@ -35,12 +35,13 @@
|
||||
#include <linux/list.h>
|
||||
#include <linux/rwsem.h>
|
||||
#include <linux/freezer.h>
|
||||
#include <linux/poll.h>
|
||||
#include <linux/cdev.h>
|
||||
|
||||
#include <acpi/video.h>
|
||||
|
||||
#include "nvstatus.h"
|
||||
|
||||
#include "nv-register-module.h"
|
||||
#include "nv-modeset-interface.h"
|
||||
#include "nv-kref.h"
|
||||
|
||||
@ -53,6 +54,7 @@
|
||||
#include "nv-kthread-q.h"
|
||||
#include "nv-time.h"
|
||||
#include "nv-lock.h"
|
||||
#include "nv-chardev-numbers.h"
|
||||
|
||||
#if !defined(CONFIG_RETPOLINE)
|
||||
#include "nv-retpoline.h"
|
||||
@ -74,6 +76,12 @@ module_param_named(disable_vrr_memclk_switch, disable_vrr_memclk_switch, bool, 0
|
||||
static bool hdmi_deepcolor = false;
|
||||
module_param_named(hdmi_deepcolor, hdmi_deepcolor, bool, 0400);
|
||||
|
||||
static bool vblank_sem_control = false;
|
||||
module_param_named(vblank_sem_control, vblank_sem_control, bool, 0400);
|
||||
|
||||
static bool opportunistic_display_sync = true;
|
||||
module_param_named(opportunistic_display_sync, opportunistic_display_sync, bool, 0400);
|
||||
|
||||
/* These parameters are used for fault injection tests. Normally the defaults
|
||||
* should be used. */
|
||||
MODULE_PARM_DESC(fail_malloc, "Fail the Nth call to nvkms_alloc");
|
||||
@ -117,6 +125,16 @@ NvBool nvkms_hdmi_deepcolor(void)
|
||||
return hdmi_deepcolor;
|
||||
}
|
||||
|
||||
NvBool nvkms_vblank_sem_control(void)
|
||||
{
|
||||
return vblank_sem_control;
|
||||
}
|
||||
|
||||
NvBool nvkms_opportunistic_display_sync(void)
|
||||
{
|
||||
return opportunistic_display_sync;
|
||||
}
|
||||
|
||||
#define NVKMS_SYNCPT_STUBS_NEEDED
|
||||
|
||||
/*************************************************************************
|
||||
@ -482,6 +500,7 @@ nvkms_event_queue_changed(nvkms_per_open_handle_t *pOpenKernel,
|
||||
static void nvkms_suspend(NvU32 gpuId)
|
||||
{
|
||||
if (gpuId == 0) {
|
||||
nvKmsKapiSuspendResume(NV_TRUE /* suspend */);
|
||||
nvkms_write_lock_pm_lock();
|
||||
}
|
||||
|
||||
@ -498,6 +517,7 @@ static void nvkms_resume(NvU32 gpuId)
|
||||
|
||||
if (gpuId == 0) {
|
||||
nvkms_write_unlock_pm_lock();
|
||||
nvKmsKapiSuspendResume(NV_FALSE /* suspend */);
|
||||
}
|
||||
}
|
||||
|
||||
@ -827,49 +847,6 @@ void nvkms_free_timer(nvkms_timer_handle_t *handle)
|
||||
timer->cancel = NV_TRUE;
|
||||
}
|
||||
|
||||
void* nvkms_get_per_open_data(int fd)
|
||||
{
|
||||
struct file *filp = fget(fd);
|
||||
struct nvkms_per_open *popen = NULL;
|
||||
dev_t rdev = 0;
|
||||
void *data = NULL;
|
||||
|
||||
if (filp == NULL) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
if (filp->f_inode == NULL) {
|
||||
goto done;
|
||||
}
|
||||
rdev = filp->f_inode->i_rdev;
|
||||
|
||||
if ((MAJOR(rdev) != NVKMS_MAJOR_DEVICE_NUMBER) ||
|
||||
(MINOR(rdev) != NVKMS_MINOR_DEVICE_NUMBER)) {
|
||||
goto done;
|
||||
}
|
||||
|
||||
popen = filp->private_data;
|
||||
if (popen == NULL) {
|
||||
goto done;
|
||||
}
|
||||
|
||||
data = popen->data;
|
||||
|
||||
done:
|
||||
/*
|
||||
* fget() incremented the struct file's reference count, which
|
||||
* needs to be balanced with a call to fput(). It is safe to
|
||||
* decrement the reference count before returning
|
||||
* filp->private_data because core NVKMS is currently holding the
|
||||
* nvkms_lock, which prevents the nvkms_close() => nvKmsClose()
|
||||
* call chain from freeing the file out from under the caller of
|
||||
* nvkms_get_per_open_data().
|
||||
*/
|
||||
fput(filp);
|
||||
|
||||
return data;
|
||||
}
|
||||
|
||||
NvBool nvkms_fd_is_nvidia_chardev(int fd)
|
||||
{
|
||||
struct file *filp = fget(fd);
|
||||
@ -1621,6 +1598,12 @@ static int nvkms_ioctl(struct inode *inode, struct file *filp,
|
||||
return status;
|
||||
}
|
||||
|
||||
static long nvkms_unlocked_ioctl(struct file *filp, unsigned int cmd,
|
||||
unsigned long arg)
|
||||
{
|
||||
return nvkms_ioctl(filp->f_inode, filp, cmd, arg);
|
||||
}
|
||||
|
||||
static unsigned int nvkms_poll(struct file *filp, poll_table *wait)
|
||||
{
|
||||
unsigned int mask = 0;
|
||||
@ -1648,17 +1631,73 @@ static unsigned int nvkms_poll(struct file *filp, poll_table *wait)
|
||||
* Module loading support code.
|
||||
*************************************************************************/
|
||||
|
||||
static nvidia_module_t nvidia_modeset_module = {
|
||||
#define NVKMS_RDEV (MKDEV(NV_MAJOR_DEVICE_NUMBER, \
|
||||
NV_MINOR_DEVICE_NUMBER_MODESET_DEVICE))
|
||||
|
||||
static struct file_operations nvkms_fops = {
|
||||
.owner = THIS_MODULE,
|
||||
.module_name = "nvidia-modeset",
|
||||
.instance = 1, /* minor number: 255-1=254 */
|
||||
.open = nvkms_open,
|
||||
.close = nvkms_close,
|
||||
.mmap = nvkms_mmap,
|
||||
.ioctl = nvkms_ioctl,
|
||||
.poll = nvkms_poll,
|
||||
.unlocked_ioctl = nvkms_unlocked_ioctl,
|
||||
#if NVCPU_IS_X86_64 || NVCPU_IS_AARCH64
|
||||
.compat_ioctl = nvkms_unlocked_ioctl,
|
||||
#endif
|
||||
.mmap = nvkms_mmap,
|
||||
.open = nvkms_open,
|
||||
.release = nvkms_close,
|
||||
};
|
||||
|
||||
static struct cdev nvkms_device_cdev;
|
||||
|
||||
/*
 * Register the NVKMS character device.
 *
 * Reserves the single device number NVKMS_RDEV under the name
 * "nvidia-modeset", then initializes nvkms_device_cdev with nvkms_fops and
 * adds it for that device number.
 *
 * Returns a negative value if either step fails; the device number
 * reservation is released again when cdev_add() fails.  Otherwise returns
 * the result of cdev_add().
 */
static int __init nvkms_register_chrdev(void)
{
    int ret;

    ret = register_chrdev_region(NVKMS_RDEV, 1, "nvidia-modeset");
    if (ret < 0) {
        return ret;
    }

    cdev_init(&nvkms_device_cdev, &nvkms_fops);
    ret = cdev_add(&nvkms_device_cdev, NVKMS_RDEV, 1);
    if (ret < 0) {
        /* Undo the reservation so a failed init leaves nothing behind. */
        unregister_chrdev_region(NVKMS_RDEV, 1);
        return ret;
    }

    return ret;
}
|
||||
|
||||
static void nvkms_unregister_chrdev(void)
|
||||
{
|
||||
cdev_del(&nvkms_device_cdev);
|
||||
unregister_chrdev_region(NVKMS_RDEV, 1);
|
||||
}
|
||||
|
||||
void* nvkms_get_per_open_data(int fd)
|
||||
{
|
||||
struct file *filp = fget(fd);
|
||||
void *data = NULL;
|
||||
|
||||
if (filp) {
|
||||
if (filp->f_op == &nvkms_fops && filp->private_data) {
|
||||
struct nvkms_per_open *popen = filp->private_data;
|
||||
data = popen->data;
|
||||
}
|
||||
|
||||
/*
|
||||
* fget() incremented the struct file's reference count, which needs to
|
||||
* be balanced with a call to fput(). It is safe to decrement the
|
||||
* reference count before returning filp->private_data because core
|
||||
* NVKMS is currently holding the nvkms_lock, which prevents the
|
||||
* nvkms_close() => nvKmsClose() call chain from freeing the file out
|
||||
* from under the caller of nvkms_get_per_open_data().
|
||||
*/
|
||||
fput(filp);
|
||||
}
|
||||
|
||||
return data;
|
||||
}
|
||||
|
||||
static int __init nvkms_init(void)
|
||||
{
|
||||
int ret;
|
||||
@ -1689,10 +1728,9 @@ static int __init nvkms_init(void)
|
||||
INIT_LIST_HEAD(&nvkms_timers.list);
|
||||
spin_lock_init(&nvkms_timers.lock);
|
||||
|
||||
ret = nvidia_register_module(&nvidia_modeset_module);
|
||||
|
||||
ret = nvkms_register_chrdev();
|
||||
if (ret != 0) {
|
||||
goto fail_register_module;
|
||||
goto fail_register_chrdev;
|
||||
}
|
||||
|
||||
down(&nvkms_lock);
|
||||
@ -1711,8 +1749,8 @@ static int __init nvkms_init(void)
|
||||
return 0;
|
||||
|
||||
fail_module_load:
|
||||
nvidia_unregister_module(&nvidia_modeset_module);
|
||||
fail_register_module:
|
||||
nvkms_unregister_chrdev();
|
||||
fail_register_chrdev:
|
||||
nv_kthread_q_stop(&nvkms_deferred_close_kthread_q);
|
||||
fail_deferred_close_kthread:
|
||||
nv_kthread_q_stop(&nvkms_kthread_q);
|
||||
@ -1776,7 +1814,7 @@ restart:
|
||||
nv_kthread_q_stop(&nvkms_deferred_close_kthread_q);
|
||||
nv_kthread_q_stop(&nvkms_kthread_q);
|
||||
|
||||
nvidia_unregister_module(&nvidia_modeset_module);
|
||||
nvkms_unregister_chrdev();
|
||||
nvkms_free_rm();
|
||||
|
||||
if (malloc_verbose) {
|
||||
|
@ -100,6 +100,8 @@ NvBool nvkms_output_rounding_fix(void);
|
||||
NvBool nvkms_disable_hdmi_frl(void);
|
||||
NvBool nvkms_disable_vrr_memclk_switch(void);
|
||||
NvBool nvkms_hdmi_deepcolor(void);
|
||||
NvBool nvkms_vblank_sem_control(void);
|
||||
NvBool nvkms_opportunistic_display_sync(void);
|
||||
|
||||
void nvkms_call_rm (void *ops);
|
||||
void* nvkms_alloc (size_t size,
|
||||
|
@ -103,6 +103,8 @@ NvBool nvKmsKapiGetFunctionsTableInternal
|
||||
struct NvKmsKapiFunctionsTable *funcsTable
|
||||
);
|
||||
|
||||
void nvKmsKapiSuspendResume(NvBool suspend);
|
||||
|
||||
NvBool nvKmsGetBacklight(NvU32 display_id, void *drv_priv, NvU32 *brightness);
|
||||
NvBool nvKmsSetBacklight(NvU32 display_id, void *drv_priv, NvU32 brightness);
|
||||
|
||||
|
@ -1,20 +1,25 @@
|
||||
/* SPDX-License-Identifier: Linux-OpenIB */
|
||||
/*
|
||||
* Copyright (c) 2006, 2007 Cisco Systems, Inc. All rights reserved.
|
||||
* Copyright (c) 2007, 2008 Mellanox Technologies. All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or
|
||||
* without modification, are permitted provided that the following
|
||||
* conditions are met:
|
||||
* This software is available to you under a choice of one of two
|
||||
* licenses. You may choose to be licensed under the terms of the GNU
|
||||
* General Public License (GPL) Version 2, available from the file
|
||||
* COPYING in the main directory of this source tree, or the
|
||||
* OpenIB.org BSD license below:
|
||||
*
|
||||
* - Redistributions of source code must retain the above
|
||||
* copyright notice, this list of conditions and the following
|
||||
* disclaimer.
|
||||
* Redistribution and use in source and binary forms, with or
|
||||
* without modification, are permitted provided that the following
|
||||
* conditions are met:
|
||||
*
|
||||
* - Redistributions in binary form must reproduce the above
|
||||
* copyright notice, this list of conditions and the following
|
||||
* disclaimer in the documentation and/or other materials
|
||||
* provided with the distribution.
|
||||
* - Redistributions of source code must retain the above
|
||||
* copyright notice, this list of conditions and the following
|
||||
* disclaimer.
|
||||
*
|
||||
* - Redistributions in binary form must reproduce the above
|
||||
* copyright notice, this list of conditions and the following
|
||||
* disclaimer in the documentation and/or other materials
|
||||
* provided with the distribution.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
@ -43,7 +48,9 @@
|
||||
|
||||
MODULE_AUTHOR("Yishai Hadas");
|
||||
MODULE_DESCRIPTION("NVIDIA GPU memory plug-in");
|
||||
MODULE_LICENSE("Linux-OpenIB");
|
||||
|
||||
MODULE_LICENSE("Dual BSD/GPL");
|
||||
|
||||
MODULE_VERSION(DRV_VERSION);
|
||||
enum {
|
||||
NV_MEM_PEERDIRECT_SUPPORT_DEFAULT = 0,
|
||||
@ -53,7 +60,13 @@ static int peerdirect_support = NV_MEM_PEERDIRECT_SUPPORT_DEFAULT;
|
||||
module_param(peerdirect_support, int, S_IRUGO);
|
||||
MODULE_PARM_DESC(peerdirect_support, "Set level of support for Peer-direct, 0 [default] or 1 [legacy, for example MLNX_OFED 4.9 LTS]");
|
||||
|
||||
#define peer_err(FMT, ARGS...) printk(KERN_ERR "nvidia-peermem" " %s:%d " FMT, __FUNCTION__, __LINE__, ## ARGS)
|
||||
|
||||
#define peer_err(FMT, ARGS...) printk(KERN_ERR "nvidia-peermem" " %s:%d ERROR " FMT, __FUNCTION__, __LINE__, ## ARGS)
|
||||
#ifdef NV_MEM_DEBUG
|
||||
#define peer_trace(FMT, ARGS...) printk(KERN_DEBUG "nvidia-peermem" " %s:%d TRACE " FMT, __FUNCTION__, __LINE__, ## ARGS)
|
||||
#else
|
||||
#define peer_trace(FMT, ARGS...) do {} while (0)
|
||||
#endif
|
||||
|
||||
#if defined(NV_MLNX_IB_PEER_MEM_SYMBOLS_PRESENT)
|
||||
|
||||
@ -74,7 +87,10 @@ invalidate_peer_memory mem_invalidate_callback;
|
||||
static void *reg_handle = NULL;
|
||||
static void *reg_handle_nc = NULL;
|
||||
|
||||
#define NV_MEM_CONTEXT_MAGIC ((u64)0xF1F4F1D0FEF0DAD0ULL)
|
||||
|
||||
struct nv_mem_context {
|
||||
u64 pad1;
|
||||
struct nvidia_p2p_page_table *page_table;
|
||||
struct nvidia_p2p_dma_mapping *dma_mapping;
|
||||
u64 core_context;
|
||||
@ -86,8 +102,22 @@ struct nv_mem_context {
|
||||
struct task_struct *callback_task;
|
||||
int sg_allocated;
|
||||
struct sg_table sg_head;
|
||||
u64 pad2;
|
||||
};
|
||||
|
||||
/*
 * Validate a struct nv_mem_context pointer.
 *
 * Evaluates to non-zero when MC is non-NULL and both guard fields (pad1 and
 * pad2) hold NV_MEM_CONTEXT_MAGIC; otherwise emits a peer_trace() message and
 * evaluates to 0.  MC is evaluated exactly once.
 *
 * The temporaries use suffixed names so that a caller passing a variable
 * called "mc" or "rc" does not collide with (and self-initialize) them.
 */
#define NV_MEM_CONTEXT_CHECK_OK(MC) ({                                        \
    struct nv_mem_context *nv_mc_ = (MC);                                     \
    int nv_mc_ok_ = ((NULL != nv_mc_) &&                                      \
                     (READ_ONCE(nv_mc_->pad1) == NV_MEM_CONTEXT_MAGIC) &&     \
                     (READ_ONCE(nv_mc_->pad2) == NV_MEM_CONTEXT_MAGIC));      \
    if (!nv_mc_ok_) {                                                         \
        peer_trace("invalid nv_mem_context=%px pad1=%016llx pad2=%016llx\n",  \
                   nv_mc_,                                                    \
                   nv_mc_ ? nv_mc_->pad1 : 0,                                 \
                   nv_mc_ ? nv_mc_->pad2 : 0);                                \
    }                                                                         \
    nv_mc_ok_;                                                                \
})
|
||||
|
||||
static void nv_get_p2p_free_callback(void *data)
|
||||
{
|
||||
@ -97,8 +127,9 @@ static void nv_get_p2p_free_callback(void *data)
|
||||
struct nvidia_p2p_dma_mapping *dma_mapping = NULL;
|
||||
|
||||
__module_get(THIS_MODULE);
|
||||
if (!nv_mem_context) {
|
||||
peer_err("nv_get_p2p_free_callback -- invalid nv_mem_context\n");
|
||||
|
||||
if (!NV_MEM_CONTEXT_CHECK_OK(nv_mem_context)) {
|
||||
peer_err("detected invalid context, skipping further processing\n");
|
||||
goto out;
|
||||
}
|
||||
|
||||
@ -169,9 +200,11 @@ static int nv_mem_acquire(unsigned long addr, size_t size, void *peer_mem_privat
|
||||
/* Error case handled as not mine */
|
||||
return 0;
|
||||
|
||||
nv_mem_context->pad1 = NV_MEM_CONTEXT_MAGIC;
|
||||
nv_mem_context->page_virt_start = addr & GPU_PAGE_MASK;
|
||||
nv_mem_context->page_virt_end = (addr + size + GPU_PAGE_SIZE - 1) & GPU_PAGE_MASK;
|
||||
nv_mem_context->mapped_size = nv_mem_context->page_virt_end - nv_mem_context->page_virt_start;
|
||||
nv_mem_context->pad2 = NV_MEM_CONTEXT_MAGIC;
|
||||
|
||||
ret = nvidia_p2p_get_pages(0, 0, nv_mem_context->page_virt_start, nv_mem_context->mapped_size,
|
||||
&nv_mem_context->page_table, nv_mem_dummy_callback, nv_mem_context);
|
||||
@ -195,6 +228,7 @@ static int nv_mem_acquire(unsigned long addr, size_t size, void *peer_mem_privat
|
||||
return 1;
|
||||
|
||||
err:
|
||||
memset(nv_mem_context, 0, sizeof(*nv_mem_context));
|
||||
kfree(nv_mem_context);
|
||||
|
||||
/* Error case handled as not mine */
|
||||
@ -347,6 +381,7 @@ static void nv_mem_release(void *context)
|
||||
sg_free_table(&nv_mem_context->sg_head);
|
||||
nv_mem_context->sg_allocated = 0;
|
||||
}
|
||||
memset(nv_mem_context, 0, sizeof(*nv_mem_context));
|
||||
kfree(nv_mem_context);
|
||||
module_put(THIS_MODULE);
|
||||
return;
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*******************************************************************************
|
||||
Copyright (c) 2013-2022 NVIDIA Corporation
|
||||
Copyright (c) 2013-2023 NVIDIA Corporation
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to
|
||||
|
@ -82,12 +82,12 @@ NV_CONFTEST_FUNCTION_COMPILE_TESTS += set_pages_uc
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += ktime_get_raw_ts64
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += ioasid_get
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += mm_pasid_drop
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += migrate_vma_setup
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += mmget_not_zero
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += mmgrab
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += iommu_sva_bind_device_has_drvdata_arg
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += vm_fault_to_errno
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += find_next_bit_wrap
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += iommu_is_dma_domain
|
||||
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += backing_dev_info
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += mm_context_t
|
||||
@ -116,3 +116,4 @@ NV_CONFTEST_TYPE_COMPILE_TESTS += mpol_preferred_many_present
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += mmu_interval_notifier
|
||||
|
||||
NV_CONFTEST_SYMBOL_COMPILE_TESTS += is_export_symbol_present_int_active_memcg
|
||||
NV_CONFTEST_SYMBOL_COMPILE_TESTS += is_export_symbol_present_migrate_vma_setup
|
||||
|
@ -25,7 +25,8 @@
|
||||
|
||||
#if !defined(NV_PRINTF_STRING_SECTION)
|
||||
#if defined(NVRM) && NVOS_IS_LIBOS
|
||||
#define NV_PRINTF_STRING_SECTION __attribute__ ((section (".logging")))
|
||||
#include "libos_log.h"
|
||||
#define NV_PRINTF_STRING_SECTION LIBOS_SECTION_LOGGING
|
||||
#else // defined(NVRM) && NVOS_IS_LIBOS
|
||||
#define NV_PRINTF_STRING_SECTION
|
||||
#endif // defined(NVRM) && NVOS_IS_LIBOS
|
||||
@ -33,7 +34,7 @@
|
||||
|
||||
/*
|
||||
* Include nvstatuscodes.h twice. Once for creating constant strings in
|
||||
* the NV_PRINTF_STRING_SECTION section of the ececutable, and once to build
|
||||
* the NV_PRINTF_STRING_SECTION section of the executable, and once to build
|
||||
* the g_StatusCodeList table.
|
||||
*/
|
||||
#undef NV_STATUS_CODE
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*******************************************************************************
|
||||
Copyright (c) 2015-2022 NVIDIA Corporation
|
||||
Copyright (c) 2015-2023 NVIDIA Corporation
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to
|
||||
@ -1053,7 +1053,7 @@ NV_STATUS uvm_test_register_unload_state_buffer(UVM_TEST_REGISTER_UNLOAD_STATE_B
|
||||
// are not used because unload_state_buf may be a managed memory pointer and
|
||||
// therefore a locking assertion from the CPU fault handler could be fired.
|
||||
nv_mmap_read_lock(current->mm);
|
||||
ret = NV_PIN_USER_PAGES(params->unload_state_buf, 1, FOLL_WRITE, &page, NULL);
|
||||
ret = NV_PIN_USER_PAGES(params->unload_state_buf, 1, FOLL_WRITE, &page);
|
||||
nv_mmap_read_unlock(current->mm);
|
||||
|
||||
if (ret < 0)
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*******************************************************************************
|
||||
Copyright (c) 2013-2022 NVIDIA Corporation
|
||||
Copyright (c) 2013-2023 NVIDIA Corporation
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to
|
||||
@ -45,16 +45,20 @@
|
||||
// #endif
|
||||
// 3) Do the same thing for the function definition, and for any structs that
|
||||
// are taken as arguments to these functions.
|
||||
// 4) Let this change propagate over to cuda_a, so that the CUDA driver can
|
||||
// start using the new API by bumping up the API version number its using.
|
||||
// This can be found in gpgpu/cuda/cuda.nvmk.
|
||||
// 5) Once the cuda_a changes have made it back into chips_a, remove the old API
|
||||
// declaration, definition, and any old structs that were in use.
|
||||
// 4) Let this change propagate over to cuda_a and dev_a, so that the CUDA and
|
||||
// nvidia-cfg libraries can start using the new API by bumping up the API
|
||||
// version number they are using.
|
||||
// Places where UVM_API_REVISION is defined are:
|
||||
// drivers/gpgpu/cuda/cuda.nvmk (cuda_a)
|
||||
// drivers/setup/linux/nvidia-cfg/makefile.nvmk (dev_a)
|
||||
// 5) Once the dev_a and cuda_a changes have made it back into chips_a,
|
||||
// remove the old API declaration, definition, and any old structs that were
|
||||
// in use.
|
||||
|
||||
#ifndef _UVM_H_
|
||||
#define _UVM_H_
|
||||
|
||||
#define UVM_API_LATEST_REVISION 8
|
||||
#define UVM_API_LATEST_REVISION 9
|
||||
|
||||
#if !defined(UVM_API_REVISION)
|
||||
#error "please define UVM_API_REVISION macro to a desired version number or UVM_API_LATEST_REVISION macro"
|
||||
@ -180,12 +184,8 @@ NV_STATUS UvmSetDriverVersion(NvU32 major, NvU32 changelist);
|
||||
// because it is not very informative.
|
||||
//
|
||||
//------------------------------------------------------------------------------
|
||||
#if UVM_API_REV_IS_AT_MOST(4)
|
||||
NV_STATUS UvmInitialize(UvmFileDescriptor fd);
|
||||
#else
|
||||
NV_STATUS UvmInitialize(UvmFileDescriptor fd,
|
||||
NvU64 flags);
|
||||
#endif
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// UvmDeinitialize
|
||||
@ -329,7 +329,11 @@ NV_STATUS UvmIsPageableMemoryAccessSupportedOnGpu(const NvProcessorUuid *gpuUuid
|
||||
//
|
||||
// Arguments:
|
||||
// gpuUuid: (INPUT)
|
||||
// UUID of the GPU to register.
|
||||
// UUID of the physical GPU to register.
|
||||
//
|
||||
// platformParams: (INPUT)
|
||||
// User handles identifying the GPU partition to register.
|
||||
// This should be NULL if the GPU is not SMC capable or SMC enabled.
|
||||
//
|
||||
// Error codes:
|
||||
// NV_ERR_NO_MEMORY:
|
||||
@ -364,27 +368,31 @@ NV_STATUS UvmIsPageableMemoryAccessSupportedOnGpu(const NvProcessorUuid *gpuUuid
|
||||
// OS state required to register the GPU is not present.
|
||||
//
|
||||
// NV_ERR_INVALID_STATE:
|
||||
// OS state required to register the GPU is malformed.
|
||||
// OS state required to register the GPU is malformed, or the partition
|
||||
// identified by the user handles or its configuration changed.
|
||||
//
|
||||
// NV_ERR_GENERIC:
|
||||
// Unexpected error. We try hard to avoid returning this error code,
|
||||
// because it is not very informative.
|
||||
//
|
||||
//------------------------------------------------------------------------------
|
||||
#if UVM_API_REV_IS_AT_MOST(8)
|
||||
NV_STATUS UvmRegisterGpu(const NvProcessorUuid *gpuUuid);
|
||||
#else
|
||||
NV_STATUS UvmRegisterGpu(const NvProcessorUuid *gpuUuid,
|
||||
const UvmGpuPlatformParams *platformParams);
|
||||
#endif
|
||||
|
||||
#if UVM_API_REV_IS_AT_MOST(8)
|
||||
//------------------------------------------------------------------------------
|
||||
// UvmRegisterGpuSmc
|
||||
//
|
||||
// The same as UvmRegisterGpu, but takes additional parameters to specify the
|
||||
// GPU partition being registered if SMC is enabled.
|
||||
//
|
||||
// TODO: Bug 2844714: Merge UvmRegisterGpuSmc() with UvmRegisterGpu() once
|
||||
// the initial SMC support is in place.
|
||||
//
|
||||
// Arguments:
|
||||
// gpuUuid: (INPUT)
|
||||
// UUID of the parent GPU of the SMC partition to register.
|
||||
// UUID of the physical GPU of the SMC partition to register.
|
||||
//
|
||||
// platformParams: (INPUT)
|
||||
// User handles identifying the partition to register.
|
||||
@ -397,6 +405,7 @@ NV_STATUS UvmRegisterGpu(const NvProcessorUuid *gpuUuid);
|
||||
//
|
||||
NV_STATUS UvmRegisterGpuSmc(const NvProcessorUuid *gpuUuid,
|
||||
const UvmGpuPlatformParams *platformParams);
|
||||
#endif
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// UvmUnregisterGpu
|
||||
@ -1416,8 +1425,7 @@ NV_STATUS UvmAllocSemaphorePool(void *base,
|
||||
//
|
||||
// preferredCpuMemoryNode: (INPUT)
|
||||
// Preferred CPU NUMA memory node used if the destination processor is
|
||||
// the CPU. This argument is ignored if the given virtual address range
|
||||
// corresponds to managed memory.
|
||||
// the CPU.
|
||||
//
|
||||
// Error codes:
|
||||
// NV_ERR_INVALID_ADDRESS:
|
||||
@ -1456,16 +1464,10 @@ NV_STATUS UvmAllocSemaphorePool(void *base,
|
||||
// pages were associated with a non-migratable range group.
|
||||
//
|
||||
//------------------------------------------------------------------------------
|
||||
#if UVM_API_REV_IS_AT_MOST(5)
|
||||
NV_STATUS UvmMigrate(void *base,
|
||||
NvLength length,
|
||||
const NvProcessorUuid *destinationUuid);
|
||||
#else
|
||||
NV_STATUS UvmMigrate(void *base,
|
||||
NvLength length,
|
||||
const NvProcessorUuid *destinationUuid,
|
||||
NvS32 preferredCpuMemoryNode);
|
||||
#endif
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// UvmMigrateAsync
|
||||
@ -1547,20 +1549,12 @@ NV_STATUS UvmMigrate(void *base,
|
||||
// pages were associated with a non-migratable range group.
|
||||
//
|
||||
//------------------------------------------------------------------------------
|
||||
#if UVM_API_REV_IS_AT_MOST(5)
|
||||
NV_STATUS UvmMigrateAsync(void *base,
|
||||
NvLength length,
|
||||
const NvProcessorUuid *destinationUuid,
|
||||
void *semaphoreAddress,
|
||||
NvU32 semaphorePayload);
|
||||
#else
|
||||
NV_STATUS UvmMigrateAsync(void *base,
|
||||
NvLength length,
|
||||
const NvProcessorUuid *destinationUuid,
|
||||
NvS32 preferredCpuMemoryNode,
|
||||
void *semaphoreAddress,
|
||||
NvU32 semaphorePayload);
|
||||
#endif
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// UvmMigrateRangeGroup
|
||||
@ -1568,9 +1562,7 @@ NV_STATUS UvmMigrateAsync(void *base,
|
||||
// Migrates the backing of all virtual address ranges associated with the given
|
||||
// range group to the specified destination processor. The behavior of this API
|
||||
// is equivalent to calling UvmMigrate on each VA range associated with this
|
||||
// range group. The value for the preferredCpuMemoryNode is irrelevant in this
|
||||
// case as it only applies to migrations of pageable address, which cannot be
|
||||
// used to create range groups.
|
||||
// range group.
|
||||
//
|
||||
// Any errors encountered during migration are returned immediately. No attempt
|
||||
// is made to migrate the remaining unmigrated ranges and the ranges that are
|
||||
@ -2303,13 +2295,10 @@ NV_STATUS UvmDisableReadDuplication(void *base,
|
||||
// preferredLocationUuid: (INPUT)
|
||||
// UUID of the preferred location.
|
||||
//
|
||||
// preferredCpuNumaNode: (INPUT)
|
||||
// preferredCpuMemoryNode: (INPUT)
|
||||
// Preferred CPU NUMA memory node used if preferredLocationUuid is the
|
||||
// UUID of the CPU. -1 is a special value which indicates all CPU nodes
|
||||
// allowed by the global and thread memory policies. This argument is
|
||||
// ignored if preferredLocationUuid refers to a GPU or the given virtual
|
||||
// address range corresponds to managed memory. If NUMA is not enabled,
|
||||
// only 0 or -1 is allowed.
|
||||
// allowed by the global and thread memory policies.
|
||||
//
|
||||
// Errors:
|
||||
// NV_ERR_INVALID_ADDRESS:
|
||||
@ -2339,10 +2328,11 @@ NV_STATUS UvmDisableReadDuplication(void *base,
|
||||
//
|
||||
// NV_ERR_INVALID_ARGUMENT:
|
||||
// One of the following occurred:
|
||||
// - preferredLocationUuid is the UUID of a CPU and preferredCpuNumaNode
|
||||
// refers to a registered GPU.
|
||||
// - preferredCpuNumaNode is invalid and preferredLocationUuid is the
|
||||
// UUID of the CPU.
|
||||
// - preferredLocationUuid is the UUID of the CPU and
|
||||
// preferredCpuMemoryNode is either:
|
||||
// - not a valid NUMA node,
|
||||
// - not a possible NUMA node, or
|
||||
// - a NUMA node ID corresponding to a registered GPU.
|
||||
//
|
||||
// NV_ERR_NOT_SUPPORTED:
|
||||
// The UVM file descriptor is associated with another process and the
|
||||
@ -2353,16 +2343,10 @@ NV_STATUS UvmDisableReadDuplication(void *base,
|
||||
// because it is not very informative.
|
||||
//
|
||||
//------------------------------------------------------------------------------
|
||||
#if UVM_API_REV_IS_AT_MOST(7)
|
||||
NV_STATUS UvmSetPreferredLocation(void *base,
|
||||
NvLength length,
|
||||
const NvProcessorUuid *preferredLocationUuid);
|
||||
#else
|
||||
NV_STATUS UvmSetPreferredLocation(void *base,
|
||||
NvLength length,
|
||||
const NvProcessorUuid *preferredLocationUuid,
|
||||
NvS32 preferredCpuNumaNode);
|
||||
#endif
|
||||
NvS32 preferredCpuMemoryNode);
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// UvmUnsetPreferredLocation
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*******************************************************************************
|
||||
Copyright (c) 2021 NVIDIA Corporation
|
||||
Copyright (c) 2021-2023 NVIDIA Corporation
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to
|
||||
@ -79,6 +79,8 @@ void uvm_hal_ada_arch_init_properties(uvm_parent_gpu_t *parent_gpu)
|
||||
|
||||
parent_gpu->access_counters_supported = true;
|
||||
|
||||
parent_gpu->access_counters_can_use_physical_addresses = false;
|
||||
|
||||
parent_gpu->fault_cancel_va_supported = true;
|
||||
|
||||
parent_gpu->scoped_atomics_supported = true;
|
||||
@ -94,4 +96,6 @@ void uvm_hal_ada_arch_init_properties(uvm_parent_gpu_t *parent_gpu)
|
||||
parent_gpu->map_remap_larger_page_promotion = false;
|
||||
|
||||
parent_gpu->plc_supported = true;
|
||||
|
||||
parent_gpu->no_ats_range_required = false;
|
||||
}
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*******************************************************************************
|
||||
Copyright (c) 2018-20221 NVIDIA Corporation
|
||||
Copyright (c) 2018-2023 NVIDIA Corporation
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to
|
||||
@ -38,10 +38,12 @@ void uvm_hal_ampere_arch_init_properties(uvm_parent_gpu_t *parent_gpu)
|
||||
|
||||
parent_gpu->utlb_per_gpc_count = uvm_ampere_get_utlbs_per_gpc(parent_gpu);
|
||||
|
||||
parent_gpu->fault_buffer_info.replayable.utlb_count = parent_gpu->rm_info.maxGpcCount * parent_gpu->utlb_per_gpc_count;
|
||||
parent_gpu->fault_buffer_info.replayable.utlb_count = parent_gpu->rm_info.maxGpcCount *
|
||||
parent_gpu->utlb_per_gpc_count;
|
||||
{
|
||||
uvm_fault_buffer_entry_t *dummy;
|
||||
UVM_ASSERT(parent_gpu->fault_buffer_info.replayable.utlb_count <= (1 << (sizeof(dummy->fault_source.utlb_id) * 8)));
|
||||
UVM_ASSERT(parent_gpu->fault_buffer_info.replayable.utlb_count <= (1 <<
|
||||
(sizeof(dummy->fault_source.utlb_id) * 8)));
|
||||
}
|
||||
|
||||
// A single top level PDE on Ampere covers 128 TB and that's the minimum
|
||||
@ -53,7 +55,7 @@ void uvm_hal_ampere_arch_init_properties(uvm_parent_gpu_t *parent_gpu)
|
||||
parent_gpu->uvm_mem_va_size = UVM_MEM_VA_SIZE;
|
||||
|
||||
// See uvm_mmu.h for mapping placement
|
||||
parent_gpu->flat_vidmem_va_base = 136 * UVM_SIZE_1TB;
|
||||
parent_gpu->flat_vidmem_va_base = 160 * UVM_SIZE_1TB;
|
||||
parent_gpu->flat_sysmem_va_base = 256 * UVM_SIZE_1TB;
|
||||
|
||||
parent_gpu->ce_phys_vidmem_write_supported = true;
|
||||
@ -81,6 +83,8 @@ void uvm_hal_ampere_arch_init_properties(uvm_parent_gpu_t *parent_gpu)
|
||||
|
||||
parent_gpu->access_counters_supported = true;
|
||||
|
||||
parent_gpu->access_counters_can_use_physical_addresses = false;
|
||||
|
||||
parent_gpu->fault_cancel_va_supported = true;
|
||||
|
||||
parent_gpu->scoped_atomics_supported = true;
|
||||
@ -101,4 +105,6 @@ void uvm_hal_ampere_arch_init_properties(uvm_parent_gpu_t *parent_gpu)
|
||||
parent_gpu->map_remap_larger_page_promotion = false;
|
||||
|
||||
parent_gpu->plc_supported = true;
|
||||
|
||||
parent_gpu->no_ats_range_required = false;
|
||||
}
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*******************************************************************************
|
||||
Copyright (c) 2018-2022 NVIDIA Corporation
|
||||
Copyright (c) 2018-2023 NVIDIA Corporation
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to
|
||||
@ -117,7 +117,7 @@ bool uvm_hal_ampere_ce_memcopy_is_valid_c6b5(uvm_push_t *push, uvm_gpu_address_t
|
||||
NvU64 push_begin_gpu_va;
|
||||
uvm_gpu_t *gpu = uvm_push_get_gpu(push);
|
||||
|
||||
if (!uvm_gpu_is_virt_mode_sriov_heavy(gpu))
|
||||
if (!uvm_parent_gpu_is_virt_mode_sriov_heavy(gpu->parent))
|
||||
return true;
|
||||
|
||||
if (uvm_channel_is_proxy(push->channel)) {
|
||||
@ -196,7 +196,7 @@ bool uvm_hal_ampere_ce_memset_is_valid_c6b5(uvm_push_t *push,
|
||||
{
|
||||
uvm_gpu_t *gpu = uvm_push_get_gpu(push);
|
||||
|
||||
if (!uvm_gpu_is_virt_mode_sriov_heavy(gpu))
|
||||
if (!uvm_parent_gpu_is_virt_mode_sriov_heavy(gpu->parent))
|
||||
return true;
|
||||
|
||||
if (uvm_channel_is_proxy(push->channel)) {
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*******************************************************************************
|
||||
Copyright (c) 2018-2022 NVIDIA Corporation
|
||||
Copyright (c) 2018-2023 NVIDIA Corporation
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to
|
||||
@ -33,7 +33,7 @@ bool uvm_hal_ampere_host_method_is_valid(uvm_push_t *push, NvU32 method_address,
|
||||
{
|
||||
uvm_gpu_t *gpu = uvm_push_get_gpu(push);
|
||||
|
||||
if (!uvm_gpu_is_virt_mode_sriov_heavy(gpu))
|
||||
if (!uvm_parent_gpu_is_virt_mode_sriov_heavy(gpu->parent))
|
||||
return true;
|
||||
|
||||
if (uvm_channel_is_privileged(push->channel)) {
|
||||
|
@ -34,19 +34,32 @@
|
||||
#include <linux/hmm.h>
|
||||
#endif
|
||||
|
||||
static NV_STATUS service_ats_faults(uvm_gpu_va_space_t *gpu_va_space,
|
||||
struct vm_area_struct *vma,
|
||||
NvU64 start,
|
||||
size_t length,
|
||||
uvm_fault_access_type_t access_type,
|
||||
uvm_ats_fault_context_t *ats_context)
|
||||
// Kind of ATS request being serviced. service_ats_requests() uses it to pick
// the populate permissions and whether to touch pages, skip already-mapped
// pages and populate on CPU allocation failures (all enabled only for
// UVM_ATS_SERVICE_TYPE_FAULTS).
typedef enum
{
    // Request comes from fault servicing.
    UVM_ATS_SERVICE_TYPE_FAULTS = 0,

    // Request comes from access counter servicing.
    UVM_ATS_SERVICE_TYPE_ACCESS_COUNTERS,

    // Number of service types; not a valid service type itself.
    UVM_ATS_SERVICE_TYPE_COUNT
} uvm_ats_service_type_t;
|
||||
|
||||
static NV_STATUS service_ats_requests(uvm_gpu_va_space_t *gpu_va_space,
|
||||
struct vm_area_struct *vma,
|
||||
NvU64 start,
|
||||
size_t length,
|
||||
uvm_fault_access_type_t access_type,
|
||||
uvm_ats_service_type_t service_type,
|
||||
uvm_ats_fault_context_t *ats_context)
|
||||
{
|
||||
uvm_va_space_t *va_space = gpu_va_space->va_space;
|
||||
struct mm_struct *mm = va_space->va_space_mm.mm;
|
||||
bool write = (access_type >= UVM_FAULT_ACCESS_TYPE_WRITE);
|
||||
NV_STATUS status;
|
||||
NvU64 user_space_start;
|
||||
NvU64 user_space_length;
|
||||
bool write = (access_type >= UVM_FAULT_ACCESS_TYPE_WRITE);
|
||||
bool fault_service_type = (service_type == UVM_ATS_SERVICE_TYPE_FAULTS);
|
||||
uvm_populate_permissions_t populate_permissions = fault_service_type ?
|
||||
(write ? UVM_POPULATE_PERMISSIONS_WRITE : UVM_POPULATE_PERMISSIONS_ANY) :
|
||||
UVM_POPULATE_PERMISSIONS_INHERIT;
|
||||
|
||||
|
||||
// Request uvm_migrate_pageable() to touch the corresponding page after
|
||||
// population.
|
||||
@ -83,10 +96,10 @@ static NV_STATUS service_ats_faults(uvm_gpu_va_space_t *gpu_va_space,
|
||||
.dst_node_id = ats_context->residency_node,
|
||||
.start = start,
|
||||
.length = length,
|
||||
.populate_permissions = write ? UVM_POPULATE_PERMISSIONS_WRITE : UVM_POPULATE_PERMISSIONS_ANY,
|
||||
.touch = true,
|
||||
.skip_mapped = true,
|
||||
.populate_on_cpu_alloc_failures = true,
|
||||
.populate_permissions = populate_permissions,
|
||||
.touch = fault_service_type,
|
||||
.skip_mapped = fault_service_type,
|
||||
.populate_on_cpu_alloc_failures = fault_service_type,
|
||||
.user_space_start = &user_space_start,
|
||||
.user_space_length = &user_space_length,
|
||||
};
|
||||
@ -107,26 +120,24 @@ static NV_STATUS service_ats_faults(uvm_gpu_va_space_t *gpu_va_space,
|
||||
return status;
|
||||
}
|
||||
|
||||
static void flush_tlb_write_faults(uvm_gpu_va_space_t *gpu_va_space,
|
||||
NvU64 addr,
|
||||
size_t size,
|
||||
uvm_fault_client_type_t client_type)
|
||||
static void flush_tlb_va_region(uvm_gpu_va_space_t *gpu_va_space,
|
||||
NvU64 addr,
|
||||
size_t size,
|
||||
uvm_fault_client_type_t client_type)
|
||||
{
|
||||
uvm_ats_fault_invalidate_t *ats_invalidate;
|
||||
|
||||
uvm_ats_smmu_invalidate_tlbs(gpu_va_space, addr, size);
|
||||
|
||||
if (client_type == UVM_FAULT_CLIENT_TYPE_GPC)
|
||||
ats_invalidate = &gpu_va_space->gpu->parent->fault_buffer_info.replayable.ats_invalidate;
|
||||
else
|
||||
ats_invalidate = &gpu_va_space->gpu->parent->fault_buffer_info.non_replayable.ats_invalidate;
|
||||
|
||||
if (!ats_invalidate->write_faults_in_batch) {
|
||||
uvm_tlb_batch_begin(&gpu_va_space->page_tables, &ats_invalidate->write_faults_tlb_batch);
|
||||
ats_invalidate->write_faults_in_batch = true;
|
||||
if (!ats_invalidate->tlb_batch_pending) {
|
||||
uvm_tlb_batch_begin(&gpu_va_space->page_tables, &ats_invalidate->tlb_batch);
|
||||
ats_invalidate->tlb_batch_pending = true;
|
||||
}
|
||||
|
||||
uvm_tlb_batch_invalidate(&ats_invalidate->write_faults_tlb_batch, addr, size, PAGE_SIZE, UVM_MEMBAR_NONE);
|
||||
uvm_tlb_batch_invalidate(&ats_invalidate->tlb_batch, addr, size, PAGE_SIZE, UVM_MEMBAR_NONE);
|
||||
}
|
||||
|
||||
static void ats_batch_select_residency(uvm_gpu_va_space_t *gpu_va_space,
|
||||
@ -192,7 +203,7 @@ done:
|
||||
ats_context->prefetch_state.has_preferred_location = false;
|
||||
#endif
|
||||
|
||||
ats_context->residency_id = gpu ? gpu->parent->id : UVM_ID_CPU;
|
||||
ats_context->residency_id = gpu ? gpu->id : UVM_ID_CPU;
|
||||
ats_context->residency_node = residency;
|
||||
}
|
||||
|
||||
@ -364,51 +375,43 @@ static NV_STATUS ats_compute_residency_mask(uvm_gpu_va_space_t *gpu_va_space,
|
||||
return status;
|
||||
}
|
||||
|
||||
static void ats_expand_fault_region(uvm_gpu_va_space_t *gpu_va_space,
|
||||
struct vm_area_struct *vma,
|
||||
uvm_ats_fault_context_t *ats_context,
|
||||
uvm_va_block_region_t max_prefetch_region,
|
||||
uvm_page_mask_t *faulted_mask)
|
||||
static void ats_compute_prefetch_mask(uvm_gpu_va_space_t *gpu_va_space,
|
||||
struct vm_area_struct *vma,
|
||||
uvm_ats_fault_context_t *ats_context,
|
||||
uvm_va_block_region_t max_prefetch_region)
|
||||
{
|
||||
uvm_page_mask_t *read_fault_mask = &ats_context->read_fault_mask;
|
||||
uvm_page_mask_t *write_fault_mask = &ats_context->write_fault_mask;
|
||||
uvm_page_mask_t *accessed_mask = &ats_context->accessed_mask;
|
||||
uvm_page_mask_t *residency_mask = &ats_context->prefetch_state.residency_mask;
|
||||
uvm_page_mask_t *prefetch_mask = &ats_context->prefetch_state.prefetch_pages_mask;
|
||||
uvm_perf_prefetch_bitmap_tree_t *bitmap_tree = &ats_context->prefetch_state.bitmap_tree;
|
||||
|
||||
if (uvm_page_mask_empty(faulted_mask))
|
||||
if (uvm_page_mask_empty(accessed_mask))
|
||||
return;
|
||||
|
||||
uvm_perf_prefetch_compute_ats(gpu_va_space->va_space,
|
||||
faulted_mask,
|
||||
uvm_va_block_region_from_mask(NULL, faulted_mask),
|
||||
accessed_mask,
|
||||
uvm_va_block_region_from_mask(NULL, accessed_mask),
|
||||
max_prefetch_region,
|
||||
residency_mask,
|
||||
bitmap_tree,
|
||||
prefetch_mask);
|
||||
|
||||
uvm_page_mask_or(read_fault_mask, read_fault_mask, prefetch_mask);
|
||||
|
||||
if (vma->vm_flags & VM_WRITE)
|
||||
uvm_page_mask_or(write_fault_mask, write_fault_mask, prefetch_mask);
|
||||
}
|
||||
|
||||
static NV_STATUS ats_fault_prefetch(uvm_gpu_va_space_t *gpu_va_space,
|
||||
struct vm_area_struct *vma,
|
||||
NvU64 base,
|
||||
uvm_ats_fault_context_t *ats_context)
|
||||
static NV_STATUS ats_compute_prefetch(uvm_gpu_va_space_t *gpu_va_space,
|
||||
struct vm_area_struct *vma,
|
||||
NvU64 base,
|
||||
uvm_ats_service_type_t service_type,
|
||||
uvm_ats_fault_context_t *ats_context)
|
||||
{
|
||||
NV_STATUS status = NV_OK;
|
||||
uvm_page_mask_t *read_fault_mask = &ats_context->read_fault_mask;
|
||||
uvm_page_mask_t *write_fault_mask = &ats_context->write_fault_mask;
|
||||
uvm_page_mask_t *faulted_mask = &ats_context->faulted_mask;
|
||||
uvm_page_mask_t *accessed_mask = &ats_context->accessed_mask;
|
||||
uvm_page_mask_t *prefetch_mask = &ats_context->prefetch_state.prefetch_pages_mask;
|
||||
uvm_va_block_region_t max_prefetch_region = uvm_ats_region_from_vma(vma, base);
|
||||
|
||||
if (!uvm_perf_prefetch_enabled(gpu_va_space->va_space))
|
||||
return status;
|
||||
|
||||
if (uvm_page_mask_empty(faulted_mask))
|
||||
if (uvm_page_mask_empty(accessed_mask))
|
||||
return status;
|
||||
|
||||
status = ats_compute_residency_mask(gpu_va_space, vma, base, ats_context);
|
||||
@ -418,19 +421,27 @@ static NV_STATUS ats_fault_prefetch(uvm_gpu_va_space_t *gpu_va_space,
|
||||
// Prefetch the entire region if none of the pages are resident on any node
|
||||
// and if preferred_location is the faulting GPU.
|
||||
if (ats_context->prefetch_state.has_preferred_location &&
|
||||
ats_context->prefetch_state.first_touch &&
|
||||
uvm_id_equal(ats_context->residency_id, gpu_va_space->gpu->parent->id)) {
|
||||
(ats_context->prefetch_state.first_touch || (service_type == UVM_ATS_SERVICE_TYPE_ACCESS_COUNTERS)) &&
|
||||
uvm_id_equal(ats_context->residency_id, gpu_va_space->gpu->id)) {
|
||||
|
||||
uvm_page_mask_init_from_region(prefetch_mask, max_prefetch_region, NULL);
|
||||
}
|
||||
else {
|
||||
ats_compute_prefetch_mask(gpu_va_space, vma, ats_context, max_prefetch_region);
|
||||
}
|
||||
|
||||
if (service_type == UVM_ATS_SERVICE_TYPE_FAULTS) {
|
||||
uvm_page_mask_t *read_fault_mask = &ats_context->read_fault_mask;
|
||||
uvm_page_mask_t *write_fault_mask = &ats_context->write_fault_mask;
|
||||
|
||||
uvm_page_mask_or(read_fault_mask, read_fault_mask, prefetch_mask);
|
||||
|
||||
if (vma->vm_flags & VM_WRITE)
|
||||
uvm_page_mask_or(write_fault_mask, write_fault_mask, prefetch_mask);
|
||||
|
||||
return status;
|
||||
}
|
||||
|
||||
ats_expand_fault_region(gpu_va_space, vma, ats_context, max_prefetch_region, faulted_mask);
|
||||
else {
|
||||
uvm_page_mask_or(accessed_mask, accessed_mask, prefetch_mask);
|
||||
}
|
||||
|
||||
return status;
|
||||
}
|
||||
@ -448,6 +459,7 @@ NV_STATUS uvm_ats_service_faults(uvm_gpu_va_space_t *gpu_va_space,
|
||||
uvm_page_mask_t *faults_serviced_mask = &ats_context->faults_serviced_mask;
|
||||
uvm_page_mask_t *reads_serviced_mask = &ats_context->reads_serviced_mask;
|
||||
uvm_fault_client_type_t client_type = ats_context->client_type;
|
||||
uvm_ats_service_type_t service_type = UVM_ATS_SERVICE_TYPE_FAULTS;
|
||||
|
||||
UVM_ASSERT(vma);
|
||||
UVM_ASSERT(IS_ALIGNED(base, UVM_VA_BLOCK_SIZE));
|
||||
@ -456,6 +468,9 @@ NV_STATUS uvm_ats_service_faults(uvm_gpu_va_space_t *gpu_va_space,
|
||||
UVM_ASSERT(gpu_va_space->ats.enabled);
|
||||
UVM_ASSERT(uvm_gpu_va_space_state(gpu_va_space) == UVM_GPU_VA_SPACE_STATE_ACTIVE);
|
||||
|
||||
uvm_assert_mmap_lock_locked(vma->vm_mm);
|
||||
uvm_assert_rwsem_locked(&gpu_va_space->va_space->lock);
|
||||
|
||||
uvm_page_mask_zero(faults_serviced_mask);
|
||||
uvm_page_mask_zero(reads_serviced_mask);
|
||||
|
||||
@ -481,7 +496,7 @@ NV_STATUS uvm_ats_service_faults(uvm_gpu_va_space_t *gpu_va_space,
|
||||
|
||||
ats_batch_select_residency(gpu_va_space, vma, ats_context);
|
||||
|
||||
ats_fault_prefetch(gpu_va_space, vma, base, ats_context);
|
||||
ats_compute_prefetch(gpu_va_space, vma, base, service_type, ats_context);
|
||||
|
||||
for_each_va_block_subregion_in_mask(subregion, write_fault_mask, region) {
|
||||
NvU64 start = base + (subregion.first * PAGE_SIZE);
|
||||
@ -493,12 +508,13 @@ NV_STATUS uvm_ats_service_faults(uvm_gpu_va_space_t *gpu_va_space,
|
||||
UVM_ASSERT(start >= vma->vm_start);
|
||||
UVM_ASSERT((start + length) <= vma->vm_end);
|
||||
|
||||
status = service_ats_faults(gpu_va_space, vma, start, length, access_type, ats_context);
|
||||
status = service_ats_requests(gpu_va_space, vma, start, length, access_type, service_type, ats_context);
|
||||
if (status != NV_OK)
|
||||
return status;
|
||||
|
||||
if (vma->vm_flags & VM_WRITE) {
|
||||
uvm_page_mask_region_fill(faults_serviced_mask, subregion);
|
||||
uvm_ats_smmu_invalidate_tlbs(gpu_va_space, start, length);
|
||||
|
||||
// The Linux kernel never invalidates TLB entries on mapping
|
||||
// permission upgrade. This is a problem if the GPU has cached
|
||||
@ -509,7 +525,7 @@ NV_STATUS uvm_ats_service_faults(uvm_gpu_va_space_t *gpu_va_space,
|
||||
// infinite loop because we just forward the fault to the Linux
|
||||
// kernel and it will see that the permissions in the page table are
|
||||
// correct. Therefore, we flush TLB entries on ATS write faults.
|
||||
flush_tlb_write_faults(gpu_va_space, start, length, client_type);
|
||||
flush_tlb_va_region(gpu_va_space, start, length, client_type);
|
||||
}
|
||||
else {
|
||||
uvm_page_mask_region_fill(reads_serviced_mask, subregion);
|
||||
@ -527,11 +543,20 @@ NV_STATUS uvm_ats_service_faults(uvm_gpu_va_space_t *gpu_va_space,
|
||||
UVM_ASSERT(start >= vma->vm_start);
|
||||
UVM_ASSERT((start + length) <= vma->vm_end);
|
||||
|
||||
status = service_ats_faults(gpu_va_space, vma, start, length, access_type, ats_context);
|
||||
status = service_ats_requests(gpu_va_space, vma, start, length, access_type, service_type, ats_context);
|
||||
if (status != NV_OK)
|
||||
return status;
|
||||
|
||||
uvm_page_mask_region_fill(faults_serviced_mask, subregion);
|
||||
|
||||
// Similarly to permission upgrade scenario, discussed above, GPU
|
||||
// will not re-fetch the entry if the PTE is invalid and page size
|
||||
// is 4K. To avoid infinite faulting loop, invalidate TLB for every
|
||||
// new translation written explicitly like in the case of permission
|
||||
// upgrade.
|
||||
if (PAGE_SIZE == UVM_PAGE_SIZE_4K)
|
||||
flush_tlb_va_region(gpu_va_space, start, length, client_type);
|
||||
|
||||
}
|
||||
|
||||
return status;
|
||||
@ -566,7 +591,7 @@ NV_STATUS uvm_ats_invalidate_tlbs(uvm_gpu_va_space_t *gpu_va_space,
|
||||
NV_STATUS status;
|
||||
uvm_push_t push;
|
||||
|
||||
if (!ats_invalidate->write_faults_in_batch)
|
||||
if (!ats_invalidate->tlb_batch_pending)
|
||||
return NV_OK;
|
||||
|
||||
UVM_ASSERT(gpu_va_space);
|
||||
@ -578,7 +603,7 @@ NV_STATUS uvm_ats_invalidate_tlbs(uvm_gpu_va_space_t *gpu_va_space,
|
||||
"Invalidate ATS entries");
|
||||
|
||||
if (status == NV_OK) {
|
||||
uvm_tlb_batch_end(&ats_invalidate->write_faults_tlb_batch, &push, UVM_MEMBAR_NONE);
|
||||
uvm_tlb_batch_end(&ats_invalidate->tlb_batch, &push, UVM_MEMBAR_NONE);
|
||||
uvm_push_end(&push);
|
||||
|
||||
// Add this push to the GPU's tracker so that fault replays/clears can
|
||||
@ -586,7 +611,47 @@ NV_STATUS uvm_ats_invalidate_tlbs(uvm_gpu_va_space_t *gpu_va_space,
|
||||
status = uvm_tracker_add_push_safe(out_tracker, &push);
|
||||
}
|
||||
|
||||
ats_invalidate->write_faults_in_batch = false;
|
||||
ats_invalidate->tlb_batch_pending = false;
|
||||
|
||||
return status;
|
||||
}
|
||||
|
||||
// Service access counter notifications for the pages set in
// ats_context->accessed_mask, within the UVM_VA_BLOCK_SIZE-aligned block that
// starts at base.
//
// A residency is selected for the batch and the prefetch computation is run
// with the ACCESS_COUNTERS service type. Every contiguous subregion of
// accessed_mask is then handed to service_ats_requests().
//
// Returns NV_OK when every subregion was serviced, otherwise the status of the
// first subregion that failed (later subregions are not attempted).
//
// The mmap_lock of vma->vm_mm and the va_space lock are asserted to be held.
NV_STATUS uvm_ats_service_access_counters(uvm_gpu_va_space_t *gpu_va_space,
|
||||
struct vm_area_struct *vma,
|
||||
NvU64 base,
|
||||
uvm_ats_fault_context_t *ats_context)
|
||||
{
|
||||
uvm_va_block_region_t subregion;
|
||||
// Iterate over the page indices [0, PAGES_PER_UVM_VA_BLOCK) of the block.
uvm_va_block_region_t region = uvm_va_block_region(0, PAGES_PER_UVM_VA_BLOCK);
|
||||
uvm_ats_service_type_t service_type = UVM_ATS_SERVICE_TYPE_ACCESS_COUNTERS;
|
||||
|
||||
UVM_ASSERT(vma);
|
||||
UVM_ASSERT(IS_ALIGNED(base, UVM_VA_BLOCK_SIZE));
|
||||
UVM_ASSERT(g_uvm_global.ats.enabled);
|
||||
UVM_ASSERT(gpu_va_space);
|
||||
UVM_ASSERT(gpu_va_space->ats.enabled);
|
||||
UVM_ASSERT(uvm_gpu_va_space_state(gpu_va_space) == UVM_GPU_VA_SPACE_STATE_ACTIVE);
|
||||
|
||||
uvm_assert_mmap_lock_locked(vma->vm_mm);
|
||||
uvm_assert_rwsem_locked(&gpu_va_space->va_space->lock);
|
||||
|
||||
ats_batch_select_residency(gpu_va_space, vma, ats_context);
|
||||
|
||||
// NOTE(review): ats_compute_prefetch() returns an NV_STATUS that is discarded
// here. Confirm that ignoring a prefetch computation failure is intentional.
ats_compute_prefetch(gpu_va_space, vma, base, service_type, ats_context);
|
||||
|
||||
for_each_va_block_subregion_in_mask(subregion, &ats_context->accessed_mask, region) {
|
||||
NV_STATUS status;
|
||||
// Convert the subregion's page indices into a byte range relative to base.
NvU64 start = base + (subregion.first * PAGE_SIZE);
|
||||
size_t length = uvm_va_block_region_num_pages(subregion) * PAGE_SIZE;
|
||||
// NOTE(review): UVM_FAULT_ACCESS_TYPE_COUNT is presumably a placeholder since
// no fault access type applies to access counters. Confirm how
// service_ats_requests() treats it for this service type.
uvm_fault_access_type_t access_type = UVM_FAULT_ACCESS_TYPE_COUNT;
|
||||
|
||||
// The serviced range has to lie entirely inside the VMA.
UVM_ASSERT(start >= vma->vm_start);
|
||||
UVM_ASSERT((start + length) <= vma->vm_end);
|
||||
|
||||
status = service_ats_requests(gpu_va_space, vma, start, length, access_type, service_type, ats_context);
|
||||
if (status != NV_OK)
|
||||
return status;
|
||||
}
|
||||
|
||||
return NV_OK;
|
||||
}
|
||||
|
@ -42,17 +42,37 @@
|
||||
// corresponding bit in read_fault_mask. These returned masks are only valid if
|
||||
// the return status is NV_OK. Status other than NV_OK indicate system global
|
||||
// fault servicing failures.
|
||||
//
|
||||
// LOCKING: The caller must retain and hold the mmap_lock and hold the va_space
|
||||
// lock.
|
||||
NV_STATUS uvm_ats_service_faults(uvm_gpu_va_space_t *gpu_va_space,
|
||||
struct vm_area_struct *vma,
|
||||
NvU64 base,
|
||||
uvm_ats_fault_context_t *ats_context);
|
||||
|
||||
// Service access counter notifications on ATS regions in the range (base, base
|
||||
// + UVM_VA_BLOCK_SIZE) for individual pages in the range requested by page_mask
|
||||
// set in ats_context->accessed_mask. base must be aligned to UVM_VA_BLOCK_SIZE.
|
||||
// The caller is responsible for ensuring that the addresses in the
|
||||
// accessed_mask are completely covered by the VMA. The caller is also
|
||||
// responsible for handling any errors returned by this function.
|
||||
//
|
||||
// Returns NV_OK if servicing was successful. Any other error indicates an error
|
||||
// while servicing the range.
|
||||
//
|
||||
// LOCKING: The caller must retain and hold the mmap_lock and hold the va_space
|
||||
// lock.
|
||||
NV_STATUS uvm_ats_service_access_counters(uvm_gpu_va_space_t *gpu_va_space,
|
||||
struct vm_area_struct *vma,
|
||||
NvU64 base,
|
||||
uvm_ats_fault_context_t *ats_context);
|
||||
|
||||
// Return whether there are any VA ranges (and thus GMMU mappings) within the
|
||||
// UVM_GMMU_ATS_GRANULARITY-aligned region containing address.
|
||||
bool uvm_ats_check_in_gmmu_region(uvm_va_space_t *va_space, NvU64 address, uvm_va_range_t *next);
|
||||
|
||||
// This function performs pending TLB invalidations for ATS and clears the
|
||||
// ats_invalidate->write_faults_in_batch flag
|
||||
// ats_invalidate->tlb_batch_pending flag
|
||||
NV_STATUS uvm_ats_invalidate_tlbs(uvm_gpu_va_space_t *gpu_va_space,
|
||||
uvm_ats_fault_invalidate_t *ats_invalidate,
|
||||
uvm_tracker_t *out_tracker);
|
||||
|
@ -30,6 +30,7 @@
|
||||
#include "uvm_va_space_mm.h"
|
||||
|
||||
#include <asm/io.h>
|
||||
#include <linux/log2.h>
|
||||
#include <linux/iommu.h>
|
||||
#include <linux/mm_types.h>
|
||||
#include <linux/acpi.h>
|
||||
@ -50,6 +51,12 @@
|
||||
#define UVM_IOMMU_SVA_BIND_DEVICE(dev, mm) iommu_sva_bind_device(dev, mm)
|
||||
#endif
|
||||
|
||||
// Type to represent a 128-bit SMMU command queue command.
// The command is stored as two 64-bit words. sizeof(struct smmu_cmd) is what
// SMMU_VCMDQ_CMDQ_BASE_LOG2SIZE uses to derive the command queue size.
|
||||
struct smmu_cmd {
|
||||
// Presumably the least significant 64 bits of the command - TODO confirm.
NvU64 low;
|
||||
// Presumably the most significant 64 bits of the command - TODO confirm.
NvU64 high;
|
||||
};
|
||||
|
||||
// Base address of SMMU CMDQ-V for GSMMU0.
|
||||
#define SMMU_CMDQV_BASE_ADDR(smmu_base) (smmu_base + 0x200000)
|
||||
#define SMMU_CMDQV_BASE_LEN 0x00830000
|
||||
@ -101,9 +108,9 @@
|
||||
// Base address offset for the VCMDQ registers.
|
||||
#define SMMU_VCMDQ_CMDQ_BASE 0x10000
|
||||
|
||||
// Size of the command queue. Each command is 8 bytes and we can't
|
||||
// have a command queue greater than one page.
|
||||
#define SMMU_VCMDQ_CMDQ_BASE_LOG2SIZE 9
|
||||
// Size of the command queue. Each command is 16 bytes and we can't
|
||||
// have a command queue greater than one page in size.
|
||||
#define SMMU_VCMDQ_CMDQ_BASE_LOG2SIZE (PAGE_SHIFT - ilog2(sizeof(struct smmu_cmd)))
|
||||
#define SMMU_VCMDQ_CMDQ_ENTRIES (1UL << SMMU_VCMDQ_CMDQ_BASE_LOG2SIZE)
|
||||
|
||||
// We always use VINTF63 for the WAR
|
||||
@ -175,7 +182,6 @@ static NV_STATUS uvm_ats_smmu_war_init(uvm_parent_gpu_t *parent_gpu)
|
||||
iowrite32((VINTF << SMMU_CMDQV_CMDQ_ALLOC_MAP_VIRT_INTF_INDX_SHIFT) | SMMU_CMDQV_CMDQ_ALLOC_MAP_ALLOC,
|
||||
smmu_cmdqv_base + SMMU_CMDQV_CMDQ_ALLOC_MAP(VCMDQ));
|
||||
|
||||
BUILD_BUG_ON((SMMU_VCMDQ_CMDQ_BASE_LOG2SIZE + 3) > PAGE_SHIFT);
|
||||
smmu_vcmdq_write64(smmu_cmdqv_base, SMMU_VCMDQ_CMDQ_BASE,
|
||||
page_to_phys(parent_gpu->smmu_war.smmu_cmdq) | SMMU_VCMDQ_CMDQ_BASE_LOG2SIZE);
|
||||
smmu_vcmdq_write32(smmu_cmdqv_base, SMMU_VCMDQ_CONS, 0);
|
||||
|
@ -53,10 +53,11 @@
|
||||
#define UVM_ATS_SVA_SUPPORTED() 0
|
||||
#endif
|
||||
|
||||
// If NV_ARCH_INVALIDATE_SECONDARY_TLBS is defined it means the upstream fix is
|
||||
// in place so no need for the WAR from Bug 4130089: [GH180][r535] WAR for
|
||||
// kernel not issuing SMMU TLB invalidates on read-only
|
||||
#if defined(NV_ARCH_INVALIDATE_SECONDARY_TLBS)
|
||||
// If NV_MMU_NOTIFIER_OPS_HAS_ARCH_INVALIDATE_SECONDARY_TLBS is defined it
|
||||
// means the upstream fix is in place so no need for the WAR from
|
||||
// Bug 4130089: [GH180][r535] WAR for kernel not issuing SMMU TLB
|
||||
// invalidates on read-only
|
||||
#if defined(NV_MMU_NOTIFIER_OPS_HAS_ARCH_INVALIDATE_SECONDARY_TLBS)
|
||||
#define UVM_ATS_SMMU_WAR_REQUIRED() 0
|
||||
#elif NVCPU_IS_AARCH64
|
||||
#define UVM_ATS_SMMU_WAR_REQUIRED() 1
|
||||
|
@ -56,7 +56,7 @@ static NV_STATUS test_non_pipelined(uvm_gpu_t *gpu)
|
||||
|
||||
// TODO: Bug 3839176: the test is waived on Confidential Computing because
|
||||
// it assumes that GPU can access system memory without using encryption.
|
||||
if (uvm_conf_computing_mode_enabled(gpu))
|
||||
if (g_uvm_global.conf_computing_enabled)
|
||||
return NV_OK;
|
||||
|
||||
status = uvm_rm_mem_alloc_and_map_cpu(gpu, UVM_RM_MEM_TYPE_SYS, CE_TEST_MEM_SIZE, 0, &host_mem);
|
||||
@ -176,7 +176,7 @@ static NV_STATUS test_membar(uvm_gpu_t *gpu)
|
||||
|
||||
// TODO: Bug 3839176: the test is waived on Confidential Computing because
|
||||
// it assumes that GPU can access system memory without using encryption.
|
||||
if (uvm_conf_computing_mode_enabled(gpu))
|
||||
if (g_uvm_global.conf_computing_enabled)
|
||||
return NV_OK;
|
||||
|
||||
status = uvm_rm_mem_alloc_and_map_cpu(gpu, UVM_RM_MEM_TYPE_SYS, sizeof(NvU32), 0, &host_mem);
|
||||
@ -411,10 +411,11 @@ static NV_STATUS test_memcpy_and_memset(uvm_gpu_t *gpu)
|
||||
size_t i, j, k, s;
|
||||
uvm_mem_alloc_params_t mem_params = {0};
|
||||
|
||||
if (uvm_conf_computing_mode_enabled(gpu))
|
||||
if (g_uvm_global.conf_computing_enabled)
|
||||
TEST_NV_CHECK_GOTO(uvm_mem_alloc_sysmem_dma_and_map_cpu_kernel(size, gpu, current->mm, &verif_mem), done);
|
||||
else
|
||||
TEST_NV_CHECK_GOTO(uvm_mem_alloc_sysmem_and_map_cpu_kernel(size, current->mm, &verif_mem), done);
|
||||
|
||||
TEST_NV_CHECK_GOTO(uvm_mem_map_gpu_kernel(verif_mem, gpu), done);
|
||||
|
||||
gpu_verif_addr = uvm_mem_gpu_address_virtual_kernel(verif_mem, gpu);
|
||||
@ -436,7 +437,7 @@ static NV_STATUS test_memcpy_and_memset(uvm_gpu_t *gpu)
|
||||
TEST_NV_CHECK_GOTO(uvm_rm_mem_alloc(gpu, UVM_RM_MEM_TYPE_SYS, size, 0, &sys_rm_mem), done);
|
||||
gpu_addresses[0] = uvm_rm_mem_get_gpu_va(sys_rm_mem, gpu, is_proxy_va_space);
|
||||
|
||||
if (uvm_conf_computing_mode_enabled(gpu)) {
|
||||
if (g_uvm_global.conf_computing_enabled) {
|
||||
for (i = 0; i < iterations; ++i) {
|
||||
for (s = 0; s < ARRAY_SIZE(element_sizes); s++) {
|
||||
TEST_NV_CHECK_GOTO(test_memcpy_and_memset_inner(gpu,
|
||||
@ -559,7 +560,7 @@ static NV_STATUS test_semaphore_reduction_inc(uvm_gpu_t *gpu)
|
||||
|
||||
// TODO: Bug 3839176: the test is waived on Confidential Computing because
|
||||
// it assumes that GPU can access system memory without using encryption.
|
||||
if (uvm_conf_computing_mode_enabled(gpu))
|
||||
if (g_uvm_global.conf_computing_enabled)
|
||||
return NV_OK;
|
||||
|
||||
status = test_semaphore_alloc_sem(gpu, size, &mem);
|
||||
@ -611,7 +612,7 @@ static NV_STATUS test_semaphore_release(uvm_gpu_t *gpu)
|
||||
|
||||
// TODO: Bug 3839176: the test is waived on Confidential Computing because
|
||||
// it assumes that GPU can access system memory without using encryption.
|
||||
if (uvm_conf_computing_mode_enabled(gpu))
|
||||
if (g_uvm_global.conf_computing_enabled)
|
||||
return NV_OK;
|
||||
|
||||
status = test_semaphore_alloc_sem(gpu, size, &mem);
|
||||
@ -665,7 +666,7 @@ static NV_STATUS test_semaphore_timestamp(uvm_gpu_t *gpu)
|
||||
|
||||
// TODO: Bug 3839176: the test is waived on Confidential Computing because
|
||||
// it assumes that GPU can access system memory without using encryption.
|
||||
if (uvm_conf_computing_mode_enabled(gpu))
|
||||
if (g_uvm_global.conf_computing_enabled)
|
||||
return NV_OK;
|
||||
|
||||
status = test_semaphore_alloc_sem(gpu, size, &mem);
|
||||
@ -1153,7 +1154,7 @@ static NV_STATUS test_encryption_decryption(uvm_gpu_t *gpu,
|
||||
} small_sizes[] = {{1, 1}, {3, 1}, {8, 1}, {2, 2}, {8, 4}, {UVM_PAGE_SIZE_4K - 8, 8}, {UVM_PAGE_SIZE_4K + 8, 8}};
|
||||
|
||||
// Only Confidential Computing uses CE encryption/decryption
|
||||
if (!uvm_conf_computing_mode_enabled(gpu))
|
||||
if (!g_uvm_global.conf_computing_enabled)
|
||||
return NV_OK;
|
||||
|
||||
// Use a size, and copy size, that are not a multiple of common page sizes.
|
||||
|
@ -83,7 +83,7 @@ bool uvm_channel_pool_uses_mutex(uvm_channel_pool_t *pool)
|
||||
// submission uses UVM_SPIN_LOOP, which can call 'schedule', to wait for
|
||||
// LCIC completion. Indirect submission is synchronous, calling
|
||||
// uvm_push_wait which again uses UVM_SPIN_LOOP.
|
||||
if (uvm_conf_computing_mode_enabled(pool->manager->gpu))
|
||||
if (g_uvm_global.conf_computing_enabled)
|
||||
return true;
|
||||
|
||||
// Unless the mutex is required, the spinlock is preferred when work
|
||||
@ -95,7 +95,7 @@ static void channel_pool_lock_init(uvm_channel_pool_t *pool)
|
||||
{
|
||||
uvm_lock_order_t order = UVM_LOCK_ORDER_CHANNEL;
|
||||
|
||||
if (uvm_conf_computing_mode_enabled(pool->manager->gpu) && uvm_channel_pool_is_wlc(pool))
|
||||
if (g_uvm_global.conf_computing_enabled && uvm_channel_pool_is_wlc(pool))
|
||||
order = UVM_LOCK_ORDER_WLC_CHANNEL;
|
||||
|
||||
if (uvm_channel_pool_uses_mutex(pool))
|
||||
@ -137,7 +137,7 @@ static NvU32 uvm_channel_update_progress_with_max(uvm_channel_t *channel,
|
||||
// Completed value should never exceed the queued value
|
||||
UVM_ASSERT_MSG_RELEASE(completed_value <= channel->tracking_sem.queued_value,
|
||||
"GPU %s channel %s unexpected completed_value 0x%llx > queued_value 0x%llx\n",
|
||||
channel->pool->manager->gpu->parent->name,
|
||||
uvm_gpu_name(uvm_channel_get_gpu(channel)),
|
||||
channel->name,
|
||||
completed_value,
|
||||
channel->tracking_sem.queued_value);
|
||||
@ -273,9 +273,8 @@ static bool try_claim_channel(uvm_channel_t *channel, NvU32 num_gpfifo_entries)
|
||||
static void unlock_channel_for_push(uvm_channel_t *channel)
|
||||
{
|
||||
NvU32 index;
|
||||
uvm_gpu_t *gpu = uvm_channel_get_gpu(channel);
|
||||
|
||||
if (!uvm_conf_computing_mode_enabled(gpu))
|
||||
if (!g_uvm_global.conf_computing_enabled)
|
||||
return;
|
||||
|
||||
index = uvm_channel_index_in_pool(channel);
|
||||
@ -287,25 +286,22 @@ static void unlock_channel_for_push(uvm_channel_t *channel)
|
||||
uvm_up_out_of_order(&channel->pool->push_sem);
|
||||
}
|
||||
|
||||
static bool is_channel_locked_for_push(uvm_channel_t *channel)
|
||||
bool uvm_channel_is_locked_for_push(uvm_channel_t *channel)
|
||||
{
|
||||
uvm_gpu_t *gpu = uvm_channel_get_gpu(channel);
|
||||
|
||||
if (uvm_conf_computing_mode_enabled(gpu))
|
||||
if (g_uvm_global.conf_computing_enabled)
|
||||
return test_bit(uvm_channel_index_in_pool(channel), channel->pool->push_locks);
|
||||
|
||||
// For CE and proxy channels, we always return that the channel is locked,
|
||||
// which has no functional impact in the UVM channel code-flow, this is only
|
||||
// used on UVM_ASSERTs.
|
||||
// used in UVM_ASSERTs.
|
||||
return true;
|
||||
}
|
||||
|
||||
static void lock_channel_for_push(uvm_channel_t *channel)
|
||||
{
|
||||
uvm_gpu_t *gpu = uvm_channel_get_gpu(channel);
|
||||
NvU32 index = uvm_channel_index_in_pool(channel);
|
||||
|
||||
UVM_ASSERT(uvm_conf_computing_mode_enabled(gpu));
|
||||
UVM_ASSERT(g_uvm_global.conf_computing_enabled);
|
||||
uvm_channel_pool_assert_locked(channel->pool);
|
||||
UVM_ASSERT(!test_bit(index, channel->pool->push_locks));
|
||||
|
||||
@ -314,10 +310,9 @@ static void lock_channel_for_push(uvm_channel_t *channel)
|
||||
|
||||
static bool test_claim_and_lock_channel(uvm_channel_t *channel, NvU32 num_gpfifo_entries)
|
||||
{
|
||||
uvm_gpu_t *gpu = uvm_channel_get_gpu(channel);
|
||||
NvU32 index = uvm_channel_index_in_pool(channel);
|
||||
|
||||
UVM_ASSERT(uvm_conf_computing_mode_enabled(gpu));
|
||||
UVM_ASSERT(g_uvm_global.conf_computing_enabled);
|
||||
uvm_channel_pool_assert_locked(channel->pool);
|
||||
|
||||
if (!test_bit(index, channel->pool->push_locks) && try_claim_channel_locked(channel, num_gpfifo_entries)) {
|
||||
@ -337,7 +332,7 @@ static NV_STATUS channel_reserve_and_lock_in_pool(uvm_channel_pool_t *pool, uvm_
|
||||
NvU32 index;
|
||||
|
||||
UVM_ASSERT(pool);
|
||||
UVM_ASSERT(uvm_conf_computing_mode_enabled(pool->manager->gpu));
|
||||
UVM_ASSERT(g_uvm_global.conf_computing_enabled);
|
||||
|
||||
// This semaphore is uvm_up() in unlock_channel_for_push() as part of the
|
||||
// uvm_channel_end_push() routine.
|
||||
@ -399,7 +394,7 @@ static NV_STATUS channel_reserve_in_pool(uvm_channel_pool_t *pool, uvm_channel_t
|
||||
|
||||
UVM_ASSERT(pool);
|
||||
|
||||
if (uvm_conf_computing_mode_enabled(pool->manager->gpu))
|
||||
if (g_uvm_global.conf_computing_enabled)
|
||||
return channel_reserve_and_lock_in_pool(pool, channel_out);
|
||||
|
||||
uvm_for_each_channel_in_pool(channel, pool) {
|
||||
@ -509,7 +504,7 @@ static void channel_semaphore_gpu_encrypt_payload(uvm_push_t *push, NvU64 semaph
|
||||
NvU32 payload_size = sizeof(*semaphore->payload);
|
||||
NvU32 *last_pushed_notifier = &semaphore->conf_computing.last_pushed_notifier;
|
||||
|
||||
UVM_ASSERT(uvm_conf_computing_mode_enabled(gpu));
|
||||
UVM_ASSERT(g_uvm_global.conf_computing_enabled);
|
||||
UVM_ASSERT(uvm_channel_is_ce(channel));
|
||||
|
||||
encrypted_payload_gpu_va = uvm_rm_mem_get_gpu_va(semaphore->conf_computing.encrypted_payload, gpu, false);
|
||||
@ -540,29 +535,97 @@ static void push_reserve_csl_sign_buf(uvm_push_t *push)
|
||||
UVM_ASSERT((buf - UVM_METHOD_SIZE / sizeof(*buf)) == push->begin);
|
||||
}
|
||||
|
||||
// Return the channel paired with the given WLC or LCIC channel.
//
// The paired channel is the one located at the same index as the input
// channel, but in the default pool of the opposite type: LCIC for a WLC
// channel, WLC for an LCIC channel. The input must be a WLC or LCIC channel.
static uvm_channel_t *get_paired_channel(uvm_channel_t *channel)
|
||||
{
|
||||
unsigned index;
|
||||
uvm_channel_pool_t *paired_pool;
|
||||
uvm_channel_type_t paired_channel_type;
|
||||
|
||||
UVM_ASSERT(channel);
|
||||
UVM_ASSERT(uvm_channel_is_wlc(channel) || uvm_channel_is_lcic(channel));
|
||||
|
||||
index = uvm_channel_index_in_pool(channel);
|
||||
paired_channel_type = uvm_channel_is_wlc(channel) ? UVM_CHANNEL_TYPE_LCIC : UVM_CHANNEL_TYPE_WLC;
|
||||
paired_pool = channel->pool->manager->pool_to_use.default_for_type[paired_channel_type];
|
||||
// Same index in the paired pool.
return paired_pool->channels + index;
|
||||
}
|
||||
|
||||
// Return the WLC channel paired with the given LCIC channel. The input is
// asserted to be a non-NULL LCIC channel.
uvm_channel_t *uvm_channel_lcic_get_paired_wlc(uvm_channel_t *lcic_channel)
|
||||
{
|
||||
UVM_ASSERT(lcic_channel);
|
||||
UVM_ASSERT(uvm_channel_is_lcic(lcic_channel));
|
||||
|
||||
return get_paired_channel(lcic_channel);
|
||||
}
|
||||
|
||||
// Return the LCIC channel paired with the given WLC channel. The input is
// asserted to be a non-NULL WLC channel.
uvm_channel_t *uvm_channel_wlc_get_paired_lcic(uvm_channel_t *wlc_channel)
|
||||
{
|
||||
UVM_ASSERT(wlc_channel);
|
||||
UVM_ASSERT(uvm_channel_is_wlc(wlc_channel));
|
||||
|
||||
return get_paired_channel(wlc_channel);
|
||||
}
|
||||
|
||||
// Rotate the IVs of channel if needed and, when the push on channel has to be
// submitted indirectly, reserve the launch channel for it.
//
// A launch channel is reserved for CE channels, except for WLC channels once
// the channel manager reports WLC as ready. The launch channel type is WLC
// when WLC is ready, SEC2 otherwise. The IVs of the launch channel, and of its
// paired LCIC channel when the launch channel is WLC, are also rotated if
// needed.
//
// On success, *launch_channel is set to the reserved channel when one was
// needed; otherwise it is left untouched. On failure, the error status is
// returned and *launch_channel is not written.
static NV_STATUS channel_rotate_and_reserve_launch_channel(uvm_channel_t *channel, uvm_channel_t **launch_channel)
|
||||
{
|
||||
uvm_channel_manager_t *manager = channel->pool->manager;
|
||||
NV_STATUS status;
|
||||
|
||||
status = uvm_conf_computing_maybe_rotate_channel_ivs(channel);
|
||||
if (status != NV_OK)
|
||||
return status;
|
||||
|
||||
// CE channels, other than WLC fix launch schedule setup, need a launch
|
||||
// channel that needs to be reserved
|
||||
if (uvm_channel_is_ce(channel) &&
|
||||
!(uvm_channel_is_wlc(channel) && uvm_channel_manager_is_wlc_ready(manager))) {
|
||||
uvm_channel_t *local_launch_channel = NULL;
|
||||
uvm_channel_type_t indirect_channel_type = uvm_channel_manager_is_wlc_ready(manager) ?
|
||||
UVM_CHANNEL_TYPE_WLC :
|
||||
UVM_CHANNEL_TYPE_SEC2;
|
||||
status = uvm_channel_reserve_type(manager, indirect_channel_type, &local_launch_channel);
|
||||
if (status != NV_OK)
|
||||
return status;
|
||||
|
||||
// Indirect launch relies on pre-allocated resources to avoid failure
|
||||
// paths. This includes pre-allocating IV space. There's no way to
|
||||
// undo the launch channel reservation, so just return an error.
|
||||
// NOTE(review): the error paths below call uvm_channel_release() on the
// launch channel before returning, which appears to conflict with the
// "no way to undo" statement above. Confirm which one is accurate.
status = uvm_conf_computing_maybe_rotate_channel_ivs(local_launch_channel);
|
||||
if (status != NV_OK) {
|
||||
uvm_channel_release(local_launch_channel, 1);
|
||||
return status;
|
||||
}
|
||||
|
||||
// A WLC launch channel also requires IV rotation on its paired LCIC channel.
if (uvm_channel_is_wlc(local_launch_channel)) {
|
||||
status = uvm_conf_computing_maybe_rotate_channel_ivs(uvm_channel_wlc_get_paired_lcic(local_launch_channel));
|
||||
if (status != NV_OK) {
|
||||
uvm_channel_release(local_launch_channel, 1);
|
||||
return status;
|
||||
}
|
||||
}
|
||||
// Publish the launch channel only after every step above succeeded.
*launch_channel = local_launch_channel;
|
||||
}
|
||||
|
||||
|
||||
return NV_OK;
|
||||
}
|
||||
|
||||
NV_STATUS uvm_channel_begin_push(uvm_channel_t *channel, uvm_push_t *push)
|
||||
{
|
||||
NV_STATUS status;
|
||||
NV_STATUS status = NV_OK;
|
||||
uvm_channel_manager_t *manager;
|
||||
uvm_gpu_t *gpu;
|
||||
|
||||
UVM_ASSERT(channel);
|
||||
UVM_ASSERT(push);
|
||||
|
||||
manager = channel->pool->manager;
|
||||
|
||||
gpu = uvm_channel_get_gpu(channel);
|
||||
|
||||
// Only SEC2 and WLC with set up fixed schedule can use direct push
|
||||
// submission. All other cases (including WLC pre-schedule) need to
|
||||
// reserve a launch channel that will be used to submit this push
|
||||
// indirectly.
|
||||
if (uvm_conf_computing_mode_enabled(gpu) && uvm_channel_is_ce(channel) &&
|
||||
!(uvm_channel_is_wlc(channel) && uvm_channel_manager_is_wlc_ready(manager))) {
|
||||
uvm_channel_type_t indirect_channel_type = uvm_channel_manager_is_wlc_ready(manager) ?
|
||||
UVM_CHANNEL_TYPE_WLC :
|
||||
UVM_CHANNEL_TYPE_SEC2;
|
||||
status = uvm_channel_reserve_type(manager, indirect_channel_type, &push->launch_channel);
|
||||
if (g_uvm_global.conf_computing_enabled) {
|
||||
status = channel_rotate_and_reserve_launch_channel(channel, &push->launch_channel);
|
||||
if (status != NV_OK)
|
||||
return status;
|
||||
}
|
||||
@ -570,7 +633,7 @@ NV_STATUS uvm_channel_begin_push(uvm_channel_t *channel, uvm_push_t *push)
|
||||
// When the Confidential Computing feature is enabled, the channel's lock
|
||||
// should have already been acquired in uvm_channel_reserve() or
|
||||
// channel_reserve_and_lock_in_pool().
|
||||
UVM_ASSERT(is_channel_locked_for_push(channel));
|
||||
UVM_ASSERT(uvm_channel_is_locked_for_push(channel));
|
||||
|
||||
push->channel = channel;
|
||||
push->channel_tracking_value = 0;
|
||||
@ -603,7 +666,7 @@ static void internal_channel_submit_work(uvm_push_t *push, NvU32 push_size, NvU3
|
||||
gpfifo_entry = (NvU64*)channel->channel_info.gpFifoEntries + channel->cpu_put;
|
||||
pushbuffer_va = uvm_pushbuffer_get_gpu_va_for_push(pushbuffer, push);
|
||||
|
||||
if (uvm_conf_computing_mode_enabled(gpu)) {
|
||||
if (g_uvm_global.conf_computing_enabled) {
|
||||
void *unprotected_pb = uvm_pushbuffer_get_unprotected_cpu_va_for_push(pushbuffer, push);
|
||||
UVM_ASSERT(uvm_channel_is_sec2(channel));
|
||||
|
||||
@ -674,45 +737,14 @@ static void uvm_channel_tracking_semaphore_release(uvm_push_t *push, NvU64 semap
|
||||
// needs to be scheduled to get an encrypted shadow copy in unprotected
|
||||
// sysmem. This allows UVM to later decrypt it and observe the new
|
||||
// semaphore value.
|
||||
if (uvm_conf_computing_mode_enabled(push->gpu) && uvm_channel_is_ce(push->channel))
|
||||
if (g_uvm_global.conf_computing_enabled && uvm_channel_is_ce(push->channel))
|
||||
channel_semaphore_gpu_encrypt_payload(push, semaphore_va);
|
||||
}
|
||||
|
||||
static uvm_channel_t *get_paired_channel(uvm_channel_t *channel)
|
||||
{
|
||||
unsigned index;
|
||||
uvm_channel_pool_t *paired_pool;
|
||||
uvm_channel_type_t paired_channel_type;
|
||||
|
||||
UVM_ASSERT(channel);
|
||||
UVM_ASSERT(uvm_channel_is_wlc(channel) || uvm_channel_is_lcic(channel));
|
||||
|
||||
index = uvm_channel_index_in_pool(channel);
|
||||
paired_channel_type = uvm_channel_is_wlc(channel) ? UVM_CHANNEL_TYPE_LCIC : UVM_CHANNEL_TYPE_WLC;
|
||||
paired_pool = channel->pool->manager->pool_to_use.default_for_type[paired_channel_type];
|
||||
return paired_pool->channels + index;
|
||||
}
|
||||
|
||||
static uvm_channel_t *wlc_get_paired_lcic(uvm_channel_t *wlc_channel)
|
||||
{
|
||||
UVM_ASSERT(wlc_channel);
|
||||
UVM_ASSERT(uvm_channel_is_wlc(wlc_channel));
|
||||
|
||||
return get_paired_channel(wlc_channel);
|
||||
}
|
||||
|
||||
static uvm_channel_t *lcic_get_paired_wlc(uvm_channel_t *lcic_channel)
|
||||
{
|
||||
UVM_ASSERT(lcic_channel);
|
||||
UVM_ASSERT(uvm_channel_is_lcic(lcic_channel));
|
||||
|
||||
return get_paired_channel(lcic_channel);
|
||||
}
|
||||
|
||||
static void internal_channel_submit_work_wlc(uvm_push_t *push)
|
||||
{
|
||||
uvm_channel_t *wlc_channel = push->channel;
|
||||
uvm_channel_t *lcic_channel = wlc_get_paired_lcic(wlc_channel);
|
||||
uvm_channel_t *lcic_channel = uvm_channel_wlc_get_paired_lcic(wlc_channel);
|
||||
UvmCslIv *iv_cpu_addr = lcic_channel->tracking_sem.semaphore.conf_computing.ivs;
|
||||
NvU32 *last_pushed_notifier;
|
||||
NvU32 iv_index;
|
||||
@ -926,7 +958,7 @@ static void set_gpfifo_via_sec2(uvm_push_t *sec2_push, uvm_channel_t *channel, N
|
||||
uvm_pushbuffer_t *pushbuffer = uvm_channel_get_pushbuffer(channel);
|
||||
NvU64 prev_pb_va = uvm_pushbuffer_get_gpu_va_base(pushbuffer) + previous_gpfifo->pushbuffer_offset;
|
||||
|
||||
// Reconstruct the previous gpfifo entry. UVM_GPFIFO_SYNC_WAIT is
|
||||
// Reconstruct the previous GPFIFO entry. UVM_GPFIFO_SYNC_WAIT is
|
||||
// used only in static WLC schedule.
|
||||
// Overwriting the previous entry with the same value doesn't hurt,
|
||||
// whether the previous entry has been processed or not
|
||||
@ -1053,7 +1085,7 @@ static void encrypt_push(uvm_push_t *push)
|
||||
uvm_pushbuffer_t *pushbuffer = uvm_channel_get_pushbuffer(channel);
|
||||
unsigned auth_tag_offset = UVM_CONF_COMPUTING_AUTH_TAG_SIZE * push->push_info_index;
|
||||
|
||||
if (!uvm_conf_computing_mode_enabled(gpu))
|
||||
if (!g_uvm_global.conf_computing_enabled)
|
||||
return;
|
||||
|
||||
if (!push_info->on_complete)
|
||||
@ -1111,7 +1143,7 @@ void uvm_channel_end_push(uvm_push_t *push)
|
||||
uvm_channel_tracking_semaphore_release(push, semaphore_va, new_payload);
|
||||
|
||||
if (uvm_channel_is_wlc(channel) && uvm_channel_manager_is_wlc_ready(channel_manager)) {
|
||||
uvm_channel_t *paired_lcic = wlc_get_paired_lcic(channel);
|
||||
uvm_channel_t *paired_lcic = uvm_channel_wlc_get_paired_lcic(channel);
|
||||
|
||||
gpu->parent->ce_hal->semaphore_reduction_inc(push,
|
||||
paired_lcic->channel_info.gpPutGpuVa,
|
||||
@ -1125,7 +1157,7 @@ void uvm_channel_end_push(uvm_push_t *push)
|
||||
// The UVM_MAX_WLC_PUSH_SIZE is set to fit indirect work launch
|
||||
// pushes. However, direct pushes to WLC can be smaller than this
|
||||
// size. This is used e.g. by indirect submission of control
|
||||
// gpfifo entries.
|
||||
// GPFIFO entries.
|
||||
gpu->parent->host_hal->noop(push, UVM_MAX_WLC_PUSH_SIZE - uvm_push_get_size(push));
|
||||
}
|
||||
}
|
||||
@ -1144,8 +1176,9 @@ void uvm_channel_end_push(uvm_push_t *push)
|
||||
// Indirect submission via SEC2/WLC needs pushes to be aligned for
|
||||
// encryption/decryption. The pushbuffer_size of this push
|
||||
// influences starting address of the next push.
|
||||
if (uvm_conf_computing_mode_enabled(gpu))
|
||||
if (g_uvm_global.conf_computing_enabled)
|
||||
entry->pushbuffer_size = UVM_ALIGN_UP(push_size, UVM_CONF_COMPUTING_BUF_ALIGNMENT);
|
||||
|
||||
entry->push_info = &channel->push_infos[push->push_info_index];
|
||||
entry->type = UVM_GPFIFO_ENTRY_TYPE_NORMAL;
|
||||
|
||||
@ -1158,7 +1191,7 @@ void uvm_channel_end_push(uvm_push_t *push)
|
||||
else if (uvm_channel_is_wlc(channel) && uvm_channel_manager_is_wlc_ready(channel_manager)) {
|
||||
internal_channel_submit_work_wlc(push);
|
||||
}
|
||||
else if (uvm_conf_computing_mode_enabled(gpu) && uvm_channel_is_ce(channel)) {
|
||||
else if (g_uvm_global.conf_computing_enabled && uvm_channel_is_ce(channel)) {
|
||||
if (uvm_channel_manager_is_wlc_ready(channel_manager)) {
|
||||
internal_channel_submit_work_indirect_wlc(push, cpu_put, new_cpu_put);
|
||||
}
|
||||
@ -1209,7 +1242,7 @@ static void submit_ctrl_gpfifo(uvm_channel_t *channel, uvm_gpfifo_entry_t *entry
|
||||
|
||||
UVM_ASSERT(entry == &channel->gpfifo_entries[cpu_put]);
|
||||
|
||||
if (uvm_conf_computing_mode_enabled(gpu) && uvm_channel_is_ce(channel))
|
||||
if (g_uvm_global.conf_computing_enabled && uvm_channel_is_ce(channel))
|
||||
return;
|
||||
|
||||
gpfifo_entry = (NvU64*)channel->channel_info.gpFifoEntries + cpu_put;
|
||||
@ -1291,8 +1324,6 @@ static void write_ctrl_gpfifo(uvm_channel_t *channel, NvU64 ctrl_fifo_entry_valu
|
||||
uvm_gpfifo_entry_t *entry;
|
||||
NvU32 cpu_put;
|
||||
NvU32 new_cpu_put;
|
||||
bool needs_indirect_submit = false;
|
||||
uvm_gpu_t *gpu = uvm_channel_get_gpu(channel);
|
||||
|
||||
channel_pool_lock(channel->pool);
|
||||
|
||||
@ -1315,8 +1346,6 @@ static void write_ctrl_gpfifo(uvm_channel_t *channel, NvU64 ctrl_fifo_entry_valu
|
||||
--channel->current_gpfifo_count;
|
||||
|
||||
submit_ctrl_gpfifo(channel, entry, new_cpu_put);
|
||||
if (uvm_conf_computing_mode_enabled(gpu) && uvm_channel_is_ce(channel))
|
||||
needs_indirect_submit = true;
|
||||
|
||||
channel->cpu_put = new_cpu_put;
|
||||
|
||||
@ -1327,7 +1356,8 @@ static void write_ctrl_gpfifo(uvm_channel_t *channel, NvU64 ctrl_fifo_entry_valu
|
||||
// semaphore release, where the channel is unlocked.
|
||||
channel_pool_unlock(channel->pool);
|
||||
|
||||
if (needs_indirect_submit) {
|
||||
// Trigger indirect submission when needed.
|
||||
if (g_uvm_global.conf_computing_enabled && uvm_channel_is_ce(channel)) {
|
||||
NV_STATUS status = submit_ctrl_gpfifo_indirect(channel, entry, cpu_put, new_cpu_put);
|
||||
|
||||
// All failures are globally fatal. There's nothing we can do to recover.
|
||||
@ -1344,12 +1374,11 @@ static void write_ctrl_gpfifo(uvm_channel_t *channel, NvU64 ctrl_fifo_entry_valu
|
||||
NV_STATUS uvm_channel_write_ctrl_gpfifo(uvm_channel_t *channel, NvU64 ctrl_fifo_entry_value)
|
||||
{
|
||||
NV_STATUS status;
|
||||
uvm_gpu_t *gpu = channel->pool->manager->gpu;
|
||||
uvm_push_t push;
|
||||
|
||||
UVM_ASSERT(!uvm_channel_is_proxy(channel));
|
||||
|
||||
// WLC/LCIC channels can only process custom gpfifo entries before
|
||||
// WLC/LCIC channels can only process custom GPFIFO entries before
|
||||
// their schedule is set up.
|
||||
UVM_ASSERT(!uvm_channel_is_lcic(channel) || !uvm_channel_manager_is_wlc_ready(channel->pool->manager));
|
||||
UVM_ASSERT(!uvm_channel_is_wlc(channel) || !uvm_channel_manager_is_wlc_ready(channel->pool->manager));
|
||||
@ -1373,10 +1402,28 @@ NV_STATUS uvm_channel_write_ctrl_gpfifo(uvm_channel_t *channel, NvU64 ctrl_fifo_
|
||||
if (status != NV_OK)
|
||||
return status;
|
||||
|
||||
if (g_uvm_global.conf_computing_enabled) {
|
||||
// Rotating IV needs to idle the channel. However, there's no semaphore
|
||||
// release after submitting a control entry. It is not possible to wait
|
||||
// for in-flight entries after the GPFIFO submission.
|
||||
// Instead, check for IV rotation early. Secure channels are locked for
|
||||
// pushes after reservation so the IV space gained here can't be used
|
||||
// up by concurrent pushes.
|
||||
status = uvm_conf_computing_maybe_rotate_channel_ivs_retry_busy(channel);
|
||||
if (status != NV_OK) {
|
||||
uvm_channel_release(channel, 2);
|
||||
return status;
|
||||
}
|
||||
}
|
||||
|
||||
write_ctrl_gpfifo(channel, ctrl_fifo_entry_value);
|
||||
|
||||
status = uvm_push_begin_on_reserved_channel(channel, &push, "write_ctrl_GPFIFO");
|
||||
if (status != NV_OK) {
|
||||
uvm_gpu_t *gpu = uvm_channel_get_gpu(channel);
|
||||
|
||||
// One entry was consumed by the control GPFIFO entry; release the other.
|
||||
uvm_channel_release(channel, 1);
|
||||
UVM_ERR_PRINT("Failed to begin push on channel: %s, GPU %s\n", nvstatusToString(status), uvm_gpu_name(gpu));
|
||||
return status;
|
||||
}
|
||||
@ -1440,9 +1487,8 @@ NV_STATUS uvm_channel_reserve(uvm_channel_t *channel, NvU32 num_gpfifo_entries)
|
||||
{
|
||||
NV_STATUS status = NV_OK;
|
||||
uvm_spin_loop_t spin;
|
||||
uvm_gpu_t *gpu = uvm_channel_get_gpu(channel);
|
||||
|
||||
if (uvm_conf_computing_mode_enabled(gpu))
|
||||
if (g_uvm_global.conf_computing_enabled)
|
||||
return channel_reserve_and_lock(channel, num_gpfifo_entries);
|
||||
|
||||
if (try_claim_channel(channel, num_gpfifo_entries))
|
||||
@ -1460,6 +1506,18 @@ NV_STATUS uvm_channel_reserve(uvm_channel_t *channel, NvU32 num_gpfifo_entries)
|
||||
return status;
|
||||
}
|
||||
|
||||
void uvm_channel_release(uvm_channel_t *channel, NvU32 num_gpfifo_entries)
|
||||
{
|
||||
channel_pool_lock(channel->pool);
|
||||
|
||||
UVM_ASSERT(uvm_channel_is_locked_for_push(channel));
|
||||
unlock_channel_for_push(channel);
|
||||
|
||||
UVM_ASSERT(channel->current_gpfifo_count >= num_gpfifo_entries);
|
||||
channel->current_gpfifo_count -= num_gpfifo_entries;
|
||||
channel_pool_unlock(channel->pool);
|
||||
}
|
||||
|
||||
// Get the first pending GPFIFO entry, if any.
|
||||
// This doesn't stop the entry from being reused.
|
||||
static uvm_gpfifo_entry_t *uvm_channel_get_first_pending_entry(uvm_channel_t *channel)
|
||||
@ -1580,35 +1638,55 @@ NvU64 uvm_channel_update_completed_value(uvm_channel_t *channel)
|
||||
return uvm_gpu_tracking_semaphore_update_completed_value(&channel->tracking_sem);
|
||||
}
|
||||
|
||||
static NV_STATUS csl_init(uvm_channel_t *channel)
|
||||
NV_STATUS uvm_channel_wait(uvm_channel_t *channel)
|
||||
{
|
||||
NV_STATUS status;
|
||||
uvm_gpu_t *gpu = uvm_channel_get_gpu(channel);
|
||||
NV_STATUS status = uvm_global_get_status();
|
||||
uvm_spin_loop_t spin;
|
||||
|
||||
UVM_ASSERT(uvm_conf_computing_mode_enabled(gpu));
|
||||
if (uvm_channel_update_progress(channel) == 0 && status == NV_OK)
|
||||
return uvm_channel_check_errors(channel);
|
||||
|
||||
uvm_mutex_init(&channel->csl.ctx_lock, UVM_LOCK_ORDER_LEAF);
|
||||
uvm_spin_loop_init(&spin);
|
||||
while (uvm_channel_update_progress(channel) > 0 && status == NV_OK) {
|
||||
UVM_SPIN_LOOP(&spin);
|
||||
status = uvm_global_get_status();
|
||||
|
||||
status = uvm_rm_locked_call(nvUvmInterfaceCslInitContext(&channel->csl.ctx, channel->handle));
|
||||
if (status == NV_OK) {
|
||||
channel->csl.is_ctx_initialized = true;
|
||||
}
|
||||
else {
|
||||
UVM_DBG_PRINT("nvUvmInterfaceCslInitContext() failed: %s, GPU %s\n",
|
||||
nvstatusToString(status),
|
||||
uvm_gpu_name(gpu));
|
||||
if (status == NV_OK)
|
||||
status = uvm_channel_check_errors(channel);
|
||||
}
|
||||
|
||||
return status;
|
||||
}
|
||||
|
||||
static NV_STATUS csl_init(uvm_channel_t *channel)
|
||||
{
|
||||
NV_STATUS status;
|
||||
|
||||
UVM_ASSERT(g_uvm_global.conf_computing_enabled);
|
||||
|
||||
status = uvm_rm_locked_call(nvUvmInterfaceCslInitContext(&channel->csl.ctx, channel->handle));
|
||||
if (status != NV_OK) {
|
||||
uvm_gpu_t *gpu = uvm_channel_get_gpu(channel);
|
||||
|
||||
UVM_DBG_PRINT("nvUvmInterfaceCslInitContext() failed: %s, GPU %s\n",
|
||||
nvstatusToString(status),
|
||||
uvm_gpu_name(gpu));
|
||||
return status;
|
||||
}
|
||||
|
||||
uvm_mutex_init(&channel->csl.ctx_lock, UVM_LOCK_ORDER_CSL_CTX);
|
||||
channel->csl.is_ctx_initialized = true;
|
||||
|
||||
return NV_OK;
|
||||
}
|
||||
|
||||
static void csl_destroy(uvm_channel_t *channel)
|
||||
{
|
||||
if (!channel->csl.is_ctx_initialized)
|
||||
return;
|
||||
|
||||
uvm_assert_mutex_unlocked(&channel->csl.ctx_lock);
|
||||
UVM_ASSERT(!is_channel_locked_for_push(channel));
|
||||
UVM_ASSERT(!uvm_channel_is_locked_for_push(channel));
|
||||
|
||||
uvm_rm_locked_call_void(nvUvmInterfaceDeinitCslContext(&channel->csl.ctx));
|
||||
channel->csl.is_ctx_initialized = false;
|
||||
@ -1616,9 +1694,7 @@ static void csl_destroy(uvm_channel_t *channel)
|
||||
|
||||
static void free_conf_computing_buffers(uvm_channel_t *channel)
|
||||
{
|
||||
uvm_gpu_t *gpu = uvm_channel_get_gpu(channel);
|
||||
|
||||
UVM_ASSERT(uvm_conf_computing_mode_enabled(gpu));
|
||||
UVM_ASSERT(g_uvm_global.conf_computing_enabled);
|
||||
UVM_ASSERT(uvm_channel_is_ce(channel));
|
||||
|
||||
uvm_rm_mem_free(channel->conf_computing.static_pb_protected_vidmem);
|
||||
@ -1650,7 +1726,7 @@ static NV_STATUS alloc_conf_computing_buffers_semaphore(uvm_channel_t *channel)
|
||||
uvm_gpu_t *gpu = uvm_channel_get_gpu(channel);
|
||||
NV_STATUS status;
|
||||
|
||||
UVM_ASSERT(uvm_conf_computing_mode_enabled(gpu));
|
||||
UVM_ASSERT(g_uvm_global.conf_computing_enabled);
|
||||
UVM_ASSERT(uvm_channel_is_ce(channel));
|
||||
|
||||
status = uvm_rm_mem_alloc_and_map_cpu(gpu,
|
||||
@ -1770,9 +1846,8 @@ static NV_STATUS alloc_conf_computing_buffers_lcic(uvm_channel_t *channel)
|
||||
static NV_STATUS alloc_conf_computing_buffers(uvm_channel_t *channel)
|
||||
{
|
||||
NV_STATUS status;
|
||||
uvm_gpu_t *gpu = uvm_channel_get_gpu(channel);
|
||||
|
||||
UVM_ASSERT(uvm_conf_computing_mode_enabled(gpu));
|
||||
UVM_ASSERT(g_uvm_global.conf_computing_enabled);
|
||||
UVM_ASSERT(uvm_channel_is_ce(channel));
|
||||
|
||||
status = alloc_conf_computing_buffers_semaphore(channel);
|
||||
@ -1786,6 +1861,7 @@ static NV_STATUS alloc_conf_computing_buffers(uvm_channel_t *channel)
|
||||
status = alloc_conf_computing_buffers_lcic(channel);
|
||||
}
|
||||
else {
|
||||
uvm_gpu_t *gpu = uvm_channel_get_gpu(channel);
|
||||
void *push_crypto_bundles = uvm_kvmalloc_zero(sizeof(*channel->conf_computing.push_crypto_bundles) *
|
||||
channel->num_gpfifo_entries);
|
||||
|
||||
@ -1806,8 +1882,6 @@ static NV_STATUS alloc_conf_computing_buffers(uvm_channel_t *channel)
|
||||
|
||||
static void channel_destroy(uvm_channel_pool_t *pool, uvm_channel_t *channel)
|
||||
{
|
||||
uvm_gpu_t *gpu = uvm_channel_get_gpu(channel);
|
||||
|
||||
UVM_ASSERT(pool->num_channels > 0);
|
||||
|
||||
if (channel->tracking_sem.queued_value > 0) {
|
||||
@ -1831,7 +1905,7 @@ static void channel_destroy(uvm_channel_pool_t *pool, uvm_channel_t *channel)
|
||||
|
||||
uvm_kvfree(channel->gpfifo_entries);
|
||||
|
||||
if (uvm_conf_computing_mode_enabled(gpu)) {
|
||||
if (g_uvm_global.conf_computing_enabled) {
|
||||
csl_destroy(channel);
|
||||
|
||||
if (uvm_channel_is_ce(channel))
|
||||
@ -1889,7 +1963,7 @@ static uvmGpuTsgHandle channel_get_tsg(uvm_channel_t *channel)
|
||||
|
||||
if (uvm_channel_pool_is_wlc(pool) || uvm_channel_pool_is_lcic(pool)) {
|
||||
if (uvm_channel_pool_is_lcic(pool)) {
|
||||
channel = lcic_get_paired_wlc(channel);
|
||||
channel = uvm_channel_lcic_get_paired_wlc(channel);
|
||||
pool = channel->pool;
|
||||
}
|
||||
|
||||
@ -1906,7 +1980,6 @@ static NV_STATUS internal_channel_create(uvm_channel_t *channel)
|
||||
UvmGpuChannelAllocParams channel_alloc_params;
|
||||
UvmGpuChannelInfo *channel_info = &channel->channel_info;
|
||||
uvm_channel_manager_t *manager = channel->pool->manager;
|
||||
uvm_gpu_t *gpu = manager->gpu;
|
||||
|
||||
memset(&channel_alloc_params, 0, sizeof(channel_alloc_params));
|
||||
channel_alloc_params.numGpFifoEntries = channel_pool_type_num_gpfifo_entries(manager, channel->pool->pool_type);
|
||||
@ -1914,7 +1987,7 @@ static NV_STATUS internal_channel_create(uvm_channel_t *channel)
|
||||
channel_alloc_params.gpPutLoc = manager->conf.gpput_loc;
|
||||
|
||||
if (uvm_channel_is_sec2(channel)) {
|
||||
UVM_ASSERT(uvm_conf_computing_mode_enabled(gpu));
|
||||
UVM_ASSERT(g_uvm_global.conf_computing_enabled);
|
||||
|
||||
// SEC2 channels' GPPUT and GPFIFO must be allocated in sysmem.
|
||||
channel_alloc_params.gpFifoLoc = UVM_BUFFER_LOCATION_SYS;
|
||||
@ -1928,7 +2001,7 @@ static NV_STATUS internal_channel_create(uvm_channel_t *channel)
|
||||
if (status != NV_OK) {
|
||||
UVM_ERR_PRINT("nvUvmInterfaceChannelAllocate() failed: %s, GPU %s, type %s\n",
|
||||
nvstatusToString(status),
|
||||
uvm_gpu_name(gpu),
|
||||
uvm_gpu_name(manager->gpu),
|
||||
uvm_channel_pool_type_to_string(channel->pool->pool_type));
|
||||
return status;
|
||||
}
|
||||
@ -1994,7 +2067,7 @@ static NV_STATUS channel_create(uvm_channel_pool_t *pool, uvm_channel_t *channel
|
||||
channel->tools.pending_event_count = 0;
|
||||
INIT_LIST_HEAD(&channel->tools.channel_list_node);
|
||||
|
||||
if (uvm_conf_computing_mode_enabled(gpu) && uvm_channel_is_ce(channel))
|
||||
if (g_uvm_global.conf_computing_enabled && uvm_channel_is_ce(channel))
|
||||
semaphore_pool = gpu->secure_semaphore_pool;
|
||||
|
||||
status = uvm_gpu_tracking_semaphore_alloc(semaphore_pool, &channel->tracking_sem);
|
||||
@ -2020,7 +2093,7 @@ static NV_STATUS channel_create(uvm_channel_pool_t *pool, uvm_channel_t *channel
|
||||
goto error;
|
||||
}
|
||||
|
||||
if (uvm_conf_computing_mode_enabled(gpu)) {
|
||||
if (g_uvm_global.conf_computing_enabled) {
|
||||
status = csl_init(channel);
|
||||
if (status != NV_OK)
|
||||
goto error;
|
||||
@ -2079,14 +2152,14 @@ static NV_STATUS channel_init(uvm_channel_t *channel)
|
||||
NV_STATUS status;
|
||||
NvU32 num_entries = 1;
|
||||
|
||||
if (uvm_gpu_has_pushbuffer_segments(gpu))
|
||||
if (uvm_parent_gpu_needs_pushbuffer_segments(gpu->parent))
|
||||
num_entries++;
|
||||
|
||||
status = uvm_channel_reserve(channel, num_entries);
|
||||
if (status != NV_OK)
|
||||
return status;
|
||||
|
||||
if (uvm_gpu_has_pushbuffer_segments(gpu)) {
|
||||
if (uvm_parent_gpu_needs_pushbuffer_segments(gpu->parent)) {
|
||||
NvU64 gpfifo_entry;
|
||||
uvm_pushbuffer_t *pushbuffer = uvm_channel_get_pushbuffer(channel);
|
||||
NvU64 pb_base = uvm_pushbuffer_get_gpu_va_base(pushbuffer);
|
||||
@ -2102,6 +2175,10 @@ static NV_STATUS channel_init(uvm_channel_t *channel)
|
||||
|
||||
status = uvm_push_begin_on_reserved_channel(channel, &push, "Init channel");
|
||||
if (status != NV_OK) {
|
||||
|
||||
// One entry was consumed by control GPFIFO entry above, release the
|
||||
// second one.
|
||||
uvm_channel_release(channel, 1);
|
||||
UVM_ERR_PRINT("Failed to begin push on channel: %s, GPU %s\n", nvstatusToString(status), uvm_gpu_name(gpu));
|
||||
return status;
|
||||
}
|
||||
@ -2126,7 +2203,7 @@ static NV_STATUS channel_init(uvm_channel_t *channel)
|
||||
|
||||
static bool channel_manager_uses_proxy_pool(uvm_channel_manager_t *manager)
|
||||
{
|
||||
return uvm_gpu_is_virt_mode_sriov_heavy(manager->gpu);
|
||||
return uvm_parent_gpu_is_virt_mode_sriov_heavy(manager->gpu->parent);
|
||||
}
|
||||
|
||||
// Number of channels to create in a pool of the given type.
|
||||
@ -2266,7 +2343,7 @@ static NV_STATUS channel_pool_add(uvm_channel_manager_t *channel_manager,
|
||||
num_channels = channel_pool_type_num_channels(pool_type);
|
||||
UVM_ASSERT(num_channels <= UVM_CHANNEL_MAX_NUM_CHANNELS_PER_POOL);
|
||||
|
||||
if (uvm_conf_computing_mode_enabled(channel_manager->gpu)) {
|
||||
if (g_uvm_global.conf_computing_enabled) {
|
||||
// Use different order lock for SEC2 and WLC channels.
|
||||
// This allows reserving a SEC2 or WLC channel for indirect work
|
||||
// submission while holding a reservation for a channel.
|
||||
@ -2721,11 +2798,11 @@ static unsigned channel_manager_get_max_pools(uvm_channel_manager_t *manager)
|
||||
num_channel_pools = bitmap_weight(manager->ce_mask, UVM_COPY_ENGINE_COUNT_MAX);
|
||||
|
||||
// CE proxy channel pool.
|
||||
if (uvm_gpu_uses_proxy_channel_pool(manager->gpu))
|
||||
if (uvm_parent_gpu_needs_proxy_channel_pool(manager->gpu->parent))
|
||||
num_channel_pools++;
|
||||
|
||||
// SEC2 pool, WLC pool, LCIC pool
|
||||
if (uvm_conf_computing_mode_enabled(manager->gpu))
|
||||
if (g_uvm_global.conf_computing_enabled)
|
||||
num_channel_pools += 3;
|
||||
|
||||
return num_channel_pools;
|
||||
@ -3093,7 +3170,7 @@ static NV_STATUS channel_manager_create_conf_computing_pools(uvm_channel_manager
|
||||
uvm_channel_pool_t *wlc_pool = NULL;
|
||||
uvm_channel_pool_t *lcic_pool = NULL;
|
||||
|
||||
if (!uvm_conf_computing_mode_enabled(manager->gpu))
|
||||
if (!g_uvm_global.conf_computing_enabled)
|
||||
return NV_OK;
|
||||
|
||||
status = uvm_rm_mem_alloc(manager->gpu,
|
||||
@ -3173,7 +3250,7 @@ static NV_STATUS channel_manager_create_pools(uvm_channel_manager_t *manager)
|
||||
|
||||
// In SR-IOV heavy, add an additional, single-channel, pool that is
|
||||
// dedicated to the MEMOPS type.
|
||||
if (uvm_gpu_uses_proxy_channel_pool(manager->gpu)) {
|
||||
if (uvm_parent_gpu_needs_proxy_channel_pool(manager->gpu->parent)) {
|
||||
uvm_channel_pool_t *proxy_pool = NULL;
|
||||
uvm_channel_type_t channel_type = uvm_channel_proxy_channel_type();
|
||||
|
||||
@ -3295,7 +3372,7 @@ void uvm_channel_manager_destroy(uvm_channel_manager_t *channel_manager)
|
||||
|
||||
bool uvm_channel_is_privileged(uvm_channel_t *channel)
|
||||
{
|
||||
if (uvm_gpu_is_virt_mode_sriov_heavy(uvm_channel_get_gpu(channel)))
|
||||
if (uvm_parent_gpu_is_virt_mode_sriov_heavy(uvm_channel_get_gpu(channel)->parent))
|
||||
return uvm_channel_is_proxy(channel);
|
||||
|
||||
return true;
|
||||
|
@ -497,6 +497,10 @@ static bool uvm_channel_is_lcic(uvm_channel_t *channel)
|
||||
return uvm_channel_pool_is_lcic(channel->pool);
|
||||
}
|
||||
|
||||
uvm_channel_t *uvm_channel_lcic_get_paired_wlc(uvm_channel_t *lcic_channel);
|
||||
|
||||
uvm_channel_t *uvm_channel_wlc_get_paired_lcic(uvm_channel_t *wlc_channel);
|
||||
|
||||
static bool uvm_channel_pool_is_proxy(uvm_channel_pool_t *pool)
|
||||
{
|
||||
UVM_ASSERT(uvm_pool_type_is_valid(pool->pool_type));
|
||||
@ -603,6 +607,11 @@ bool uvm_channel_is_value_completed(uvm_channel_t *channel, NvU64 value);
|
||||
// Update and get the latest completed value by the channel
|
||||
NvU64 uvm_channel_update_completed_value(uvm_channel_t *channel);
|
||||
|
||||
// Wait for the channel to idle
|
||||
// It waits for anything that is running, but doesn't prevent new work from
|
||||
// beginning.
|
||||
NV_STATUS uvm_channel_wait(uvm_channel_t *channel);
|
||||
|
||||
// Select and reserve a channel with the specified type for a push
|
||||
NV_STATUS uvm_channel_reserve_type(uvm_channel_manager_t *manager,
|
||||
uvm_channel_type_t type,
|
||||
@ -617,6 +626,9 @@ NV_STATUS uvm_channel_reserve_gpu_to_gpu(uvm_channel_manager_t *channel_manager,
|
||||
// Reserve a specific channel for a push or for a control GPFIFO entry.
|
||||
NV_STATUS uvm_channel_reserve(uvm_channel_t *channel, NvU32 num_gpfifo_entries);
|
||||
|
||||
// Release reservation on a specific channel
|
||||
void uvm_channel_release(uvm_channel_t *channel, NvU32 num_gpfifo_entries);
|
||||
|
||||
// Set optimal CE for P2P transfers between manager->gpu and peer
|
||||
void uvm_channel_manager_set_p2p_ce(uvm_channel_manager_t *manager, uvm_gpu_t *peer, NvU32 optimal_ce);
|
||||
|
||||
@ -648,6 +660,8 @@ NvU32 uvm_channel_get_available_gpfifo_entries(uvm_channel_t *channel);
|
||||
|
||||
void uvm_channel_print_pending_pushes(uvm_channel_t *channel);
|
||||
|
||||
bool uvm_channel_is_locked_for_push(uvm_channel_t *channel);
|
||||
|
||||
static uvm_gpu_t *uvm_channel_get_gpu(uvm_channel_t *channel)
|
||||
{
|
||||
return channel->pool->manager->gpu;
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*******************************************************************************
|
||||
Copyright (c) 2015-2022 NVIDIA Corporation
|
||||
Copyright (c) 2015-2023 NVIDIA Corporation
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to
|
||||
@ -24,6 +24,7 @@
|
||||
#include "uvm_global.h"
|
||||
#include "uvm_channel.h"
|
||||
#include "uvm_hal.h"
|
||||
#include "uvm_mem.h"
|
||||
#include "uvm_push.h"
|
||||
#include "uvm_test.h"
|
||||
#include "uvm_test_rng.h"
|
||||
@ -57,14 +58,14 @@ static NV_STATUS test_ordering(uvm_va_space_t *va_space)
|
||||
const NvU32 values_count = iters_per_channel_type_per_gpu;
|
||||
const size_t buffer_size = sizeof(NvU32) * values_count;
|
||||
|
||||
gpu = uvm_va_space_find_first_gpu(va_space);
|
||||
TEST_CHECK_RET(gpu != NULL);
|
||||
|
||||
// TODO: Bug 3839176: the test is waived on Confidential Computing because
|
||||
// it assumes that GPU can access system memory without using encryption.
|
||||
if (uvm_conf_computing_mode_enabled(gpu))
|
||||
if (g_uvm_global.conf_computing_enabled)
|
||||
return NV_OK;
|
||||
|
||||
gpu = uvm_va_space_find_first_gpu(va_space);
|
||||
TEST_CHECK_RET(gpu != NULL);
|
||||
|
||||
status = uvm_rm_mem_alloc_and_map_all(gpu, UVM_RM_MEM_TYPE_SYS, buffer_size, 0, &mem);
|
||||
TEST_CHECK_GOTO(status == NV_OK, done);
|
||||
|
||||
@ -84,7 +85,7 @@ static NV_STATUS test_ordering(uvm_va_space_t *va_space)
|
||||
|
||||
TEST_NV_CHECK_GOTO(uvm_tracker_add_push(&tracker, &push), done);
|
||||
|
||||
exclude_proxy_channel_type = uvm_gpu_uses_proxy_channel_pool(gpu);
|
||||
exclude_proxy_channel_type = uvm_parent_gpu_needs_proxy_channel_pool(gpu->parent);
|
||||
|
||||
for (i = 0; i < iters_per_channel_type_per_gpu; ++i) {
|
||||
for (j = 0; j < UVM_CHANNEL_TYPE_CE_COUNT; ++j) {
|
||||
@ -222,7 +223,7 @@ static NV_STATUS uvm_test_rc_for_gpu(uvm_gpu_t *gpu)
|
||||
// Check RC on a proxy channel (SR-IOV heavy) or internal channel (any other
|
||||
// mode). It is not allowed to use a virtual address in a memset pushed to
|
||||
// a proxy channel, so we use a physical address instead.
|
||||
if (uvm_gpu_uses_proxy_channel_pool(gpu)) {
|
||||
if (uvm_parent_gpu_needs_proxy_channel_pool(gpu->parent)) {
|
||||
uvm_gpu_address_t dst_address;
|
||||
|
||||
// Save the line number the push that's supposed to fail was started on
|
||||
@ -314,6 +315,110 @@ static NV_STATUS test_rc(uvm_va_space_t *va_space)
|
||||
return NV_OK;
|
||||
}
|
||||
|
||||
static NV_STATUS uvm_test_iommu_rc_for_gpu(uvm_gpu_t *gpu)
|
||||
{
|
||||
NV_STATUS status = NV_OK;
|
||||
|
||||
#if defined(NV_IOMMU_IS_DMA_DOMAIN_PRESENT) && defined(CONFIG_IOMMU_DEFAULT_DMA_STRICT)
|
||||
// This test needs the DMA API to immediately invalidate IOMMU mappings on
|
||||
// DMA unmap (as opposed to lazy invalidation). The policy can be changed
|
||||
// on boot (e.g. iommu.strict=1), but there isn't a good way to check for
|
||||
// the runtime setting. CONFIG_IOMMU_DEFAULT_DMA_STRICT checks for the
|
||||
// default value.
|
||||
|
||||
uvm_push_t push;
|
||||
uvm_mem_t *sysmem;
|
||||
uvm_gpu_address_t sysmem_dma_addr;
|
||||
char *cpu_ptr = NULL;
|
||||
const size_t data_size = PAGE_SIZE;
|
||||
size_t i;
|
||||
|
||||
struct device *dev = &gpu->parent->pci_dev->dev;
|
||||
struct iommu_domain *domain = iommu_get_domain_for_dev(dev);
|
||||
|
||||
// Check that the IOMMU domain is controlled by the Linux DMA API
|
||||
if (!domain || !iommu_is_dma_domain(domain))
|
||||
return NV_OK;
|
||||
|
||||
// Only run if ATS is enabled. Otherwise the CE doesn't get a response on
|
||||
// writing to an unmapped location.
|
||||
if (!g_uvm_global.ats.enabled)
|
||||
return NV_OK;
|
||||
|
||||
status = uvm_mem_alloc_sysmem_and_map_cpu_kernel(data_size, NULL, &sysmem);
|
||||
TEST_NV_CHECK_RET(status);
|
||||
|
||||
status = uvm_mem_map_gpu_phys(sysmem, gpu);
|
||||
TEST_NV_CHECK_GOTO(status, done);
|
||||
|
||||
cpu_ptr = uvm_mem_get_cpu_addr_kernel(sysmem);
|
||||
sysmem_dma_addr = uvm_mem_gpu_address_physical(sysmem, gpu, 0, data_size);
|
||||
|
||||
status = uvm_push_begin(gpu->channel_manager, UVM_CHANNEL_TYPE_GPU_TO_CPU, &push, "Test memset to IOMMU mapped sysmem");
|
||||
TEST_NV_CHECK_GOTO(status, done);
|
||||
|
||||
gpu->parent->ce_hal->memset_8(&push, sysmem_dma_addr, 0, data_size);
|
||||
|
||||
status = uvm_push_end_and_wait(&push);
|
||||
TEST_NV_CHECK_GOTO(status, done);
|
||||
|
||||
// Check that we have zeroed the memory
|
||||
for (i = 0; i < data_size; ++i)
|
||||
TEST_CHECK_GOTO(cpu_ptr[i] == 0, done);
|
||||
|
||||
// Unmap the buffer and try write again to the same address
|
||||
uvm_mem_unmap_gpu_phys(sysmem, gpu);
|
||||
|
||||
status = uvm_push_begin(gpu->channel_manager, UVM_CHANNEL_TYPE_GPU_TO_CPU, &push, "Test memset after IOMMU unmap");
|
||||
TEST_NV_CHECK_GOTO(status, done);
|
||||
|
||||
gpu->parent->ce_hal->memset_4(&push, sysmem_dma_addr, 0xffffffff, data_size);
|
||||
|
||||
status = uvm_push_end_and_wait(&push);
|
||||
|
||||
TEST_CHECK_GOTO(status == NV_ERR_RC_ERROR, done);
|
||||
TEST_CHECK_GOTO(uvm_channel_get_status(push.channel) == NV_ERR_RC_ERROR, done);
|
||||
TEST_CHECK_GOTO(uvm_global_reset_fatal_error() == NV_ERR_RC_ERROR, done);
|
||||
|
||||
// Check that writes after unmap did not succeed
|
||||
for (i = 0; i < data_size; ++i)
|
||||
TEST_CHECK_GOTO(cpu_ptr[i] == 0, done);
|
||||
|
||||
status = NV_OK;
|
||||
|
||||
done:
|
||||
uvm_mem_free(sysmem);
|
||||
#endif
|
||||
return status;
|
||||
}
|
||||
|
||||
static NV_STATUS test_iommu(uvm_va_space_t *va_space)
|
||||
{
|
||||
uvm_gpu_t *gpu;
|
||||
|
||||
uvm_assert_mutex_locked(&g_uvm_global.global_lock);
|
||||
|
||||
for_each_va_space_gpu(gpu, va_space) {
|
||||
NV_STATUS test_status, create_status;
|
||||
|
||||
// The GPU channel manager is destroyed and then re-created after
|
||||
// testing ATS RC fault, so this test requires exclusive access to the GPU.
|
||||
TEST_CHECK_RET(uvm_gpu_retained_count(gpu) == 1);
|
||||
|
||||
g_uvm_global.disable_fatal_error_assert = true;
|
||||
test_status = uvm_test_iommu_rc_for_gpu(gpu);
|
||||
g_uvm_global.disable_fatal_error_assert = false;
|
||||
|
||||
uvm_channel_manager_destroy(gpu->channel_manager);
|
||||
create_status = uvm_channel_manager_create(gpu, &gpu->channel_manager);
|
||||
|
||||
TEST_NV_CHECK_RET(test_status);
|
||||
TEST_NV_CHECK_RET(create_status);
|
||||
}
|
||||
|
||||
return NV_OK;
|
||||
}
|
||||
|
||||
typedef struct
|
||||
{
|
||||
uvm_push_t push;
|
||||
@ -403,7 +508,7 @@ static uvm_channel_type_t random_ce_channel_type_except(uvm_test_rng_t *rng, uvm
|
||||
|
||||
static uvm_channel_type_t gpu_random_internal_ce_channel_type(uvm_gpu_t *gpu, uvm_test_rng_t *rng)
|
||||
{
|
||||
if (uvm_gpu_uses_proxy_channel_pool(gpu))
|
||||
if (uvm_parent_gpu_needs_proxy_channel_pool(gpu->parent))
|
||||
return random_ce_channel_type_except(rng, uvm_channel_proxy_channel_type());
|
||||
|
||||
return random_ce_channel_type(rng);
|
||||
@ -693,9 +798,7 @@ NV_STATUS test_conf_computing_channel_selection(uvm_va_space_t *va_space)
|
||||
NvU32 i;
|
||||
NvU32 num_pushes;
|
||||
|
||||
gpu = uvm_va_space_find_first_gpu(va_space);
|
||||
|
||||
if (!uvm_conf_computing_mode_enabled(gpu))
|
||||
if (!g_uvm_global.conf_computing_enabled)
|
||||
return NV_OK;
|
||||
|
||||
uvm_thread_context_lock_disable_tracking();
|
||||
@ -746,6 +849,101 @@ error:
|
||||
return status;
|
||||
}
|
||||
|
||||
NV_STATUS test_channel_iv_rotation(uvm_va_space_t *va_space)
|
||||
{
|
||||
uvm_gpu_t *gpu;
|
||||
|
||||
if (!g_uvm_global.conf_computing_enabled)
|
||||
return NV_OK;
|
||||
|
||||
for_each_va_space_gpu(gpu, va_space) {
|
||||
uvm_channel_pool_t *pool;
|
||||
|
||||
uvm_for_each_pool(pool, gpu->channel_manager) {
|
||||
NvU64 before_rotation_enc, before_rotation_dec, after_rotation_enc, after_rotation_dec;
|
||||
NV_STATUS status = NV_OK;
|
||||
|
||||
// Check one (the first) channel per pool
|
||||
uvm_channel_t *channel = pool->channels;
|
||||
|
||||
// Create a dummy encrypt/decrypt push to use up a few IVs.
|
||||
// SEC2 used encrypt during initialization, no need to use a dummy
|
||||
// push.
|
||||
if (!uvm_channel_is_sec2(channel)) {
|
||||
uvm_push_t push;
|
||||
size_t data_size;
|
||||
uvm_conf_computing_dma_buffer_t *cipher_text;
|
||||
void *cipher_cpu_va, *plain_cpu_va, *tag_cpu_va;
|
||||
uvm_gpu_address_t cipher_gpu_address, plain_gpu_address, tag_gpu_address;
|
||||
uvm_channel_t *work_channel = uvm_channel_is_lcic(channel) ? uvm_channel_lcic_get_paired_wlc(channel) : channel;
|
||||
|
||||
plain_cpu_va = &status;
|
||||
data_size = sizeof(status);
|
||||
|
||||
TEST_NV_CHECK_RET(uvm_conf_computing_dma_buffer_alloc(&gpu->conf_computing.dma_buffer_pool,
|
||||
&cipher_text,
|
||||
NULL));
|
||||
cipher_cpu_va = uvm_mem_get_cpu_addr_kernel(cipher_text->alloc);
|
||||
tag_cpu_va = uvm_mem_get_cpu_addr_kernel(cipher_text->auth_tag);
|
||||
|
||||
cipher_gpu_address = uvm_mem_gpu_address_virtual_kernel(cipher_text->alloc, gpu);
|
||||
tag_gpu_address = uvm_mem_gpu_address_virtual_kernel(cipher_text->auth_tag, gpu);
|
||||
|
||||
TEST_NV_CHECK_GOTO(uvm_push_begin_on_channel(work_channel, &push, "Dummy push for IV rotation"), free);
|
||||
|
||||
(void)uvm_push_get_single_inline_buffer(&push,
|
||||
data_size,
|
||||
UVM_CONF_COMPUTING_BUF_ALIGNMENT,
|
||||
&plain_gpu_address);
|
||||
|
||||
uvm_conf_computing_cpu_encrypt(work_channel, cipher_cpu_va, plain_cpu_va, NULL, data_size, tag_cpu_va);
|
||||
gpu->parent->ce_hal->decrypt(&push, plain_gpu_address, cipher_gpu_address, data_size, tag_gpu_address);
|
||||
|
||||
TEST_NV_CHECK_GOTO(uvm_push_end_and_wait(&push), free);
|
||||
|
||||
free:
|
||||
uvm_conf_computing_dma_buffer_free(&gpu->conf_computing.dma_buffer_pool, cipher_text, NULL);
|
||||
|
||||
if (status != NV_OK)
|
||||
return status;
|
||||
}
|
||||
|
||||
// Reserve a channel to hold the push lock during rotation
|
||||
if (!uvm_channel_is_lcic(channel))
|
||||
TEST_NV_CHECK_RET(uvm_channel_reserve(channel, 1));
|
||||
|
||||
uvm_conf_computing_query_message_pools(channel, &before_rotation_enc, &before_rotation_dec);
|
||||
TEST_NV_CHECK_GOTO(uvm_conf_computing_rotate_channel_ivs_below_limit(channel, -1, true), release);
|
||||
uvm_conf_computing_query_message_pools(channel, &after_rotation_enc, &after_rotation_dec);
|
||||
|
||||
release:
|
||||
if (!uvm_channel_is_lcic(channel))
|
||||
uvm_channel_release(channel, 1);
|
||||
|
||||
if (status != NV_OK)
|
||||
return status;
|
||||
|
||||
// All channels except SEC2 used at least a single IV to release tracking.
|
||||
// SEC2 doesn't support decrypt direction.
|
||||
if (uvm_channel_is_sec2(channel))
|
||||
TEST_CHECK_RET(before_rotation_dec == after_rotation_dec);
|
||||
else
|
||||
TEST_CHECK_RET(before_rotation_dec < after_rotation_dec);
|
||||
|
||||
// All channels used one CPU encrypt/GPU decrypt, either during
|
||||
// initialization or in the push above, with the exception of LCIC.
|
||||
// LCIC is used in tandem with WLC, but it never uses CPU encrypt/
|
||||
// GPU decrypt ops.
|
||||
if (uvm_channel_is_lcic(channel))
|
||||
TEST_CHECK_RET(before_rotation_enc == after_rotation_enc);
|
||||
else
|
||||
TEST_CHECK_RET(before_rotation_enc < after_rotation_enc);
|
||||
}
|
||||
}
|
||||
|
||||
return NV_OK;
|
||||
}
|
||||
|
||||
NV_STATUS test_write_ctrl_gpfifo_noop(uvm_va_space_t *va_space)
|
||||
{
|
||||
uvm_gpu_t *gpu;
|
||||
@ -845,11 +1043,9 @@ NV_STATUS test_write_ctrl_gpfifo_tight(uvm_va_space_t *va_space)
|
||||
NvU64 entry;
|
||||
uvm_push_t push;
|
||||
|
||||
gpu = uvm_va_space_find_first_gpu(va_space);
|
||||
|
||||
// TODO: Bug 3839176: the test is waived on Confidential Computing because
|
||||
// it assumes that GPU can access system memory without using encryption.
|
||||
if (uvm_conf_computing_mode_enabled(gpu))
|
||||
if (g_uvm_global.conf_computing_enabled)
|
||||
return NV_OK;
|
||||
|
||||
for_each_va_space_gpu(gpu, va_space) {
|
||||
@ -924,7 +1120,7 @@ static NV_STATUS test_channel_pushbuffer_extension_base(uvm_va_space_t *va_space
|
||||
uvm_channel_manager_t *manager;
|
||||
uvm_channel_pool_t *pool;
|
||||
|
||||
if (!uvm_gpu_has_pushbuffer_segments(gpu))
|
||||
if (!uvm_parent_gpu_needs_pushbuffer_segments(gpu->parent))
|
||||
continue;
|
||||
|
||||
// The GPU channel manager pushbuffer is destroyed and then re-created
|
||||
@ -999,6 +1195,10 @@ NV_STATUS uvm_test_channel_sanity(UVM_TEST_CHANNEL_SANITY_PARAMS *params, struct
|
||||
if (status != NV_OK)
|
||||
goto done;
|
||||
|
||||
status = test_channel_iv_rotation(va_space);
|
||||
if (status != NV_OK)
|
||||
goto done;
|
||||
|
||||
// The following tests have side effects, they reset the GPU's
|
||||
// channel_manager.
|
||||
status = test_channel_pushbuffer_extension_base(va_space);
|
||||
@ -1019,6 +1219,10 @@ NV_STATUS uvm_test_channel_sanity(UVM_TEST_CHANNEL_SANITY_PARAMS *params, struct
|
||||
goto done;
|
||||
}
|
||||
|
||||
status = test_iommu(va_space);
|
||||
if (status != NV_OK)
|
||||
goto done;
|
||||
|
||||
done:
|
||||
uvm_va_space_up_read_rm(va_space);
|
||||
uvm_mutex_unlock(&g_uvm_global.global_lock);
|
||||
@ -1034,23 +1238,22 @@ static NV_STATUS uvm_test_channel_stress_stream(uvm_va_space_t *va_space,
|
||||
if (params->iterations == 0 || params->num_streams == 0)
|
||||
return NV_ERR_INVALID_PARAMETER;
|
||||
|
||||
// TODO: Bug 3839176: the test is waived on Confidential Computing because
|
||||
// it assumes that GPU can access system memory without using encryption.
|
||||
if (g_uvm_global.conf_computing_enabled)
|
||||
return NV_OK;
|
||||
|
||||
// TODO: Bug 1764963: Rework the test to not rely on the global lock as that
|
||||
// serializes all the threads calling this at the same time.
|
||||
uvm_mutex_lock(&g_uvm_global.global_lock);
|
||||
uvm_va_space_down_read_rm(va_space);
|
||||
|
||||
// TODO: Bug 3839176: the test is waived on Confidential Computing because
|
||||
// it assumes that GPU can access system memory without using encryption.
|
||||
if (uvm_conf_computing_mode_enabled(uvm_va_space_find_first_gpu(va_space)))
|
||||
goto done;
|
||||
|
||||
status = stress_test_all_gpus_in_va(va_space,
|
||||
params->num_streams,
|
||||
params->iterations,
|
||||
params->seed,
|
||||
params->verbose);
|
||||
|
||||
done:
|
||||
uvm_va_space_up_read_rm(va_space);
|
||||
uvm_mutex_unlock(&g_uvm_global.global_lock);
|
||||
|
||||
|
@ -21,8 +21,8 @@
|
||||
|
||||
*******************************************************************************/
|
||||
|
||||
#ifndef _UVM_COMMON_H
|
||||
#define _UVM_COMMON_H
|
||||
#ifndef __UVM_COMMON_H__
|
||||
#define __UVM_COMMON_H__
|
||||
|
||||
#ifdef DEBUG
|
||||
#define UVM_IS_DEBUG() 1
|
||||
@ -204,13 +204,6 @@ extern bool uvm_release_asserts_set_global_error_for_tests;
|
||||
#define UVM_ASSERT_MSG_RELEASE(expr, fmt, ...) _UVM_ASSERT_MSG_RELEASE(expr, #expr, ": " fmt, ##__VA_ARGS__)
|
||||
#define UVM_ASSERT_RELEASE(expr) _UVM_ASSERT_MSG_RELEASE(expr, #expr, "\n")
|
||||
|
||||
// Provide a short form of UUID's, typically for use in debug printing:
|
||||
#define ABBREV_UUID(uuid) (unsigned)(uuid)
|
||||
|
||||
static inline NvBool uvm_uuid_is_cpu(const NvProcessorUuid *uuid)
|
||||
{
|
||||
return memcmp(uuid, &NV_PROCESSOR_UUID_CPU_DEFAULT, sizeof(*uuid)) == 0;
|
||||
}
|
||||
#define UVM_SIZE_1KB (1024ULL)
|
||||
#define UVM_SIZE_1MB (1024 * UVM_SIZE_1KB)
|
||||
#define UVM_SIZE_1GB (1024 * UVM_SIZE_1MB)
|
||||
@ -409,4 +402,40 @@ static inline void uvm_touch_page(struct page *page)
|
||||
// Return true if the VMA is one used by UVM managed allocations.
|
||||
bool uvm_vma_is_managed(struct vm_area_struct *vma);
|
||||
|
||||
#endif /* _UVM_COMMON_H */
|
||||
static bool uvm_platform_uses_canonical_form_address(void)
|
||||
{
|
||||
if (NVCPU_IS_PPC64LE)
|
||||
return false;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
// Similar to the GPU MMU HAL num_va_bits(), it returns the CPU's num_va_bits().
|
||||
static NvU32 uvm_cpu_num_va_bits(void)
|
||||
{
|
||||
return fls64(TASK_SIZE - 1) + 1;
|
||||
}
|
||||
|
||||
// Return the unaddressable range in a num_va_bits-wide VA space, [first, outer)
|
||||
static void uvm_get_unaddressable_range(NvU32 num_va_bits, NvU64 *first, NvU64 *outer)
|
||||
{
|
||||
UVM_ASSERT(num_va_bits < 64);
|
||||
UVM_ASSERT(first);
|
||||
UVM_ASSERT(outer);
|
||||
|
||||
if (uvm_platform_uses_canonical_form_address()) {
|
||||
*first = 1ULL << (num_va_bits - 1);
|
||||
*outer = (NvU64)((NvS64)(1ULL << 63) >> (64 - num_va_bits));
|
||||
}
|
||||
else {
|
||||
*first = 1ULL << num_va_bits;
|
||||
*outer = ~0Ull;
|
||||
}
|
||||
}
|
||||
|
||||
// Return the unaddressable range, [first, outer), of a VA space that is
// uvm_cpu_num_va_bits() wide. See uvm_get_unaddressable_range().
static void uvm_cpu_get_unaddressable_range(NvU64 *first, NvU64 *outer)
{
    // No "return <expr>;" here: ISO C does not allow a return statement with
    // an expression in a function returning void.
    uvm_get_unaddressable_range(uvm_cpu_num_va_bits(), first, outer);
}
|
||||
|
||||
#endif /* __UVM_COMMON_H__ */
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*******************************************************************************
|
||||
Copyright (c) 2021 NVIDIA Corporation
|
||||
Copyright (c) 2021-2023 NVIDIA Corporation
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to
|
||||
@ -33,44 +33,55 @@
|
||||
#include "nv_uvm_interface.h"
|
||||
#include "uvm_va_block.h"
|
||||
|
||||
// The maximum number of secure operations per push is:
|
||||
// UVM_MAX_PUSH_SIZE / min(CE encryption size, CE decryption size)
|
||||
// + 1 (tracking semaphore) = 128 * 1024 / 56 + 1 = 2342
|
||||
#define UVM_CONF_COMPUTING_IV_REMAINING_LIMIT_MIN 2342lu
|
||||
|
||||
// Channels use 32-bit counters so the value after rotation is 0xffffffff.
|
||||
// Setting the limit to this value (or higher) will result in rotation
|
||||
// on every check. However, pre-emptive rotation when submitting control
|
||||
// GPFIFO entries relies on the fact that multiple successive checks after
|
||||
// rotation do not trigger more rotations if there was no IV used in between.
|
||||
#define UVM_CONF_COMPUTING_IV_REMAINING_LIMIT_MAX 0xfffffffelu
|
||||
|
||||
// Attempt rotation when two billion IVs are left. IV rotation call can fail if
|
||||
// the necessary locks are not available, so multiple attempts may be needed for
|
||||
// IV rotation to succeed.
|
||||
#define UVM_CONF_COMPUTING_IV_REMAINING_LIMIT_DEFAULT (1lu << 31)
|
||||
|
||||
// Start rotating after 500 encryption/decryptions when running tests.
|
||||
#define UVM_CONF_COMPUTING_IV_REMAINING_LIMIT_TESTS ((1lu << 32) - 500lu)
|
||||
static ulong uvm_conf_computing_channel_iv_rotation_limit = UVM_CONF_COMPUTING_IV_REMAINING_LIMIT_DEFAULT;
|
||||
|
||||
module_param(uvm_conf_computing_channel_iv_rotation_limit, ulong, S_IRUGO);
|
||||
|
||||
static UvmGpuConfComputeMode uvm_conf_computing_get_mode(const uvm_parent_gpu_t *parent)
|
||||
{
|
||||
return parent->rm_info.gpuConfComputeCaps.mode;
|
||||
}
|
||||
|
||||
bool uvm_conf_computing_mode_enabled_parent(const uvm_parent_gpu_t *parent)
|
||||
{
|
||||
return uvm_conf_computing_get_mode(parent) != UVM_GPU_CONF_COMPUTE_MODE_NONE;
|
||||
}
|
||||
|
||||
bool uvm_conf_computing_mode_enabled(const uvm_gpu_t *gpu)
|
||||
{
|
||||
return uvm_conf_computing_mode_enabled_parent(gpu->parent);
|
||||
}
|
||||
|
||||
bool uvm_conf_computing_mode_is_hcc(const uvm_gpu_t *gpu)
|
||||
{
|
||||
return uvm_conf_computing_get_mode(gpu->parent) == UVM_GPU_CONF_COMPUTE_MODE_HCC;
|
||||
}
|
||||
|
||||
NV_STATUS uvm_conf_computing_init_parent_gpu(const uvm_parent_gpu_t *parent)
|
||||
void uvm_conf_computing_check_parent_gpu(const uvm_parent_gpu_t *parent)
|
||||
{
|
||||
UvmGpuConfComputeMode cc, sys_cc;
|
||||
uvm_gpu_t *first;
|
||||
uvm_parent_gpu_t *other_parent;
|
||||
UvmGpuConfComputeMode parent_mode = uvm_conf_computing_get_mode(parent);
|
||||
|
||||
uvm_assert_mutex_locked(&g_uvm_global.global_lock);
|
||||
|
||||
// TODO: Bug 2844714: since we have no routine to traverse parent GPUs,
|
||||
// find first child GPU and get its parent.
|
||||
first = uvm_global_processor_mask_find_first_gpu(&g_uvm_global.retained_gpus);
|
||||
if (!first)
|
||||
return NV_OK;
|
||||
// The Confidential Computing state of the GPU should match that of the
|
||||
// system.
|
||||
UVM_ASSERT((parent_mode != UVM_GPU_CONF_COMPUTE_MODE_NONE) == g_uvm_global.conf_computing_enabled);
|
||||
|
||||
sys_cc = uvm_conf_computing_get_mode(first->parent);
|
||||
cc = uvm_conf_computing_get_mode(parent);
|
||||
|
||||
return cc == sys_cc ? NV_OK : NV_ERR_NOT_SUPPORTED;
|
||||
// All GPUs derive Confidential Computing status from their parent. By
|
||||
// current policy all parent GPUs have identical Confidential Computing
|
||||
// status.
|
||||
for_each_parent_gpu(other_parent)
|
||||
UVM_ASSERT(parent_mode == uvm_conf_computing_get_mode(other_parent));
|
||||
}
|
||||
|
||||
static void dma_buffer_destroy_locked(uvm_conf_computing_dma_buffer_pool_t *dma_buffer_pool,
|
||||
@ -184,15 +195,11 @@ static void dma_buffer_pool_add(uvm_conf_computing_dma_buffer_pool_t *dma_buffer
|
||||
static NV_STATUS conf_computing_dma_buffer_pool_init(uvm_conf_computing_dma_buffer_pool_t *dma_buffer_pool)
|
||||
{
|
||||
size_t i;
|
||||
uvm_gpu_t *gpu;
|
||||
size_t num_dma_buffers = 32;
|
||||
NV_STATUS status = NV_OK;
|
||||
|
||||
UVM_ASSERT(dma_buffer_pool->num_dma_buffers == 0);
|
||||
|
||||
gpu = dma_buffer_pool_to_gpu(dma_buffer_pool);
|
||||
|
||||
UVM_ASSERT(uvm_conf_computing_mode_enabled(gpu));
|
||||
UVM_ASSERT(g_uvm_global.conf_computing_enabled);
|
||||
|
||||
INIT_LIST_HEAD(&dma_buffer_pool->free_dma_buffers);
|
||||
uvm_mutex_init(&dma_buffer_pool->lock, UVM_LOCK_ORDER_CONF_COMPUTING_DMA_BUFFER_POOL);
|
||||
@ -349,7 +356,7 @@ NV_STATUS uvm_conf_computing_gpu_init(uvm_gpu_t *gpu)
|
||||
{
|
||||
NV_STATUS status;
|
||||
|
||||
if (!uvm_conf_computing_mode_enabled(gpu))
|
||||
if (!g_uvm_global.conf_computing_enabled)
|
||||
return NV_OK;
|
||||
|
||||
status = conf_computing_dma_buffer_pool_init(&gpu->conf_computing.dma_buffer_pool);
|
||||
@ -360,6 +367,20 @@ NV_STATUS uvm_conf_computing_gpu_init(uvm_gpu_t *gpu)
|
||||
if (status != NV_OK)
|
||||
goto error;
|
||||
|
||||
if (uvm_enable_builtin_tests && uvm_conf_computing_channel_iv_rotation_limit == UVM_CONF_COMPUTING_IV_REMAINING_LIMIT_DEFAULT)
|
||||
uvm_conf_computing_channel_iv_rotation_limit = UVM_CONF_COMPUTING_IV_REMAINING_LIMIT_TESTS;
|
||||
|
||||
if (uvm_conf_computing_channel_iv_rotation_limit < UVM_CONF_COMPUTING_IV_REMAINING_LIMIT_MIN ||
|
||||
uvm_conf_computing_channel_iv_rotation_limit > UVM_CONF_COMPUTING_IV_REMAINING_LIMIT_MAX) {
|
||||
UVM_ERR_PRINT("Value of uvm_conf_computing_channel_iv_rotation_limit: %lu is outside of the safe "
|
||||
"range: <%lu, %lu>. Using the default value instead (%lu)\n",
|
||||
uvm_conf_computing_channel_iv_rotation_limit,
|
||||
UVM_CONF_COMPUTING_IV_REMAINING_LIMIT_MIN,
|
||||
UVM_CONF_COMPUTING_IV_REMAINING_LIMIT_MAX,
|
||||
UVM_CONF_COMPUTING_IV_REMAINING_LIMIT_DEFAULT);
|
||||
uvm_conf_computing_channel_iv_rotation_limit = UVM_CONF_COMPUTING_IV_REMAINING_LIMIT_DEFAULT;
|
||||
}
|
||||
|
||||
return NV_OK;
|
||||
|
||||
error:
|
||||
@ -381,9 +402,8 @@ void uvm_conf_computing_log_gpu_encryption(uvm_channel_t *channel, UvmCslIv *iv)
|
||||
status = nvUvmInterfaceCslIncrementIv(&channel->csl.ctx, UVM_CSL_OPERATION_DECRYPT, 1, iv);
|
||||
uvm_mutex_unlock(&channel->csl.ctx_lock);
|
||||
|
||||
// TODO: Bug 4014720: If nvUvmInterfaceCslIncrementIv returns with
|
||||
// NV_ERR_INSUFFICIENT_RESOURCES then the IV needs to be rotated via
|
||||
// nvUvmInterfaceCslRotateIv.
|
||||
// IV rotation is done preemptively as needed, so the above
|
||||
// call cannot return failure.
|
||||
UVM_ASSERT(status == NV_OK);
|
||||
}
|
||||
|
||||
@ -395,9 +415,8 @@ void uvm_conf_computing_acquire_encryption_iv(uvm_channel_t *channel, UvmCslIv *
|
||||
status = nvUvmInterfaceCslIncrementIv(&channel->csl.ctx, UVM_CSL_OPERATION_ENCRYPT, 1, iv);
|
||||
uvm_mutex_unlock(&channel->csl.ctx_lock);
|
||||
|
||||
// TODO: Bug 4014720: If nvUvmInterfaceCslIncrementIv returns with
|
||||
// NV_ERR_INSUFFICIENT_RESOURCES then the IV needs to be rotated via
|
||||
// nvUvmInterfaceCslRotateIv.
|
||||
// IV rotation is done preemptively as needed, so the above
|
||||
// call cannot return failure.
|
||||
UVM_ASSERT(status == NV_OK);
|
||||
}
|
||||
|
||||
@ -421,8 +440,8 @@ void uvm_conf_computing_cpu_encrypt(uvm_channel_t *channel,
|
||||
(NvU8 *) auth_tag_buffer);
|
||||
uvm_mutex_unlock(&channel->csl.ctx_lock);
|
||||
|
||||
// nvUvmInterfaceCslEncrypt fails when a 64-bit encryption counter
|
||||
// overflows. This is not supposed to happen on CC.
|
||||
// IV rotation is done preemptively as needed, so the above
|
||||
// call cannot return failure.
|
||||
UVM_ASSERT(status == NV_OK);
|
||||
}
|
||||
|
||||
@ -435,6 +454,16 @@ NV_STATUS uvm_conf_computing_cpu_decrypt(uvm_channel_t *channel,
|
||||
{
|
||||
NV_STATUS status;
|
||||
|
||||
// The CSL context associated with a channel can be used by multiple
|
||||
// threads. The IV sequence is thus guaranteed only while the channel is
|
||||
// "locked for push". The channel/push lock is released in
|
||||
// "uvm_channel_end_push", and at that time the GPU encryption operations
|
||||
// have not executed, yet. Therefore the caller has to use
|
||||
// "uvm_conf_computing_log_gpu_encryption" to explicitly store IVs needed
|
||||
// to perform CPU decryption and pass those IVs to this function after the
|
||||
// push that did the encryption completes.
|
||||
UVM_ASSERT(src_iv);
|
||||
|
||||
uvm_mutex_lock(&channel->csl.ctx_lock);
|
||||
status = nvUvmInterfaceCslDecrypt(&channel->csl.ctx,
|
||||
size,
|
||||
@ -463,7 +492,7 @@ NV_STATUS uvm_conf_computing_fault_decrypt(uvm_parent_gpu_t *parent_gpu,
|
||||
// decryption is invoked as part of fault servicing.
|
||||
UVM_ASSERT(uvm_sem_is_locked(&parent_gpu->isr.replayable_faults.service_lock));
|
||||
|
||||
UVM_ASSERT(!uvm_parent_gpu_replayable_fault_buffer_is_uvm_owned(parent_gpu));
|
||||
UVM_ASSERT(g_uvm_global.conf_computing_enabled);
|
||||
|
||||
status = nvUvmInterfaceCslDecrypt(&parent_gpu->fault_buffer_info.rm_info.replayable.cslCtx,
|
||||
parent_gpu->fault_buffer_hal->entry_size(parent_gpu),
|
||||
@ -475,7 +504,9 @@ NV_STATUS uvm_conf_computing_fault_decrypt(uvm_parent_gpu_t *parent_gpu,
|
||||
(const NvU8 *) auth_tag_buffer);
|
||||
|
||||
if (status != NV_OK)
|
||||
UVM_ERR_PRINT("nvUvmInterfaceCslDecrypt() failed: %s, GPU %s\n", nvstatusToString(status), parent_gpu->name);
|
||||
UVM_ERR_PRINT("nvUvmInterfaceCslDecrypt() failed: %s, GPU %s\n",
|
||||
nvstatusToString(status),
|
||||
uvm_parent_gpu_name(parent_gpu));
|
||||
|
||||
return status;
|
||||
}
|
||||
@ -487,7 +518,7 @@ void uvm_conf_computing_fault_increment_decrypt_iv(uvm_parent_gpu_t *parent_gpu,
|
||||
// See comment in uvm_conf_computing_fault_decrypt
|
||||
UVM_ASSERT(uvm_sem_is_locked(&parent_gpu->isr.replayable_faults.service_lock));
|
||||
|
||||
UVM_ASSERT(!uvm_parent_gpu_replayable_fault_buffer_is_uvm_owned(parent_gpu));
|
||||
UVM_ASSERT(g_uvm_global.conf_computing_enabled);
|
||||
|
||||
status = nvUvmInterfaceCslIncrementIv(&parent_gpu->fault_buffer_info.rm_info.replayable.cslCtx,
|
||||
UVM_CSL_OPERATION_DECRYPT,
|
||||
@ -496,3 +527,101 @@ void uvm_conf_computing_fault_increment_decrypt_iv(uvm_parent_gpu_t *parent_gpu,
|
||||
|
||||
UVM_ASSERT(status == NV_OK);
|
||||
}
|
||||
|
||||
void uvm_conf_computing_query_message_pools(uvm_channel_t *channel,
|
||||
NvU64 *remaining_encryptions,
|
||||
NvU64 *remaining_decryptions)
|
||||
{
|
||||
NV_STATUS status;
|
||||
|
||||
UVM_ASSERT(channel);
|
||||
UVM_ASSERT(remaining_encryptions);
|
||||
UVM_ASSERT(remaining_decryptions);
|
||||
|
||||
uvm_mutex_lock(&channel->csl.ctx_lock);
|
||||
status = nvUvmInterfaceCslQueryMessagePool(&channel->csl.ctx, UVM_CSL_OPERATION_ENCRYPT, remaining_encryptions);
|
||||
UVM_ASSERT(status == NV_OK);
|
||||
UVM_ASSERT(*remaining_encryptions <= NV_U32_MAX);
|
||||
|
||||
status = nvUvmInterfaceCslQueryMessagePool(&channel->csl.ctx, UVM_CSL_OPERATION_DECRYPT, remaining_decryptions);
|
||||
UVM_ASSERT(status == NV_OK);
|
||||
UVM_ASSERT(*remaining_decryptions <= NV_U32_MAX);
|
||||
|
||||
// LCIC channels never use CPU encrypt/GPU decrypt
|
||||
if (uvm_channel_is_lcic(channel))
|
||||
UVM_ASSERT(*remaining_encryptions == NV_U32_MAX);
|
||||
|
||||
uvm_mutex_unlock(&channel->csl.ctx_lock);
|
||||
}
|
||||
|
||||
// Single rotation attempt: rotate the channel's encryption and/or decryption
// IV when the number of remaining messages in that direction is <= limit.
//
// Returns NV_OK when no rotation is needed or when the rotation succeeded.
// A failure from uvm_channel_wait() or nvUvmInterfaceCslRotateIv() is returned
// as is, except that NV_ERR_STATE_IN_USE is turned into
// NV_ERR_INSUFFICIENT_RESOURCES when either remaining count is already below
// UVM_CONF_COMPUTING_IV_REMAINING_LIMIT_MIN.
static NV_STATUS uvm_conf_computing_rotate_channel_ivs_below_limit_internal(uvm_channel_t *channel, NvU64 limit)
{
    NV_STATUS status;
    NvU64 enc_left, dec_left;
    bool need_enc_rotation, need_dec_rotation;

    UVM_ASSERT(uvm_channel_is_locked_for_push(channel) ||
               (uvm_channel_is_lcic(channel) && uvm_channel_manager_is_wlc_ready(channel->pool->manager)));

    uvm_conf_computing_query_message_pools(channel, &enc_left, &dec_left);

    need_enc_rotation = enc_left <= limit;

    // Ignore decryption limit for SEC2, only CE channels support
    // GPU encrypt/CPU decrypt. However, RM reports _some_ decrementing
    // value for SEC2 decryption counter.
    need_dec_rotation = (dec_left <= limit) && uvm_channel_is_ce(channel);

    if (!(need_enc_rotation || need_dec_rotation))
        return NV_OK;

    // Wait for all in-flight pushes. The caller needs to guarantee that there
    // are no concurrent pushes created, e.g. by only calling rotate after
    // a channel is locked_for_push.
    status = uvm_channel_wait(channel);
    if (status != NV_OK)
        return status;

    uvm_mutex_lock(&channel->csl.ctx_lock);

    if (need_enc_rotation)
        status = nvUvmInterfaceCslRotateIv(&channel->csl.ctx, UVM_CSL_OPERATION_ENCRYPT);

    // The decryption IV is only rotated if the encryption step did not fail
    if (need_dec_rotation && status == NV_OK)
        status = nvUvmInterfaceCslRotateIv(&channel->csl.ctx, UVM_CSL_OPERATION_DECRYPT);

    uvm_mutex_unlock(&channel->csl.ctx_lock);

    if (status != NV_ERR_STATE_IN_USE)
        return status;

    // Change the error to out of resources if the available IVs are running
    // too low
    if (enc_left < UVM_CONF_COMPUTING_IV_REMAINING_LIMIT_MIN ||
        dec_left < UVM_CONF_COMPUTING_IV_REMAINING_LIMIT_MIN)
        return NV_ERR_INSUFFICIENT_RESOURCES;

    return status;
}
|
||||
|
||||
// Rotate the channel IVs when the remaining message count is at or below
// limit.
//
// When the attempt reports NV_ERR_STATE_IN_USE:
//  - retry_if_busy == true:  keep retrying until a different status comes back
//  - retry_if_busy == false: report NV_OK; the rotation will be retried at the
//                            next opportunity.
// Any other status is returned unchanged.
NV_STATUS uvm_conf_computing_rotate_channel_ivs_below_limit(uvm_channel_t *channel, NvU64 limit, bool retry_if_busy)
{
    for (;;) {
        NV_STATUS status = uvm_conf_computing_rotate_channel_ivs_below_limit_internal(channel, limit);

        if (status != NV_ERR_STATE_IN_USE)
            return status;

        // Hide "busy" error. The rotation will be retried at the next
        // opportunity.
        if (!retry_if_busy)
            return NV_OK;
    }
}
|
||||
|
||||
// Rotate the channel IVs using the module-wide rotation limit. A busy status
// is not retried (retry_if_busy is false).
NV_STATUS uvm_conf_computing_maybe_rotate_channel_ivs(uvm_channel_t *channel)
{
    const NvU64 limit = uvm_conf_computing_channel_iv_rotation_limit;

    return uvm_conf_computing_rotate_channel_ivs_below_limit(channel, limit, false);
}
|
||||
|
||||
// Rotate the channel IVs using the module-wide rotation limit, retrying for as
// long as the rotation reports a busy status (retry_if_busy is true).
NV_STATUS uvm_conf_computing_maybe_rotate_channel_ivs_retry_busy(uvm_channel_t *channel)
{
    const NvU64 limit = uvm_conf_computing_channel_iv_rotation_limit;

    return uvm_conf_computing_rotate_channel_ivs_below_limit(channel, limit, true);
}
|
||||
|
@ -60,12 +60,8 @@
|
||||
// UVM_METHOD_SIZE * 2 * 10 = 80.
|
||||
#define UVM_CONF_COMPUTING_SIGN_BUF_MAX_SIZE 80
|
||||
|
||||
// All GPUs derive confidential computing status from their parent.
|
||||
// By current policy all parent GPUs have identical confidential
|
||||
// computing status.
|
||||
NV_STATUS uvm_conf_computing_init_parent_gpu(const uvm_parent_gpu_t *parent);
|
||||
bool uvm_conf_computing_mode_enabled_parent(const uvm_parent_gpu_t *parent);
|
||||
bool uvm_conf_computing_mode_enabled(const uvm_gpu_t *gpu);
|
||||
void uvm_conf_computing_check_parent_gpu(const uvm_parent_gpu_t *parent);
|
||||
|
||||
bool uvm_conf_computing_mode_is_hcc(const uvm_gpu_t *gpu);
|
||||
|
||||
typedef struct
|
||||
@ -201,4 +197,21 @@ NV_STATUS uvm_conf_computing_fault_decrypt(uvm_parent_gpu_t *parent_gpu,
|
||||
//
|
||||
// Locking: this function must be invoked while holding the replayable ISR lock.
|
||||
void uvm_conf_computing_fault_increment_decrypt_iv(uvm_parent_gpu_t *parent_gpu, NvU64 increment);
|
||||
|
||||
// Query the number of remaining messages before IV needs to be rotated.
|
||||
void uvm_conf_computing_query_message_pools(uvm_channel_t *channel,
|
||||
NvU64 *remaining_encryptions,
|
||||
NvU64 *remaining_decryptions);
|
||||
|
||||
// Check if there are more than uvm_conf_computing_channel_iv_rotation_limit
|
||||
// messages available in the channel and try to rotate if not.
|
||||
NV_STATUS uvm_conf_computing_maybe_rotate_channel_ivs(uvm_channel_t *channel);
|
||||
|
||||
// Check if there are more than uvm_conf_computing_channel_iv_rotation_limit
|
||||
// messages available in the channel and rotate if not.
|
||||
NV_STATUS uvm_conf_computing_maybe_rotate_channel_ivs_retry_busy(uvm_channel_t *channel);
|
||||
|
||||
// Check if there are 'limit' or fewer messages available in either direction
|
||||
// and rotate if so.
|
||||
NV_STATUS uvm_conf_computing_rotate_channel_ivs_below_limit(uvm_channel_t *channel, NvU64 limit, bool retry_if_busy);
|
||||
#endif // __UVM_CONF_COMPUTING_H__
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*******************************************************************************
|
||||
Copyright (c) 2016-2019 NVIDIA Corporation
|
||||
Copyright (c) 2016-2023 NVIDIA Corporation
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to
|
||||
@ -34,23 +34,24 @@ NV_STATUS uvm_test_fault_buffer_flush(UVM_TEST_FAULT_BUFFER_FLUSH_PARAMS *params
|
||||
NV_STATUS status = NV_OK;
|
||||
uvm_va_space_t *va_space = uvm_va_space_get(filp);
|
||||
uvm_gpu_t *gpu;
|
||||
uvm_global_processor_mask_t retained_gpus;
|
||||
uvm_processor_mask_t *retained_gpus;
|
||||
NvU64 i;
|
||||
|
||||
uvm_global_processor_mask_zero(&retained_gpus);
|
||||
retained_gpus = uvm_processor_mask_cache_alloc();
|
||||
if (!retained_gpus)
|
||||
return NV_ERR_NO_MEMORY;
|
||||
|
||||
uvm_processor_mask_zero(retained_gpus);
|
||||
|
||||
uvm_va_space_down_read(va_space);
|
||||
|
||||
for_each_va_space_gpu(gpu, va_space) {
|
||||
if (gpu->parent->replayable_faults_supported)
|
||||
uvm_global_processor_mask_set(&retained_gpus, gpu->global_id);
|
||||
}
|
||||
uvm_processor_mask_and(retained_gpus, &va_space->faultable_processors, &va_space->registered_gpus);
|
||||
|
||||
uvm_global_mask_retain(&retained_gpus);
|
||||
uvm_global_gpu_retain(retained_gpus);
|
||||
|
||||
uvm_va_space_up_read(va_space);
|
||||
|
||||
if (uvm_global_processor_mask_empty(&retained_gpus))
|
||||
if (uvm_processor_mask_empty(retained_gpus))
|
||||
return NV_ERR_INVALID_DEVICE;
|
||||
|
||||
for (i = 0; i < params->iterations; i++) {
|
||||
@ -59,11 +60,12 @@ NV_STATUS uvm_test_fault_buffer_flush(UVM_TEST_FAULT_BUFFER_FLUSH_PARAMS *params
|
||||
break;
|
||||
}
|
||||
|
||||
for_each_global_gpu_in_mask(gpu, &retained_gpus)
|
||||
for_each_gpu_in_mask(gpu, retained_gpus)
|
||||
TEST_CHECK_GOTO(uvm_gpu_fault_buffer_flush(gpu) == NV_OK, out);
|
||||
}
|
||||
|
||||
out:
|
||||
uvm_global_mask_release(&retained_gpus);
|
||||
uvm_global_gpu_release(retained_gpus);
|
||||
uvm_processor_mask_cache_free(retained_gpus);
|
||||
return status;
|
||||
}
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*******************************************************************************
|
||||
Copyright (c) 2016-2021 NVidia Corporation
|
||||
Copyright (c) 2016-2023 NVIDIA Corporation
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to
|
||||
@ -151,6 +151,22 @@ static NV_STATUS verify_mapping_info(uvm_va_space_t *va_space,
|
||||
return NV_OK;
|
||||
}
|
||||
|
||||
// If mem_info->uuid equals the uuid of a GPU registered in va_space, overwrite
// it with the uuid of that GPU's parent. Only the first matching GPU is used;
// mem_info is left untouched when nothing matches.
static void fix_memory_info_uuid(uvm_va_space_t *va_space, UvmGpuMemoryInfo *mem_info)
{
    uvm_gpu_t *gpu;

    // TODO: Bug 4351121: RM will return the GI UUID, but
    // uvm_va_space_get_gpu_by_uuid() currently matches on physical GPU UUIDs.
    // Match on GI UUID until the UVM user level API has been updated to use
    // the GI UUID.
    for_each_va_space_gpu(gpu, va_space) {
        if (!uvm_uuid_eq(&gpu->uuid, &mem_info->uuid))
            continue;

        mem_info->uuid = gpu->parent->uuid;
        break;
    }
}
|
||||
|
||||
static NV_STATUS test_get_rm_ptes_single_gpu(uvm_va_space_t *va_space, UVM_TEST_GET_RM_PTES_PARAMS *params)
|
||||
{
|
||||
NV_STATUS status = NV_OK;
|
||||
@ -168,7 +184,8 @@ static NV_STATUS test_get_rm_ptes_single_gpu(uvm_va_space_t *va_space, UVM_TEST_
|
||||
client = params->hClient;
|
||||
memory = params->hMemory;
|
||||
|
||||
// Note: This check is safe as single GPU test does not run on SLI enabled devices.
|
||||
// Note: This check is safe as single GPU test does not run on SLI enabled
|
||||
// devices.
|
||||
memory_mapping_gpu = uvm_va_space_get_gpu_by_uuid_with_gpu_va_space(va_space, ¶ms->gpu_uuid);
|
||||
if (!memory_mapping_gpu)
|
||||
return NV_ERR_INVALID_DEVICE;
|
||||
@ -180,7 +197,12 @@ static NV_STATUS test_get_rm_ptes_single_gpu(uvm_va_space_t *va_space, UVM_TEST_
|
||||
if (status != NV_OK)
|
||||
return status;
|
||||
|
||||
TEST_CHECK_GOTO(uvm_processor_uuid_eq(&memory_info.uuid, ¶ms->gpu_uuid), done);
|
||||
// TODO: Bug 4351121: RM will return the GI UUID. Replace it with the
|
||||
// physical GPU UUID until the UVM user level has been updated to use
|
||||
// the GI UUID.
|
||||
fix_memory_info_uuid(va_space, &memory_info);
|
||||
|
||||
TEST_CHECK_GOTO(uvm_uuid_eq(&memory_info.uuid, ¶ms->gpu_uuid), done);
|
||||
|
||||
TEST_CHECK_GOTO((memory_info.size == params->size), done);
|
||||
|
||||
@ -287,6 +309,11 @@ static NV_STATUS test_get_rm_ptes_multi_gpu(uvm_va_space_t *va_space, UVM_TEST_G
|
||||
if (status != NV_OK)
|
||||
return status;
|
||||
|
||||
// TODO: Bug 4351121: RM will return the GI UUID. Replace it with the
|
||||
// physical GPU UUID until the UVM user level has been updated to use
|
||||
// the GI UUID.
|
||||
fix_memory_info_uuid(va_space, &memory_info);
|
||||
|
||||
memset(&ext_mapping_info, 0, sizeof(ext_mapping_info));
|
||||
|
||||
memset(pte_buffer, 0, sizeof(pte_buffer));
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*******************************************************************************
|
||||
Copyright (c) 2015-2022 NVIDIA Corporation
|
||||
Copyright (c) 2015-2023 NVIDIA Corporation
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to
|
||||
@ -27,6 +27,7 @@
|
||||
#include "uvm_gpu_replayable_faults.h"
|
||||
#include "uvm_mem.h"
|
||||
#include "uvm_perf_events.h"
|
||||
#include "uvm_processors.h"
|
||||
#include "uvm_procfs.h"
|
||||
#include "uvm_thread_context.h"
|
||||
#include "uvm_va_range.h"
|
||||
@ -71,11 +72,6 @@ static void uvm_unregister_callbacks(void)
|
||||
}
|
||||
}
|
||||
|
||||
static void sev_init(const UvmPlatformInfo *platform_info)
|
||||
{
|
||||
g_uvm_global.sev_enabled = platform_info->sevEnabled;
|
||||
}
|
||||
|
||||
NV_STATUS uvm_global_init(void)
|
||||
{
|
||||
NV_STATUS status;
|
||||
@ -124,8 +120,13 @@ NV_STATUS uvm_global_init(void)
|
||||
|
||||
uvm_ats_init(&platform_info);
|
||||
g_uvm_global.num_simulated_devices = 0;
|
||||
g_uvm_global.conf_computing_enabled = platform_info.confComputingEnabled;
|
||||
|
||||
sev_init(&platform_info);
|
||||
status = uvm_processor_mask_cache_init();
|
||||
if (status != NV_OK) {
|
||||
UVM_ERR_PRINT("uvm_processor_mask_cache_init() failed: %s\n", nvstatusToString(status));
|
||||
goto error;
|
||||
}
|
||||
|
||||
status = uvm_gpu_init();
|
||||
if (status != NV_OK) {
|
||||
@ -229,6 +230,7 @@ void uvm_global_exit(void)
|
||||
uvm_mem_global_exit();
|
||||
uvm_pmm_sysmem_exit();
|
||||
uvm_gpu_exit();
|
||||
uvm_processor_mask_cache_exit();
|
||||
|
||||
if (g_uvm_global.rm_session_handle != 0)
|
||||
uvm_rm_locked_call_void(nvUvmInterfaceSessionDestroy(g_uvm_global.rm_session_handle));
|
||||
@ -247,19 +249,19 @@ void uvm_global_exit(void)
|
||||
|
||||
// Signal to the top-half ISR whether calls from the RM's top-half ISR are to
|
||||
// be completed without processing.
|
||||
static void uvm_gpu_set_isr_suspended(uvm_gpu_t *gpu, bool is_suspended)
|
||||
static void uvm_parent_gpu_set_isr_suspended(uvm_parent_gpu_t *parent_gpu, bool is_suspended)
|
||||
{
|
||||
uvm_spin_lock_irqsave(&gpu->parent->isr.interrupts_lock);
|
||||
uvm_spin_lock_irqsave(&parent_gpu->isr.interrupts_lock);
|
||||
|
||||
gpu->parent->isr.is_suspended = is_suspended;
|
||||
parent_gpu->isr.is_suspended = is_suspended;
|
||||
|
||||
uvm_spin_unlock_irqrestore(&gpu->parent->isr.interrupts_lock);
|
||||
uvm_spin_unlock_irqrestore(&parent_gpu->isr.interrupts_lock);
|
||||
}
|
||||
|
||||
static NV_STATUS uvm_suspend(void)
|
||||
{
|
||||
uvm_va_space_t *va_space = NULL;
|
||||
uvm_global_gpu_id_t gpu_id;
|
||||
uvm_gpu_id_t gpu_id;
|
||||
uvm_gpu_t *gpu;
|
||||
|
||||
// Upon entry into this function, the following is true:
|
||||
@ -293,7 +295,7 @@ static NV_STATUS uvm_suspend(void)
|
||||
// Though global_lock isn't held here, pm.lock indirectly prevents the
|
||||
// addition and removal of GPUs, since these operations can currently
|
||||
// only occur in response to ioctl() calls.
|
||||
for_each_global_gpu_id_in_mask(gpu_id, &g_uvm_global.retained_gpus) {
|
||||
for_each_gpu_id_in_mask(gpu_id, &g_uvm_global.retained_gpus) {
|
||||
gpu = uvm_gpu_get(gpu_id);
|
||||
|
||||
// Since fault buffer state may be lost across sleep cycles, UVM must
|
||||
@ -314,7 +316,7 @@ static NV_STATUS uvm_suspend(void)
|
||||
// notifications have been handled.
|
||||
uvm_gpu_access_counters_set_ignore(gpu, true);
|
||||
|
||||
uvm_gpu_set_isr_suspended(gpu, true);
|
||||
uvm_parent_gpu_set_isr_suspended(gpu->parent, true);
|
||||
|
||||
nv_kthread_q_flush(&gpu->parent->isr.bottom_half_q);
|
||||
|
||||
@ -347,7 +349,7 @@ NV_STATUS uvm_suspend_entry(void)
|
||||
static NV_STATUS uvm_resume(void)
|
||||
{
|
||||
uvm_va_space_t *va_space = NULL;
|
||||
uvm_global_gpu_id_t gpu_id;
|
||||
uvm_gpu_id_t gpu_id;
|
||||
uvm_gpu_t *gpu;
|
||||
|
||||
g_uvm_global.pm.is_suspended = false;
|
||||
@ -366,14 +368,14 @@ static NV_STATUS uvm_resume(void)
|
||||
uvm_mutex_unlock(&g_uvm_global.va_spaces.lock);
|
||||
|
||||
// pm.lock is held in lieu of global_lock to prevent GPU addition/removal
|
||||
for_each_global_gpu_id_in_mask(gpu_id, &g_uvm_global.retained_gpus) {
|
||||
for_each_gpu_id_in_mask(gpu_id, &g_uvm_global.retained_gpus) {
|
||||
gpu = uvm_gpu_get(gpu_id);
|
||||
|
||||
// Bring the fault buffer software state back in sync with the
|
||||
// hardware state.
|
||||
uvm_gpu_fault_buffer_resume(gpu->parent);
|
||||
|
||||
uvm_gpu_set_isr_suspended(gpu, false);
|
||||
uvm_parent_gpu_set_isr_suspended(gpu->parent, false);
|
||||
|
||||
// Reenable access counter interrupt processing unless notifications
|
||||
// have been set to be suppressed.
|
||||
@ -431,35 +433,36 @@ NV_STATUS uvm_global_reset_fatal_error(void)
|
||||
return nv_atomic_xchg(&g_uvm_global.fatal_error, NV_OK);
|
||||
}
|
||||
|
||||
void uvm_global_mask_retain(const uvm_global_processor_mask_t *mask)
|
||||
void uvm_global_gpu_retain(const uvm_processor_mask_t *mask)
|
||||
{
|
||||
uvm_gpu_t *gpu;
|
||||
for_each_global_gpu_in_mask(gpu, mask)
|
||||
|
||||
for_each_gpu_in_mask(gpu, mask)
|
||||
uvm_gpu_retain(gpu);
|
||||
}
|
||||
|
||||
void uvm_global_mask_release(const uvm_global_processor_mask_t *mask)
|
||||
void uvm_global_gpu_release(const uvm_processor_mask_t *mask)
|
||||
{
|
||||
uvm_global_gpu_id_t gpu_id;
|
||||
uvm_gpu_id_t gpu_id;
|
||||
|
||||
if (uvm_global_processor_mask_empty(mask))
|
||||
if (uvm_processor_mask_empty(mask))
|
||||
return;
|
||||
|
||||
uvm_mutex_lock(&g_uvm_global.global_lock);
|
||||
|
||||
// Do not use for_each_global_gpu_in_mask as it reads the GPU state and it
|
||||
// might get destroyed
|
||||
for_each_global_gpu_id_in_mask(gpu_id, mask)
|
||||
// Do not use for_each_gpu_in_mask as it reads the GPU state and it
|
||||
// might get destroyed.
|
||||
for_each_gpu_id_in_mask(gpu_id, mask)
|
||||
uvm_gpu_release_locked(uvm_gpu_get(gpu_id));
|
||||
|
||||
uvm_mutex_unlock(&g_uvm_global.global_lock);
|
||||
}
|
||||
|
||||
NV_STATUS uvm_global_mask_check_ecc_error(uvm_global_processor_mask_t *gpus)
|
||||
NV_STATUS uvm_global_gpu_check_ecc_error(uvm_processor_mask_t *gpus)
|
||||
{
|
||||
uvm_gpu_t *gpu;
|
||||
|
||||
for_each_global_gpu_in_mask(gpu, gpus) {
|
||||
for_each_gpu_in_mask(gpu, gpus) {
|
||||
NV_STATUS status = uvm_gpu_check_ecc_error(gpu);
|
||||
if (status != NV_OK)
|
||||
return status;
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*******************************************************************************
|
||||
Copyright (c) 2015-2021 NVIDIA Corporation
|
||||
Copyright (c) 2015-2023 NVIDIA Corporation
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to
|
||||
@ -40,13 +40,13 @@ struct uvm_global_struct
|
||||
// Note that GPUs are added to this mask as the last step of add_gpu() and
|
||||
// removed from it as the first step of remove_gpu() implying that a GPU
|
||||
// that's being initialized or deinitialized will not be in it.
|
||||
uvm_global_processor_mask_t retained_gpus;
|
||||
uvm_processor_mask_t retained_gpus;
|
||||
|
||||
// Array of the parent GPUs registered with UVM. Note that GPUs will have
|
||||
// ids offset by 1 to accommodate the UVM_GLOBAL_ID_CPU so e.g.
|
||||
// parent_gpus[0] will have GPU id = 1. A GPU entry is unused iff it does
|
||||
// not exist (is a NULL pointer) in this table.
|
||||
uvm_parent_gpu_t *parent_gpus[UVM_MAX_GPUS];
|
||||
// ids offset by 1 to accommodate the UVM_ID_CPU so e.g., parent_gpus[0]
|
||||
// will have GPU id = 1. A GPU entry is unused iff it does not exist
|
||||
// (is a NULL pointer) in this table.
|
||||
uvm_parent_gpu_t *parent_gpus[UVM_PARENT_ID_MAX_GPUS];
|
||||
|
||||
// A global RM session (RM client)
|
||||
// Created on module load and destroyed on module unload
|
||||
@ -143,11 +143,16 @@ struct uvm_global_struct
|
||||
struct page *page;
|
||||
} unload_state;
|
||||
|
||||
// AMD Secure Encrypted Virtualization (SEV) status. True if VM has SEV
|
||||
// enabled. This field is set once during global initialization
|
||||
// (uvm_global_init), and can be read afterwards without acquiring any
|
||||
// locks.
|
||||
bool sev_enabled;
|
||||
// True if the VM has AMD's SEV, or equivalent HW security extensions such
|
||||
// as Intel's TDX, enabled. The flag is always false on the host.
|
||||
//
|
||||
// This value moves in tandem with that of Confidential Computing in the
|
||||
// GPU(s) in all supported configurations, so it is used as a proxy for the
|
||||
// Confidential Computing state.
|
||||
//
|
||||
// This field is set once during global initialization (uvm_global_init),
|
||||
// and can be read afterwards without acquiring any locks.
|
||||
bool conf_computing_enabled;
|
||||
};
|
||||
|
||||
// Initialize global uvm state
|
||||
@ -167,7 +172,7 @@ NV_STATUS uvm_resume_entry(void);
|
||||
// LOCKING: requires that you hold the global lock and gpu_table_lock
|
||||
static void uvm_global_add_parent_gpu(uvm_parent_gpu_t *parent_gpu)
|
||||
{
|
||||
NvU32 gpu_index = uvm_id_gpu_index(parent_gpu->id);
|
||||
NvU32 gpu_index = uvm_parent_id_gpu_index(parent_gpu->id);
|
||||
|
||||
uvm_assert_mutex_locked(&g_uvm_global.global_lock);
|
||||
uvm_assert_spinlock_locked(&g_uvm_global.gpu_table_lock);
|
||||
@ -181,7 +186,7 @@ static void uvm_global_add_parent_gpu(uvm_parent_gpu_t *parent_gpu)
|
||||
// LOCKING: requires that you hold the global lock and gpu_table_lock
|
||||
static void uvm_global_remove_parent_gpu(uvm_parent_gpu_t *parent_gpu)
|
||||
{
|
||||
NvU32 gpu_index = uvm_id_gpu_index(parent_gpu->id);
|
||||
NvU32 gpu_index = uvm_parent_id_gpu_index(parent_gpu->id);
|
||||
|
||||
uvm_assert_mutex_locked(&g_uvm_global.global_lock);
|
||||
uvm_assert_spinlock_locked(&g_uvm_global.gpu_table_lock);
|
||||
@ -196,41 +201,25 @@ static void uvm_global_remove_parent_gpu(uvm_parent_gpu_t *parent_gpu)
|
||||
//
|
||||
// LOCKING: requires that you hold the gpu_table_lock, the global lock, or have
|
||||
// retained at least one of the child GPUs.
|
||||
static uvm_parent_gpu_t *uvm_parent_gpu_get(uvm_gpu_id_t id)
|
||||
static uvm_parent_gpu_t *uvm_parent_gpu_get(uvm_parent_gpu_id_t id)
|
||||
{
|
||||
return g_uvm_global.parent_gpus[uvm_id_gpu_index(id)];
|
||||
return g_uvm_global.parent_gpus[uvm_parent_id_gpu_index(id)];
|
||||
}
|
||||
|
||||
// Get a gpu by its global id.
|
||||
// Get a gpu by its GPU id.
|
||||
// Returns a pointer to the GPU object, or NULL if not found.
|
||||
//
|
||||
// LOCKING: requires that you hold the gpu_table_lock, the global_lock, or have
|
||||
// retained the gpu.
|
||||
static uvm_gpu_t *uvm_gpu_get(uvm_global_gpu_id_t global_gpu_id)
|
||||
static uvm_gpu_t *uvm_gpu_get(uvm_gpu_id_t gpu_id)
|
||||
{
|
||||
uvm_parent_gpu_t *parent_gpu;
|
||||
|
||||
parent_gpu = g_uvm_global.parent_gpus[uvm_id_gpu_index_from_global_gpu_id(global_gpu_id)];
|
||||
parent_gpu = g_uvm_global.parent_gpus[uvm_parent_id_gpu_index_from_gpu_id(gpu_id)];
|
||||
if (!parent_gpu)
|
||||
return NULL;
|
||||
|
||||
return parent_gpu->gpus[uvm_global_id_sub_processor_index(global_gpu_id)];
|
||||
}
|
||||
|
||||
// Get a gpu by its processor id.
|
||||
// Returns a pointer to the GPU object, or NULL if not found.
|
||||
//
|
||||
// LOCKING: requires that you hold the gpu_table_lock, the global_lock, or have
|
||||
// retained the gpu.
|
||||
static uvm_gpu_t *uvm_gpu_get_by_processor_id(uvm_processor_id_t id)
|
||||
{
|
||||
uvm_global_gpu_id_t global_id = uvm_global_gpu_id_from_gpu_id(id);
|
||||
uvm_gpu_t *gpu = uvm_gpu_get(global_id);
|
||||
|
||||
if (gpu)
|
||||
UVM_ASSERT(!gpu->parent->smc.enabled);
|
||||
|
||||
return gpu;
|
||||
return parent_gpu->gpus[uvm_id_sub_processor_index(gpu_id)];
|
||||
}
|
||||
|
||||
static uvmGpuSessionHandle uvm_global_session_handle(void)
|
||||
@ -287,56 +276,57 @@ static NV_STATUS uvm_global_get_status(void)
|
||||
// reset call was made.
|
||||
NV_STATUS uvm_global_reset_fatal_error(void);
|
||||
|
||||
static uvm_gpu_t *uvm_global_processor_mask_find_first_gpu(const uvm_global_processor_mask_t *global_gpus)
|
||||
static uvm_gpu_t *uvm_processor_mask_find_first_gpu(const uvm_processor_mask_t *gpus)
|
||||
{
|
||||
uvm_gpu_t *gpu;
|
||||
uvm_global_gpu_id_t gpu_id = uvm_global_processor_mask_find_first_gpu_id(global_gpus);
|
||||
uvm_gpu_id_t gpu_id = uvm_processor_mask_find_first_gpu_id(gpus);
|
||||
|
||||
if (UVM_GLOBAL_ID_IS_INVALID(gpu_id))
|
||||
if (UVM_ID_IS_INVALID(gpu_id))
|
||||
return NULL;
|
||||
|
||||
gpu = uvm_gpu_get(gpu_id);
|
||||
|
||||
// If there is valid GPU id in the mask, assert that the corresponding
|
||||
// uvm_gpu_t is present. Otherwise it would stop a
|
||||
// for_each_global_gpu_in_mask() loop prematurely. Today, this could only
|
||||
// for_each_gpu_in_mask() loop prematurely. Today, this could only
|
||||
// happen in remove_gpu() because the GPU being removed is deleted from the
|
||||
// global table very early.
|
||||
UVM_ASSERT_MSG(gpu, "gpu_id %u\n", uvm_global_id_value(gpu_id));
|
||||
UVM_ASSERT_MSG(gpu, "gpu_id %u\n", uvm_id_value(gpu_id));
|
||||
|
||||
return gpu;
|
||||
}
|
||||
|
||||
static uvm_gpu_t *__uvm_global_processor_mask_find_next_gpu(const uvm_global_processor_mask_t *global_gpus, uvm_gpu_t *gpu)
|
||||
static uvm_gpu_t *__uvm_processor_mask_find_next_gpu(const uvm_processor_mask_t *gpus, uvm_gpu_t *gpu)
|
||||
{
|
||||
uvm_global_gpu_id_t gpu_id;
|
||||
uvm_gpu_id_t gpu_id;
|
||||
|
||||
UVM_ASSERT(gpu);
|
||||
|
||||
gpu_id = uvm_global_processor_mask_find_next_id(global_gpus, uvm_global_gpu_id_next(gpu->global_id));
|
||||
if (UVM_GLOBAL_ID_IS_INVALID(gpu_id))
|
||||
gpu_id = uvm_processor_mask_find_next_id(gpus, uvm_gpu_id_next(gpu->id));
|
||||
if (UVM_ID_IS_INVALID(gpu_id))
|
||||
return NULL;
|
||||
|
||||
gpu = uvm_gpu_get(gpu_id);
|
||||
|
||||
// See comment in uvm_global_processor_mask_find_first_gpu().
|
||||
UVM_ASSERT_MSG(gpu, "gpu_id %u\n", uvm_global_id_value(gpu_id));
|
||||
// See comment in uvm_processor_mask_find_first_gpu().
|
||||
UVM_ASSERT_MSG(gpu, "gpu_id %u\n", uvm_id_value(gpu_id));
|
||||
|
||||
return gpu;
|
||||
}
|
||||
|
||||
// Helper to iterate over all GPUs in the input mask
|
||||
#define for_each_global_gpu_in_mask(gpu, global_mask) \
|
||||
for (gpu = uvm_global_processor_mask_find_first_gpu(global_mask); \
|
||||
gpu != NULL; \
|
||||
gpu = __uvm_global_processor_mask_find_next_gpu(global_mask, gpu))
|
||||
#define for_each_gpu_in_mask(gpu, mask) \
|
||||
for (gpu = uvm_processor_mask_find_first_gpu(mask); \
|
||||
gpu != NULL; \
|
||||
gpu = __uvm_processor_mask_find_next_gpu(mask, gpu))
|
||||
|
||||
// Helper to iterate over all GPUs retained by the UVM driver (across all va spaces)
|
||||
#define for_each_global_gpu(gpu) \
|
||||
for (({uvm_assert_mutex_locked(&g_uvm_global.global_lock); \
|
||||
gpu = uvm_global_processor_mask_find_first_gpu(&g_uvm_global.retained_gpus);}); \
|
||||
gpu != NULL; \
|
||||
gpu = __uvm_global_processor_mask_find_next_gpu(&g_uvm_global.retained_gpus, gpu))
|
||||
// Helper to iterate over all GPUs retained by the UVM driver
|
||||
// (across all va spaces).
|
||||
#define for_each_gpu(gpu) \
|
||||
for (({uvm_assert_mutex_locked(&g_uvm_global.global_lock); \
|
||||
gpu = uvm_processor_mask_find_first_gpu(&g_uvm_global.retained_gpus);}); \
|
||||
gpu != NULL; \
|
||||
gpu = __uvm_processor_mask_find_next_gpu(&g_uvm_global.retained_gpus, gpu))
|
||||
|
||||
// LOCKING: Must hold either the global_lock or the gpu_table_lock
|
||||
static uvm_parent_gpu_t *uvm_global_find_next_parent_gpu(uvm_parent_gpu_t *parent_gpu)
|
||||
@ -344,7 +334,7 @@ static uvm_parent_gpu_t *uvm_global_find_next_parent_gpu(uvm_parent_gpu_t *paren
|
||||
NvU32 i;
|
||||
|
||||
if (parent_gpu) {
|
||||
NvU32 gpu_index = uvm_id_gpu_index(parent_gpu->id);
|
||||
NvU32 gpu_index = uvm_parent_id_gpu_index(parent_gpu->id);
|
||||
i = gpu_index + 1;
|
||||
}
|
||||
else {
|
||||
@ -353,7 +343,7 @@ static uvm_parent_gpu_t *uvm_global_find_next_parent_gpu(uvm_parent_gpu_t *paren
|
||||
|
||||
parent_gpu = NULL;
|
||||
|
||||
while (i < UVM_MAX_GPUS) {
|
||||
while (i < UVM_PARENT_ID_MAX_GPUS) {
|
||||
if (g_uvm_global.parent_gpus[i]) {
|
||||
parent_gpu = g_uvm_global.parent_gpus[i];
|
||||
break;
|
||||
@ -369,18 +359,18 @@ static uvm_parent_gpu_t *uvm_global_find_next_parent_gpu(uvm_parent_gpu_t *paren
|
||||
static uvm_gpu_t *uvm_gpu_find_next_valid_gpu_in_parent(uvm_parent_gpu_t *parent_gpu, uvm_gpu_t *cur_gpu)
|
||||
{
|
||||
uvm_gpu_t *gpu = NULL;
|
||||
uvm_global_gpu_id_t global_gpu_id;
|
||||
uvm_gpu_id_t gpu_id;
|
||||
NvU32 sub_processor_index;
|
||||
NvU32 cur_sub_processor_index;
|
||||
|
||||
UVM_ASSERT(parent_gpu);
|
||||
|
||||
global_gpu_id = uvm_global_gpu_id_from_gpu_id(parent_gpu->id);
|
||||
cur_sub_processor_index = cur_gpu ? uvm_global_id_sub_processor_index(cur_gpu->global_id) : -1;
|
||||
gpu_id = uvm_gpu_id_from_parent_gpu_id(parent_gpu->id);
|
||||
cur_sub_processor_index = cur_gpu ? uvm_id_sub_processor_index(cur_gpu->id) : -1;
|
||||
|
||||
sub_processor_index = find_next_bit(parent_gpu->valid_gpus, UVM_ID_MAX_SUB_PROCESSORS, cur_sub_processor_index + 1);
|
||||
if (sub_processor_index < UVM_ID_MAX_SUB_PROCESSORS) {
|
||||
gpu = uvm_gpu_get(uvm_global_id_from_value(uvm_global_id_value(global_gpu_id) + sub_processor_index));
|
||||
sub_processor_index = find_next_bit(parent_gpu->valid_gpus, UVM_PARENT_ID_MAX_SUB_PROCESSORS, cur_sub_processor_index + 1);
|
||||
if (sub_processor_index < UVM_PARENT_ID_MAX_SUB_PROCESSORS) {
|
||||
gpu = uvm_gpu_get(uvm_id_from_value(uvm_id_value(gpu_id) + sub_processor_index));
|
||||
UVM_ASSERT(gpu != NULL);
|
||||
}
|
||||
|
||||
@ -400,18 +390,18 @@ static uvm_gpu_t *uvm_gpu_find_next_valid_gpu_in_parent(uvm_parent_gpu_t *parent
|
||||
(gpu) != NULL; \
|
||||
(gpu) = uvm_gpu_find_next_valid_gpu_in_parent((parent_gpu), (gpu)))
|
||||
|
||||
// Helper which calls uvm_gpu_retain on each GPU in mask
|
||||
void uvm_global_mask_retain(const uvm_global_processor_mask_t *mask);
|
||||
// Helper which calls uvm_gpu_retain() on each GPU in mask.
|
||||
void uvm_global_gpu_retain(const uvm_processor_mask_t *mask);
|
||||
|
||||
// Helper which calls uvm_gpu_release_locked on each GPU in mask.
|
||||
//
|
||||
// LOCKING: this function takes and releases the global lock if the input mask
|
||||
// is not empty
|
||||
void uvm_global_mask_release(const uvm_global_processor_mask_t *mask);
|
||||
void uvm_global_gpu_release(const uvm_processor_mask_t *mask);
|
||||
|
||||
// Check for ECC errors for all GPUs in a mask
|
||||
// Notably this check cannot be performed where it's not safe to call into RM.
|
||||
NV_STATUS uvm_global_mask_check_ecc_error(uvm_global_processor_mask_t *gpus);
|
||||
NV_STATUS uvm_global_gpu_check_ecc_error(uvm_processor_mask_t *gpus);
|
||||
|
||||
// Pre-allocate fault service contexts.
|
||||
NV_STATUS uvm_service_block_context_init(void);
|
||||
|
@ -87,7 +87,7 @@ static uvm_gpu_link_type_t get_gpu_link_type(UVM_LINK_TYPE link_type)
|
||||
}
|
||||
}
|
||||
|
||||
static void fill_gpu_info(uvm_parent_gpu_t *parent_gpu, const UvmGpuInfo *gpu_info)
|
||||
static void fill_parent_gpu_info(uvm_parent_gpu_t *parent_gpu, const UvmGpuInfo *gpu_info)
|
||||
{
|
||||
char uuid_buffer[UVM_GPU_UUID_TEXT_BUFFER_LENGTH];
|
||||
|
||||
@ -119,7 +119,7 @@ static void fill_gpu_info(uvm_parent_gpu_t *parent_gpu, const UvmGpuInfo *gpu_in
|
||||
snprintf(parent_gpu->name,
|
||||
sizeof(parent_gpu->name),
|
||||
"ID %u: %s: %s",
|
||||
uvm_id_value(parent_gpu->id),
|
||||
uvm_parent_id_value(parent_gpu->id),
|
||||
parent_gpu->rm_info.name,
|
||||
uuid_buffer);
|
||||
}
|
||||
@ -147,6 +147,62 @@ static NV_STATUS get_gpu_caps(uvm_gpu_t *gpu)
|
||||
return NV_OK;
|
||||
}
|
||||
|
||||
|
||||
// Return a PASID to use with the internal address space (AS), or -1 if not
|
||||
// supported. This PASID is needed to enable ATS in the internal AS, but it is
|
||||
// not used in address translation requests, which only translate GPA->SPA.
|
||||
// The buffer management thus remains the same: DMA mapped GPA addresses can
|
||||
// be accessed by the GPU, while unmapped addresses can not and any access is
|
||||
// blocked and potentially unrecoverable to the engine that made it.
|
||||
static int gpu_get_internal_pasid(const uvm_gpu_t *gpu)
|
||||
{
|
||||
#if UVM_ATS_SVA_SUPPORTED() && defined(NV_IOMMU_IS_DMA_DOMAIN_PRESENT)
|
||||
// iommu_is_dma_domain() was added in Linux 5.15 by commit bf3aed4660c6
|
||||
// ("iommu: Introduce explicit type for non-strict DMA domains")
|
||||
//
|
||||
// SVA is not required for enabling ATS for internal UVM address spaces.
|
||||
// However, it conveniently combines the necessary check for permissive license
|
||||
// and the special behaviour of PASID 0 on SMMUv3 described below.
|
||||
//
|
||||
// PASID 0 is reserved on aarch64 SMMUv3 (see section 3.3.2 of the SMMU spec)
|
||||
// because the corresponding page table is used to translate requests and
|
||||
// transactions without an associated PASID.
|
||||
// Linux 6.6+ generalized this value as IOMMU_NO_PASID for all architectures
|
||||
// commit 4298780126c2 ("iommu: Generalize PASID 0 for normal DMA w/o PASID")
|
||||
#ifdef IOMMU_NO_PASID
|
||||
#define UVM_INTERNAL_PASID IOMMU_NO_PASID
|
||||
#else
|
||||
#define UVM_INTERNAL_PASID 0
|
||||
#endif
|
||||
|
||||
// Enable internal ATS only if ATS is enabled in general and we are using
|
||||
// 64kB base page size. The base page size limitation is needed to avoid
|
||||
// GH180 MMU behaviour which does not refetch invalid 4K ATS translations
|
||||
// on access (see bug 3949400). This also works in virtualized environments
|
||||
// because the entire 64kB guest page has to be mapped and pinned by the
|
||||
// hypervisor for device access.
|
||||
if (g_uvm_global.ats.enabled && PAGE_SIZE == UVM_PAGE_SIZE_64K) {
|
||||
struct device *dev = &gpu->parent->pci_dev->dev;
|
||||
struct iommu_domain *domain = iommu_get_domain_for_dev(dev);
|
||||
|
||||
// Check that the iommu domain is controlled by linux DMA API and
|
||||
// return a valid reserved PASID. Using a reserved PASID is OK since
|
||||
// this value is only used for the internal UVM address space that
|
||||
// uses ATS only for GPA->SPA translations that don't use PASID.
|
||||
//
|
||||
// If a general reserved PASID is not available (e.g. non-smmuv3, <6.6)
|
||||
// we'd need to reserve a PASID from the IOMMU driver here, or risk
|
||||
// PASID collision. Note that since the PASID should not be used during
|
||||
// normal operation, the collision would only manifest in error paths.
|
||||
if (domain && iommu_is_dma_domain(domain))
|
||||
return UVM_INTERNAL_PASID;
|
||||
}
|
||||
#endif
|
||||
|
||||
/* Invalid PASID for internal RM address space */
|
||||
return -1;
|
||||
}
|
||||
|
||||
static NV_STATUS alloc_and_init_address_space(uvm_gpu_t *gpu)
|
||||
{
|
||||
NV_STATUS status;
|
||||
@ -155,6 +211,7 @@ static NV_STATUS alloc_and_init_address_space(uvm_gpu_t *gpu)
|
||||
status = uvm_rm_locked_call(nvUvmInterfaceAddressSpaceCreate(uvm_gpu_device_handle(gpu),
|
||||
gpu->parent->rm_va_base,
|
||||
gpu->parent->rm_va_size,
|
||||
gpu_get_internal_pasid(gpu) != -1,
|
||||
&gpu->rm_address_space,
|
||||
&gpu_address_space_info));
|
||||
if (status != NV_OK)
|
||||
@ -218,19 +275,12 @@ static bool gpu_supports_uvm(uvm_parent_gpu_t *parent_gpu)
|
||||
return parent_gpu->rm_info.subdeviceCount == 1;
|
||||
}
|
||||
|
||||
static bool platform_uses_canonical_form_address(void)
|
||||
{
|
||||
if (NVCPU_IS_PPC64LE)
|
||||
return false;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool uvm_gpu_can_address(uvm_gpu_t *gpu, NvU64 addr, NvU64 size)
|
||||
{
|
||||
// Lower and upper address spaces are typically found in platforms that use
|
||||
// the canonical address form.
|
||||
NvU64 max_va_lower;
|
||||
NvU64 min_va_upper;
|
||||
NvU64 addr_end = addr + size - 1;
|
||||
NvU8 gpu_addr_shift;
|
||||
NvU8 cpu_addr_shift;
|
||||
@ -243,7 +293,7 @@ bool uvm_gpu_can_address(uvm_gpu_t *gpu, NvU64 addr, NvU64 size)
|
||||
UVM_ASSERT(size > 0);
|
||||
|
||||
gpu_addr_shift = gpu->address_space_tree.hal->num_va_bits();
|
||||
cpu_addr_shift = fls64(TASK_SIZE - 1) + 1;
|
||||
cpu_addr_shift = uvm_cpu_num_va_bits();
|
||||
addr_shift = gpu_addr_shift;
|
||||
|
||||
// Pascal+ GPUs are capable of accessing kernel pointers in various modes
|
||||
@ -279,9 +329,7 @@ bool uvm_gpu_can_address(uvm_gpu_t *gpu, NvU64 addr, NvU64 size)
|
||||
// 0 +----------------+ 0 +----------------+
|
||||
|
||||
// On canonical form address platforms and Pascal+ GPUs.
|
||||
if (platform_uses_canonical_form_address() && gpu_addr_shift > 40) {
|
||||
NvU64 min_va_upper;
|
||||
|
||||
if (uvm_platform_uses_canonical_form_address() && gpu_addr_shift > 40) {
|
||||
// On x86, when cpu_addr_shift > gpu_addr_shift, it means the CPU uses
|
||||
// 5-level paging and the GPU is pre-Hopper. On Pascal-Ada GPUs (49b
|
||||
// wide VA) we set addr_shift to match a 4-level paging x86 (48b wide).
|
||||
@ -292,15 +340,11 @@ bool uvm_gpu_can_address(uvm_gpu_t *gpu, NvU64 addr, NvU64 size)
|
||||
addr_shift = gpu_addr_shift;
|
||||
else
|
||||
addr_shift = cpu_addr_shift;
|
||||
}
|
||||
|
||||
min_va_upper = (NvU64)((NvS64)(1ULL << 63) >> (64 - addr_shift));
|
||||
max_va_lower = 1ULL << (addr_shift - 1);
|
||||
return (addr_end < max_va_lower) || (addr >= min_va_upper);
|
||||
}
|
||||
else {
|
||||
max_va_lower = 1ULL << addr_shift;
|
||||
return addr_end < max_va_lower;
|
||||
}
|
||||
uvm_get_unaddressable_range(addr_shift, &max_va_lower, &min_va_upper);
|
||||
|
||||
return (addr_end < max_va_lower) || (addr >= min_va_upper);
|
||||
}
|
||||
|
||||
// The internal UVM VAS does not use canonical form addresses.
|
||||
@ -326,14 +370,14 @@ NvU64 uvm_parent_gpu_canonical_address(uvm_parent_gpu_t *parent_gpu, NvU64 addr)
|
||||
NvU8 addr_shift;
|
||||
NvU64 input_addr = addr;
|
||||
|
||||
if (platform_uses_canonical_form_address()) {
|
||||
if (uvm_platform_uses_canonical_form_address()) {
|
||||
// When the CPU VA width is larger than GPU's, it means that:
|
||||
// On ARM: the CPU is on LVA mode and the GPU is pre-Hopper.
|
||||
// On x86: the CPU uses 5-level paging and the GPU is pre-Hopper.
|
||||
// We sign-extend on the 48b on ARM and on the 47b on x86 to mirror the
|
||||
// behavior of CPUs with smaller (than GPU) VA widths.
|
||||
gpu_addr_shift = parent_gpu->arch_hal->mmu_mode_hal(UVM_PAGE_SIZE_64K)->num_va_bits();
|
||||
cpu_addr_shift = fls64(TASK_SIZE - 1) + 1;
|
||||
cpu_addr_shift = uvm_cpu_num_va_bits();
|
||||
|
||||
if (cpu_addr_shift > gpu_addr_shift)
|
||||
addr_shift = NVCPU_IS_X86_64 ? 48 : 49;
|
||||
@ -567,7 +611,7 @@ static void gpu_info_print_common(uvm_gpu_t *gpu, struct seq_file *s)
|
||||
|
||||
gpu_info_print_ce_caps(gpu, s);
|
||||
|
||||
if (uvm_conf_computing_mode_enabled(gpu)) {
|
||||
if (g_uvm_global.conf_computing_enabled) {
|
||||
UVM_SEQ_OR_DBG_PRINT(s, "dma_buffer_pool_num_buffers %lu\n",
|
||||
gpu->conf_computing.dma_buffer_pool.num_dma_buffers);
|
||||
}
|
||||
@ -792,7 +836,7 @@ static void deinit_parent_procfs_files(uvm_parent_gpu_t *parent_gpu)
|
||||
static NV_STATUS init_procfs_dirs(uvm_gpu_t *gpu)
|
||||
{
|
||||
struct proc_dir_entry *gpu_base_dir_entry;
|
||||
char symlink_name[16]; // Hold a global_gpu_id_t value in decimal.
|
||||
char symlink_name[16]; // Hold a uvm_gpu_id_t value in decimal.
|
||||
char uuid_text_buffer[UVM_GPU_UUID_TEXT_BUFFER_LENGTH];
|
||||
char gpu_dir_name[sizeof(symlink_name) + sizeof(uuid_text_buffer) + 1];
|
||||
|
||||
@ -804,20 +848,20 @@ static NV_STATUS init_procfs_dirs(uvm_gpu_t *gpu)
|
||||
gpu_base_dir_entry = uvm_procfs_get_gpu_base_dir();
|
||||
|
||||
// Create UVM-GPU-${UUID}/${sub_processor_index} directory
|
||||
snprintf(gpu_dir_name, sizeof(gpu_dir_name), "%u", uvm_global_id_sub_processor_index(gpu->global_id));
|
||||
snprintf(gpu_dir_name, sizeof(gpu_dir_name), "%u", uvm_id_sub_processor_index(gpu->id));
|
||||
|
||||
gpu->procfs.dir = NV_CREATE_PROC_DIR(gpu_dir_name, gpu->parent->procfs.dir);
|
||||
if (gpu->procfs.dir == NULL)
|
||||
return NV_ERR_OPERATING_SYSTEM;
|
||||
|
||||
// Create symlink from ${global_gpu_id} to
|
||||
// Create symlink from ${gpu_id} to
|
||||
// gpus/UVM-GPU-${UUID}/${sub_processor_index}
|
||||
snprintf(symlink_name, sizeof(symlink_name), "%u", uvm_global_id_value(gpu->global_id));
|
||||
snprintf(symlink_name, sizeof(symlink_name), "%u", uvm_id_value(gpu->id));
|
||||
snprintf(gpu_dir_name,
|
||||
sizeof(gpu_dir_name),
|
||||
"%s/%u",
|
||||
uuid_text_buffer,
|
||||
uvm_global_id_sub_processor_index(gpu->global_id));
|
||||
uvm_id_sub_processor_index(gpu->id));
|
||||
|
||||
gpu->procfs.dir_symlink = proc_symlink(symlink_name, gpu_base_dir_entry, gpu_dir_name);
|
||||
if (gpu->procfs.dir_symlink == NULL)
|
||||
@ -875,15 +919,10 @@ static NV_STATUS init_semaphore_pools(uvm_gpu_t *gpu)
|
||||
|
||||
// When the Confidential Computing feature is enabled, a separate secure
|
||||
// pool is created that holds pages allocated in the CPR of vidmem.
|
||||
if (uvm_conf_computing_mode_enabled(gpu)) {
|
||||
status = uvm_gpu_semaphore_secure_pool_create(gpu, &gpu->secure_semaphore_pool);
|
||||
if (status != NV_OK)
|
||||
return status;
|
||||
}
|
||||
if (g_uvm_global.conf_computing_enabled)
|
||||
return uvm_gpu_semaphore_secure_pool_create(gpu, &gpu->secure_semaphore_pool);
|
||||
|
||||
for_each_global_gpu(other_gpu) {
|
||||
if (uvm_conf_computing_mode_enabled(gpu))
|
||||
break;
|
||||
for_each_gpu(other_gpu) {
|
||||
if (other_gpu == gpu)
|
||||
continue;
|
||||
status = uvm_gpu_semaphore_pool_map_gpu(other_gpu->semaphore_pool, gpu);
|
||||
@ -898,7 +937,7 @@ static void deinit_semaphore_pools(uvm_gpu_t *gpu)
|
||||
{
|
||||
uvm_gpu_t *other_gpu;
|
||||
|
||||
for_each_global_gpu(other_gpu) {
|
||||
for_each_gpu(other_gpu) {
|
||||
if (other_gpu == gpu)
|
||||
continue;
|
||||
uvm_gpu_semaphore_pool_unmap_gpu(other_gpu->semaphore_pool, gpu);
|
||||
@ -908,24 +947,26 @@ static void deinit_semaphore_pools(uvm_gpu_t *gpu)
|
||||
uvm_gpu_semaphore_pool_destroy(gpu->secure_semaphore_pool);
|
||||
}
|
||||
|
||||
static NV_STATUS find_unused_global_gpu_id(uvm_parent_gpu_t *parent_gpu, uvm_global_gpu_id_t *out_id)
|
||||
static NV_STATUS find_unused_gpu_id(uvm_parent_gpu_t *parent_gpu, uvm_gpu_id_t *out_id)
|
||||
{
|
||||
NvU32 i;
|
||||
|
||||
uvm_assert_mutex_locked(&g_uvm_global.global_lock);
|
||||
|
||||
if (!parent_gpu) {
|
||||
for (i = 0; i < UVM_MAX_GPUS; i++) {
|
||||
for (i = 0; i < UVM_PARENT_ID_MAX_GPUS; i++) {
|
||||
if (!g_uvm_global.parent_gpus[i]) {
|
||||
*out_id = uvm_global_gpu_id_from_parent_index(i);
|
||||
*out_id = uvm_gpu_id_from_sub_processor_index(i, 0);
|
||||
return NV_OK;
|
||||
}
|
||||
}
|
||||
}
|
||||
else {
|
||||
NvU32 sub_processor_index = find_first_zero_bit(parent_gpu->valid_gpus, UVM_ID_MAX_SUB_PROCESSORS);
|
||||
if (sub_processor_index < UVM_ID_MAX_SUB_PROCESSORS) {
|
||||
*out_id = uvm_global_gpu_id_from_sub_processor_index(parent_gpu->id, sub_processor_index);
|
||||
NvU32 sub_processor_index = find_first_zero_bit(parent_gpu->valid_gpus, UVM_PARENT_ID_MAX_SUB_PROCESSORS);
|
||||
|
||||
if (sub_processor_index < UVM_PARENT_ID_MAX_SUB_PROCESSORS) {
|
||||
i = uvm_parent_id_gpu_index(parent_gpu->id);
|
||||
*out_id = uvm_gpu_id_from_sub_processor_index(i, sub_processor_index);
|
||||
return NV_OK;
|
||||
}
|
||||
}
|
||||
@ -936,7 +977,7 @@ static NV_STATUS find_unused_global_gpu_id(uvm_parent_gpu_t *parent_gpu, uvm_glo
|
||||
// Allocates a uvm_parent_gpu_t, assigns the GPU ID, and sets up basic data
|
||||
// structures, but leaves all other initialization up to the caller.
|
||||
static NV_STATUS alloc_parent_gpu(const NvProcessorUuid *gpu_uuid,
|
||||
uvm_gpu_id_t gpu_id,
|
||||
uvm_parent_gpu_id_t gpu_id,
|
||||
uvm_parent_gpu_t **parent_gpu_out)
|
||||
{
|
||||
uvm_parent_gpu_t *parent_gpu;
|
||||
@ -948,7 +989,7 @@ static NV_STATUS alloc_parent_gpu(const NvProcessorUuid *gpu_uuid,
|
||||
|
||||
parent_gpu->id = gpu_id;
|
||||
|
||||
uvm_processor_uuid_copy(&parent_gpu->uuid, gpu_uuid);
|
||||
uvm_uuid_copy(&parent_gpu->uuid, gpu_uuid);
|
||||
uvm_sema_init(&parent_gpu->isr.replayable_faults.service_lock, 1, UVM_LOCK_ORDER_ISR);
|
||||
uvm_sema_init(&parent_gpu->isr.non_replayable_faults.service_lock, 1, UVM_LOCK_ORDER_ISR);
|
||||
uvm_sema_init(&parent_gpu->isr.access_counters.service_lock, 1, UVM_LOCK_ORDER_ISR);
|
||||
@ -969,7 +1010,7 @@ static NV_STATUS alloc_parent_gpu(const NvProcessorUuid *gpu_uuid,
|
||||
|
||||
// Allocates a uvm_gpu_t struct and initializes the basic fields and leaves all
|
||||
// other initialization up to the caller.
|
||||
static uvm_gpu_t *alloc_gpu(uvm_parent_gpu_t *parent_gpu, uvm_global_gpu_id_t global_gpu_id)
|
||||
static uvm_gpu_t *alloc_gpu(uvm_parent_gpu_t *parent_gpu, uvm_gpu_id_t gpu_id)
|
||||
{
|
||||
NvU32 sub_processor_index;
|
||||
uvm_gpu_t *gpu;
|
||||
@ -978,15 +1019,14 @@ static uvm_gpu_t *alloc_gpu(uvm_parent_gpu_t *parent_gpu, uvm_global_gpu_id_t gl
|
||||
if (!gpu)
|
||||
return gpu;
|
||||
|
||||
gpu->id = parent_gpu->id;
|
||||
gpu->global_id = global_gpu_id;
|
||||
gpu->id = gpu_id;
|
||||
gpu->parent = parent_gpu;
|
||||
|
||||
// Initialize enough of the gpu struct for remove_gpu to be called
|
||||
gpu->magic = UVM_GPU_MAGIC_VALUE;
|
||||
uvm_spin_lock_init(&gpu->peer_info.peer_gpus_lock, UVM_LOCK_ORDER_LEAF);
|
||||
|
||||
sub_processor_index = uvm_global_id_sub_processor_index(global_gpu_id);
|
||||
sub_processor_index = uvm_id_sub_processor_index(gpu_id);
|
||||
parent_gpu->gpus[sub_processor_index] = gpu;
|
||||
|
||||
return gpu;
|
||||
@ -1003,7 +1043,7 @@ static NV_STATUS configure_address_space(uvm_gpu_t *gpu)
|
||||
NULL,
|
||||
UVM_PAGE_TREE_TYPE_KERNEL,
|
||||
gpu->big_page.internal_size,
|
||||
uvm_gpu_page_tree_init_location(gpu),
|
||||
uvm_get_page_tree_location(gpu->parent),
|
||||
&gpu->address_space_tree);
|
||||
if (status != NV_OK) {
|
||||
UVM_ERR_PRINT("Initializing the page tree failed: %s, GPU %s\n", nvstatusToString(status), uvm_gpu_name(gpu));
|
||||
@ -1026,7 +1066,7 @@ static NV_STATUS configure_address_space(uvm_gpu_t *gpu)
|
||||
status = uvm_rm_locked_call(nvUvmInterfaceSetPageDirectory(gpu->rm_address_space,
|
||||
uvm_page_tree_pdb(&gpu->address_space_tree)->addr.address, num_entries,
|
||||
uvm_page_tree_pdb(&gpu->address_space_tree)->addr.aperture == UVM_APERTURE_VID,
|
||||
-1U /* Invalid PASID for internal RM address space */));
|
||||
gpu_get_internal_pasid(gpu)));
|
||||
if (status != NV_OK) {
|
||||
UVM_ERR_PRINT("nvUvmInterfaceSetPageDirectory() failed: %s, GPU %s\n",
|
||||
nvstatusToString(status),
|
||||
@ -1095,16 +1135,13 @@ static NV_STATUS init_parent_gpu(uvm_parent_gpu_t *parent_gpu,
|
||||
&parent_gpu->rm_device,
|
||||
NV_FALSE));
|
||||
if (status != NV_OK) {
|
||||
UVM_ERR_PRINT("Creating RM device failed: %s, GPU %s\n", nvstatusToString(status), parent_gpu->name);
|
||||
UVM_ERR_PRINT("Creating RM device failed: %s, GPU %s\n",
|
||||
nvstatusToString(status),
|
||||
uvm_parent_gpu_name(parent_gpu));
|
||||
return status;
|
||||
}
|
||||
|
||||
status = uvm_conf_computing_init_parent_gpu(parent_gpu);
|
||||
if (status != NV_OK) {
|
||||
UVM_ERR_PRINT("Confidential computing: %s, GPU %s\n",
|
||||
nvstatusToString(status), parent_gpu->name);
|
||||
return status;
|
||||
}
|
||||
uvm_conf_computing_check_parent_gpu(parent_gpu);
|
||||
|
||||
parent_gpu->pci_dev = gpu_platform_info->pci_dev;
|
||||
parent_gpu->closest_cpu_numa_node = dev_to_node(&parent_gpu->pci_dev->dev);
|
||||
@ -1115,7 +1152,8 @@ static NV_STATUS init_parent_gpu(uvm_parent_gpu_t *parent_gpu,
|
||||
|
||||
parent_gpu->virt_mode = gpu_info->virtMode;
|
||||
if (parent_gpu->virt_mode == UVM_VIRT_MODE_LEGACY) {
|
||||
UVM_ERR_PRINT("Failed to init GPU %s. UVM is not supported in legacy virtualization mode\n", parent_gpu->name);
|
||||
UVM_ERR_PRINT("Failed to init GPU %s. UVM is not supported in legacy virtualization mode\n",
|
||||
uvm_parent_gpu_name(parent_gpu));
|
||||
return NV_ERR_NOT_SUPPORTED;
|
||||
}
|
||||
|
||||
@ -1124,13 +1162,17 @@ static NV_STATUS init_parent_gpu(uvm_parent_gpu_t *parent_gpu,
|
||||
|
||||
status = init_parent_procfs_dir(parent_gpu);
|
||||
if (status != NV_OK) {
|
||||
UVM_ERR_PRINT("Failed to init parent procfs dir: %s, GPU %s\n", nvstatusToString(status), parent_gpu->name);
|
||||
UVM_ERR_PRINT("Failed to init parent procfs dir: %s, GPU %s\n",
|
||||
nvstatusToString(status),
|
||||
uvm_parent_gpu_name(parent_gpu));
|
||||
return status;
|
||||
}
|
||||
|
||||
status = uvm_hal_init_gpu(parent_gpu);
|
||||
if (status != NV_OK) {
|
||||
UVM_ERR_PRINT("Failed to init GPU hal: %s, GPU %s\n", nvstatusToString(status), parent_gpu->name);
|
||||
UVM_ERR_PRINT("Failed to init GPU hal: %s, GPU %s\n",
|
||||
nvstatusToString(status),
|
||||
uvm_parent_gpu_name(parent_gpu));
|
||||
return status;
|
||||
}
|
||||
|
||||
@ -1143,19 +1185,25 @@ static NV_STATUS init_parent_gpu(uvm_parent_gpu_t *parent_gpu,
|
||||
|
||||
status = uvm_ats_add_gpu(parent_gpu);
|
||||
if (status != NV_OK) {
|
||||
UVM_ERR_PRINT("uvm_ats_add_gpu failed: %s, GPU %s\n", nvstatusToString(status), parent_gpu->name);
|
||||
UVM_ERR_PRINT("uvm_ats_add_gpu failed: %s, GPU %s\n",
|
||||
nvstatusToString(status),
|
||||
uvm_parent_gpu_name(parent_gpu));
|
||||
return status;
|
||||
}
|
||||
|
||||
status = init_parent_procfs_files(parent_gpu);
|
||||
if (status != NV_OK) {
|
||||
UVM_ERR_PRINT("Failed to init parent procfs files: %s, GPU %s\n", nvstatusToString(status), parent_gpu->name);
|
||||
UVM_ERR_PRINT("Failed to init parent procfs files: %s, GPU %s\n",
|
||||
nvstatusToString(status),
|
||||
uvm_parent_gpu_name(parent_gpu));
|
||||
return status;
|
||||
}
|
||||
|
||||
status = uvm_gpu_init_isr(parent_gpu);
|
||||
status = uvm_parent_gpu_init_isr(parent_gpu);
|
||||
if (status != NV_OK) {
|
||||
UVM_ERR_PRINT("Failed to init ISR: %s, GPU %s\n", nvstatusToString(status), parent_gpu->name);
|
||||
UVM_ERR_PRINT("Failed to init ISR: %s, GPU %s\n",
|
||||
nvstatusToString(status),
|
||||
uvm_parent_gpu_name(parent_gpu));
|
||||
return status;
|
||||
}
|
||||
|
||||
@ -1169,7 +1217,7 @@ static NV_STATUS init_gpu(uvm_gpu_t *gpu, const UvmGpuInfo *gpu_info)
|
||||
if (gpu->parent->smc.enabled) {
|
||||
status = uvm_rm_locked_call(nvUvmInterfaceDeviceCreate(uvm_global_session_handle(),
|
||||
gpu_info,
|
||||
uvm_gpu_uuid(gpu),
|
||||
&gpu->parent->uuid,
|
||||
&gpu->smc.rm_device,
|
||||
NV_TRUE));
|
||||
if (status != NV_OK) {
|
||||
@ -1178,6 +1226,7 @@ static NV_STATUS init_gpu(uvm_gpu_t *gpu, const UvmGpuInfo *gpu_info)
|
||||
}
|
||||
}
|
||||
|
||||
uvm_uuid_copy(&gpu->uuid, &gpu_info->uuid);
|
||||
gpu->smc.swizz_id = gpu_info->smcSwizzId;
|
||||
|
||||
// Initialize the per-GPU procfs dirs as early as possible so that other
|
||||
@ -1283,12 +1332,13 @@ static NV_STATUS init_gpu(uvm_gpu_t *gpu, const UvmGpuInfo *gpu_info)
|
||||
// TODO: Bug 2844714: Split parent-specific parts of this function out into a
|
||||
// separate add_parent_gpu() function.
|
||||
static NV_STATUS add_gpu(const NvProcessorUuid *gpu_uuid,
|
||||
const uvm_global_gpu_id_t global_gpu_id,
|
||||
const uvm_gpu_id_t gpu_id,
|
||||
const UvmGpuInfo *gpu_info,
|
||||
const UvmGpuPlatformInfo *gpu_platform_info,
|
||||
uvm_parent_gpu_t *parent_gpu,
|
||||
uvm_gpu_t **gpu_out)
|
||||
{
|
||||
char uuid_buffer[UVM_GPU_UUID_TEXT_BUFFER_LENGTH];
|
||||
NV_STATUS status;
|
||||
bool alloc_parent = (parent_gpu == NULL);
|
||||
uvm_gpu_t *gpu = NULL;
|
||||
@ -1296,12 +1346,12 @@ static NV_STATUS add_gpu(const NvProcessorUuid *gpu_uuid,
|
||||
uvm_assert_mutex_locked(&g_uvm_global.global_lock);
|
||||
|
||||
if (alloc_parent) {
|
||||
status = alloc_parent_gpu(gpu_uuid, uvm_gpu_id_from_global_gpu_id(global_gpu_id), &parent_gpu);
|
||||
status = alloc_parent_gpu(gpu_uuid, uvm_parent_gpu_id_from_gpu_id(gpu_id), &parent_gpu);
|
||||
if (status != NV_OK)
|
||||
return status;
|
||||
}
|
||||
|
||||
gpu = alloc_gpu(parent_gpu, global_gpu_id);
|
||||
gpu = alloc_gpu(parent_gpu, gpu_id);
|
||||
if (!gpu) {
|
||||
if (alloc_parent)
|
||||
uvm_parent_gpu_kref_put(parent_gpu);
|
||||
@ -1312,7 +1362,14 @@ static NV_STATUS add_gpu(const NvProcessorUuid *gpu_uuid,
|
||||
parent_gpu->num_retained_gpus++;
|
||||
|
||||
if (alloc_parent)
|
||||
fill_gpu_info(parent_gpu, gpu_info);
|
||||
fill_parent_gpu_info(parent_gpu, gpu_info);
|
||||
|
||||
format_uuid_to_buffer(uuid_buffer, sizeof(uuid_buffer), &parent_gpu->uuid);
|
||||
snprintf(gpu->name,
|
||||
sizeof(gpu->name),
|
||||
"ID %u: %s",
|
||||
uvm_id_value(gpu->id),
|
||||
uuid_buffer);
|
||||
|
||||
// After this point all error clean up should be handled by remove_gpu()
|
||||
|
||||
@ -1337,7 +1394,7 @@ static NV_STATUS add_gpu(const NvProcessorUuid *gpu_uuid,
|
||||
goto error;
|
||||
|
||||
atomic64_set(&gpu->retained_count, 1);
|
||||
uvm_global_processor_mask_set(&g_uvm_global.retained_gpus, gpu->global_id);
|
||||
uvm_processor_mask_set(&g_uvm_global.retained_gpus, gpu->id);
|
||||
|
||||
uvm_spin_lock_irqsave(&g_uvm_global.gpu_table_lock);
|
||||
|
||||
@ -1345,8 +1402,8 @@ static NV_STATUS add_gpu(const NvProcessorUuid *gpu_uuid,
|
||||
uvm_global_add_parent_gpu(parent_gpu);
|
||||
|
||||
// Mark the GPU as valid in the parent GPU's GPU table.
|
||||
UVM_ASSERT(!test_bit(uvm_global_id_sub_processor_index(gpu->global_id), parent_gpu->valid_gpus));
|
||||
__set_bit(uvm_global_id_sub_processor_index(gpu->global_id), parent_gpu->valid_gpus);
|
||||
UVM_ASSERT(!test_bit(uvm_id_sub_processor_index(gpu->id), parent_gpu->valid_gpus));
|
||||
__set_bit(uvm_id_sub_processor_index(gpu->id), parent_gpu->valid_gpus);
|
||||
|
||||
// Although locking correctness does not, at this early point (before the
|
||||
// GPU is visible in the table) strictly require holding the gpu_table_lock
|
||||
@ -1407,9 +1464,9 @@ static void sync_parent_gpu_trackers(uvm_parent_gpu_t *parent_gpu,
|
||||
// Sync the replay tracker since it inherits dependencies from the VA block
|
||||
// trackers.
|
||||
if (sync_replay_tracker) {
|
||||
uvm_gpu_replayable_faults_isr_lock(parent_gpu);
|
||||
uvm_parent_gpu_replayable_faults_isr_lock(parent_gpu);
|
||||
status = uvm_tracker_wait(&parent_gpu->fault_buffer_info.replayable.replay_tracker);
|
||||
uvm_gpu_replayable_faults_isr_unlock(parent_gpu);
|
||||
uvm_parent_gpu_replayable_faults_isr_unlock(parent_gpu);
|
||||
|
||||
if (status != NV_OK)
|
||||
UVM_ASSERT(status == uvm_global_get_status());
|
||||
@ -1418,9 +1475,9 @@ static void sync_parent_gpu_trackers(uvm_parent_gpu_t *parent_gpu,
|
||||
// Sync the clear_faulted tracker since it inherits dependencies from the
|
||||
// VA block trackers, too.
|
||||
if (sync_clear_faulted_tracker) {
|
||||
uvm_gpu_non_replayable_faults_isr_lock(parent_gpu);
|
||||
uvm_parent_gpu_non_replayable_faults_isr_lock(parent_gpu);
|
||||
status = uvm_tracker_wait(&parent_gpu->fault_buffer_info.non_replayable.clear_faulted_tracker);
|
||||
uvm_gpu_non_replayable_faults_isr_unlock(parent_gpu);
|
||||
uvm_parent_gpu_non_replayable_faults_isr_unlock(parent_gpu);
|
||||
|
||||
if (status != NV_OK)
|
||||
UVM_ASSERT(status == uvm_global_get_status());
|
||||
@ -1467,7 +1524,7 @@ static void deinit_parent_gpu(uvm_parent_gpu_t *parent_gpu)
|
||||
UVM_ASSERT(parent_gpu->isr.access_counters.handling_ref_count == 0);
|
||||
|
||||
// Return ownership to RM
|
||||
uvm_gpu_deinit_isr(parent_gpu);
|
||||
uvm_parent_gpu_deinit_isr(parent_gpu);
|
||||
|
||||
deinit_parent_procfs_files(parent_gpu);
|
||||
|
||||
@ -1495,7 +1552,7 @@ static void deinit_gpu(uvm_gpu_t *gpu)
|
||||
uvm_gpu_t *other_gpu;
|
||||
|
||||
// Remove any pointers to this GPU from other GPUs' trackers.
|
||||
for_each_global_gpu(other_gpu) {
|
||||
for_each_gpu(other_gpu) {
|
||||
UVM_ASSERT(other_gpu != gpu);
|
||||
remove_gpus_from_gpu(other_gpu);
|
||||
}
|
||||
@ -1552,7 +1609,7 @@ static void remove_gpu(uvm_gpu_t *gpu)
|
||||
|
||||
uvm_assert_mutex_locked(&g_uvm_global.global_lock);
|
||||
|
||||
sub_processor_index = uvm_global_id_sub_processor_index(gpu->global_id);
|
||||
sub_processor_index = uvm_id_sub_processor_index(gpu->id);
|
||||
parent_gpu = gpu->parent;
|
||||
|
||||
UVM_ASSERT_MSG(uvm_gpu_retained_count(gpu) == 0,
|
||||
@ -1597,15 +1654,15 @@ static void remove_gpu(uvm_gpu_t *gpu)
|
||||
|
||||
uvm_spin_unlock_irqrestore(&g_uvm_global.gpu_table_lock);
|
||||
|
||||
uvm_global_processor_mask_clear(&g_uvm_global.retained_gpus, gpu->global_id);
|
||||
uvm_processor_mask_clear(&g_uvm_global.retained_gpus, gpu->id);
|
||||
|
||||
// If the parent is being freed, stop scheduling new bottom halves and
|
||||
// update relevant software state. Else flush any pending bottom halves
|
||||
// before continuing.
|
||||
if (free_parent)
|
||||
uvm_gpu_disable_isr(parent_gpu);
|
||||
uvm_parent_gpu_disable_isr(parent_gpu);
|
||||
else
|
||||
uvm_gpu_flush_bottom_halves(parent_gpu);
|
||||
uvm_parent_gpu_flush_bottom_halves(parent_gpu);
|
||||
|
||||
deinit_gpu(gpu);
|
||||
|
||||
@ -1625,11 +1682,11 @@ static void uvm_parent_gpu_destroy(nv_kref_t *nv_kref)
|
||||
NvU32 sub_processor_index;
|
||||
|
||||
UVM_ASSERT(parent_gpu->num_retained_gpus == 0);
|
||||
UVM_ASSERT(bitmap_empty(parent_gpu->valid_gpus, UVM_ID_MAX_SUB_PROCESSORS));
|
||||
UVM_ASSERT(bitmap_empty(parent_gpu->valid_gpus, UVM_PARENT_ID_MAX_SUB_PROCESSORS));
|
||||
|
||||
nv_kthread_q_stop(&parent_gpu->lazy_free_q);
|
||||
|
||||
for (sub_processor_index = 0; sub_processor_index < UVM_ID_MAX_SUB_PROCESSORS; sub_processor_index++)
|
||||
for (sub_processor_index = 0; sub_processor_index < UVM_PARENT_ID_MAX_SUB_PROCESSORS; sub_processor_index++)
|
||||
UVM_ASSERT(!parent_gpu->gpus[sub_processor_index]);
|
||||
|
||||
uvm_kvfree(parent_gpu);
|
||||
@ -1640,22 +1697,22 @@ void uvm_parent_gpu_kref_put(uvm_parent_gpu_t *parent_gpu)
|
||||
nv_kref_put(&parent_gpu->gpu_kref, uvm_parent_gpu_destroy);
|
||||
}
|
||||
|
||||
static void update_stats_gpu_fault_instance(uvm_gpu_t *gpu,
|
||||
const uvm_fault_buffer_entry_t *fault_entry,
|
||||
bool is_duplicate)
|
||||
static void update_stats_parent_gpu_fault_instance(uvm_parent_gpu_t *parent_gpu,
|
||||
const uvm_fault_buffer_entry_t *fault_entry,
|
||||
bool is_duplicate)
|
||||
{
|
||||
if (!fault_entry->is_replayable) {
|
||||
switch (fault_entry->fault_access_type)
|
||||
{
|
||||
case UVM_FAULT_ACCESS_TYPE_READ:
|
||||
++gpu->parent->fault_buffer_info.non_replayable.stats.num_read_faults;
|
||||
++parent_gpu->fault_buffer_info.non_replayable.stats.num_read_faults;
|
||||
break;
|
||||
case UVM_FAULT_ACCESS_TYPE_WRITE:
|
||||
++gpu->parent->fault_buffer_info.non_replayable.stats.num_write_faults;
|
||||
++parent_gpu->fault_buffer_info.non_replayable.stats.num_write_faults;
|
||||
break;
|
||||
case UVM_FAULT_ACCESS_TYPE_ATOMIC_WEAK:
|
||||
case UVM_FAULT_ACCESS_TYPE_ATOMIC_STRONG:
|
||||
++gpu->parent->fault_buffer_info.non_replayable.stats.num_atomic_faults;
|
||||
++parent_gpu->fault_buffer_info.non_replayable.stats.num_atomic_faults;
|
||||
break;
|
||||
default:
|
||||
UVM_ASSERT_MSG(false, "Invalid access type for non-replayable faults\n");
|
||||
@ -1663,9 +1720,9 @@ static void update_stats_gpu_fault_instance(uvm_gpu_t *gpu,
|
||||
}
|
||||
|
||||
if (!fault_entry->is_virtual)
|
||||
++gpu->parent->fault_buffer_info.non_replayable.stats.num_physical_faults;
|
||||
++parent_gpu->fault_buffer_info.non_replayable.stats.num_physical_faults;
|
||||
|
||||
++gpu->parent->stats.num_non_replayable_faults;
|
||||
++parent_gpu->stats.num_non_replayable_faults;
|
||||
|
||||
return;
|
||||
}
|
||||
@ -1675,30 +1732,30 @@ static void update_stats_gpu_fault_instance(uvm_gpu_t *gpu,
|
||||
switch (fault_entry->fault_access_type)
|
||||
{
|
||||
case UVM_FAULT_ACCESS_TYPE_PREFETCH:
|
||||
++gpu->parent->fault_buffer_info.replayable.stats.num_prefetch_faults;
|
||||
++parent_gpu->fault_buffer_info.replayable.stats.num_prefetch_faults;
|
||||
break;
|
||||
case UVM_FAULT_ACCESS_TYPE_READ:
|
||||
++gpu->parent->fault_buffer_info.replayable.stats.num_read_faults;
|
||||
++parent_gpu->fault_buffer_info.replayable.stats.num_read_faults;
|
||||
break;
|
||||
case UVM_FAULT_ACCESS_TYPE_WRITE:
|
||||
++gpu->parent->fault_buffer_info.replayable.stats.num_write_faults;
|
||||
++parent_gpu->fault_buffer_info.replayable.stats.num_write_faults;
|
||||
break;
|
||||
case UVM_FAULT_ACCESS_TYPE_ATOMIC_WEAK:
|
||||
case UVM_FAULT_ACCESS_TYPE_ATOMIC_STRONG:
|
||||
++gpu->parent->fault_buffer_info.replayable.stats.num_atomic_faults;
|
||||
++parent_gpu->fault_buffer_info.replayable.stats.num_atomic_faults;
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
if (is_duplicate || fault_entry->filtered)
|
||||
++gpu->parent->fault_buffer_info.replayable.stats.num_duplicate_faults;
|
||||
++parent_gpu->fault_buffer_info.replayable.stats.num_duplicate_faults;
|
||||
|
||||
++gpu->parent->stats.num_replayable_faults;
|
||||
++parent_gpu->stats.num_replayable_faults;
|
||||
}
|
||||
|
||||
static void update_stats_fault_cb(uvm_perf_event_t event_id, uvm_perf_event_data_t *event_data)
|
||||
{
|
||||
uvm_gpu_t *gpu;
|
||||
uvm_parent_gpu_t *parent_gpu;
|
||||
const uvm_fault_buffer_entry_t *fault_entry, *fault_instance;
|
||||
|
||||
UVM_ASSERT(event_id == UVM_PERF_EVENT_FAULT);
|
||||
@ -1709,16 +1766,16 @@ static void update_stats_fault_cb(uvm_perf_event_t event_id, uvm_perf_event_data
|
||||
// The reported fault entry must be the "representative" fault entry
|
||||
UVM_ASSERT(!event_data->fault.gpu.buffer_entry->filtered);
|
||||
|
||||
gpu = uvm_va_space_get_gpu(event_data->fault.space, event_data->fault.proc_id);
|
||||
parent_gpu = uvm_va_space_get_gpu(event_data->fault.space, event_data->fault.proc_id)->parent;
|
||||
|
||||
fault_entry = event_data->fault.gpu.buffer_entry;
|
||||
|
||||
// Update the stats using the representative fault entry and the rest of
|
||||
// instances
|
||||
update_stats_gpu_fault_instance(gpu, fault_entry, event_data->fault.gpu.is_duplicate);
|
||||
update_stats_parent_gpu_fault_instance(parent_gpu, fault_entry, event_data->fault.gpu.is_duplicate);
|
||||
|
||||
list_for_each_entry(fault_instance, &fault_entry->merged_instances_list, merged_instances_list)
|
||||
update_stats_gpu_fault_instance(gpu, fault_instance, event_data->fault.gpu.is_duplicate);
|
||||
update_stats_parent_gpu_fault_instance(parent_gpu, fault_instance, event_data->fault.gpu.is_duplicate);
|
||||
}
|
||||
|
||||
static void update_stats_migration_cb(uvm_perf_event_t event_id, uvm_perf_event_data_t *event_data)
|
||||
@ -1812,10 +1869,10 @@ void uvm_gpu_exit(void)
|
||||
uvm_parent_gpu_t *parent_gpu;
|
||||
|
||||
for_each_parent_gpu(parent_gpu)
|
||||
UVM_ASSERT_MSG(false, "GPU still present: %s\n", parent_gpu->name);
|
||||
UVM_ASSERT_MSG(false, "GPU still present: %s\n", uvm_parent_gpu_name(parent_gpu));
|
||||
|
||||
// CPU should never be in the retained GPUs mask
|
||||
UVM_ASSERT(!uvm_global_processor_mask_test(&g_uvm_global.retained_gpus, UVM_GLOBAL_ID_CPU));
|
||||
UVM_ASSERT(!uvm_processor_mask_test(&g_uvm_global.retained_gpus, UVM_ID_CPU));
|
||||
}
|
||||
|
||||
NV_STATUS uvm_gpu_init_va_space(uvm_va_space_t *va_space)
|
||||
@ -1844,7 +1901,7 @@ uvm_parent_gpu_t *uvm_parent_gpu_get_by_uuid_locked(const NvProcessorUuid *gpu_u
|
||||
uvm_parent_gpu_t *parent_gpu;
|
||||
|
||||
for_each_parent_gpu(parent_gpu) {
|
||||
if (uvm_processor_uuid_eq(&parent_gpu->uuid, gpu_uuid))
|
||||
if (uvm_uuid_eq(&parent_gpu->uuid, gpu_uuid))
|
||||
return parent_gpu;
|
||||
}
|
||||
|
||||
@ -1858,17 +1915,15 @@ uvm_parent_gpu_t *uvm_parent_gpu_get_by_uuid(const NvProcessorUuid *gpu_uuid)
|
||||
return uvm_parent_gpu_get_by_uuid_locked(gpu_uuid);
|
||||
}
|
||||
|
||||
static uvm_gpu_t *uvm_gpu_get_by_uuid_locked(const NvProcessorUuid *gpu_uuid)
|
||||
static uvm_gpu_t *gpu_get_by_uuid_locked(const NvProcessorUuid *gpu_uuid)
|
||||
{
|
||||
uvm_gpu_id_t gpu_id;
|
||||
uvm_global_gpu_id_t global_gpu_id;
|
||||
uvm_gpu_t *gpu;
|
||||
|
||||
for_each_gpu_id(gpu_id) {
|
||||
global_gpu_id = uvm_global_gpu_id_from_gpu_id(gpu_id);
|
||||
gpu = uvm_gpu_get(global_gpu_id);
|
||||
uvm_gpu_t *gpu = uvm_gpu_get(gpu_id);
|
||||
|
||||
if (gpu) {
|
||||
if (uvm_processor_uuid_eq(uvm_gpu_uuid(gpu), gpu_uuid)) {
|
||||
if (uvm_uuid_eq(uvm_gpu_uuid(gpu), gpu_uuid)) {
|
||||
UVM_ASSERT(!gpu->parent->smc.enabled);
|
||||
return gpu;
|
||||
}
|
||||
@ -1882,14 +1937,15 @@ uvm_gpu_t *uvm_gpu_get_by_uuid(const NvProcessorUuid *gpu_uuid)
|
||||
{
|
||||
uvm_assert_mutex_locked(&g_uvm_global.global_lock);
|
||||
|
||||
return uvm_gpu_get_by_uuid_locked(gpu_uuid);
|
||||
return gpu_get_by_uuid_locked(gpu_uuid);
|
||||
}
|
||||
|
||||
uvm_gpu_t *uvm_gpu_get_by_parent_and_swizz_id_locked(uvm_parent_gpu_t *parent_gpu, NvU32 swizz_id)
|
||||
uvm_gpu_t *uvm_gpu_get_by_parent_and_swizz_id(uvm_parent_gpu_t *parent_gpu, NvU32 swizz_id)
|
||||
{
|
||||
uvm_gpu_t *gpu;
|
||||
|
||||
UVM_ASSERT(parent_gpu);
|
||||
uvm_assert_mutex_locked(&g_uvm_global.global_lock);
|
||||
|
||||
for_each_gpu_in_parent(parent_gpu, gpu) {
|
||||
if (gpu->smc.swizz_id == swizz_id)
|
||||
@ -1899,20 +1955,13 @@ uvm_gpu_t *uvm_gpu_get_by_parent_and_swizz_id_locked(uvm_parent_gpu_t *parent_gp
|
||||
return NULL;
|
||||
}
|
||||
|
||||
uvm_gpu_t *uvm_gpu_get_by_parent_and_swizz_id(uvm_parent_gpu_t *parent_gpu, NvU32 swizz_id)
|
||||
{
|
||||
uvm_assert_mutex_locked(&g_uvm_global.global_lock);
|
||||
|
||||
return uvm_gpu_get_by_parent_and_swizz_id_locked(parent_gpu, swizz_id);
|
||||
}
|
||||
|
||||
// Increment the refcount for the GPU with the given UUID. If this is the first
|
||||
// time that this UUID is retained, the GPU is added to UVM.
|
||||
// When SMC partitioning is enabled, user_rm_device contains the user handles
|
||||
// that were created by the caller, and that can be used to identify and
|
||||
// obtain information about the partition. nvUvmInterfaceGetGpuInfo returns, in
|
||||
// gpu_info, whether SMC is enabled and the swizzId corresponding to the
|
||||
// partition.
|
||||
// gpu_info, whether SMC is enabled, the swizzId, and GI UUID corresponding to
|
||||
// the partition.
|
||||
static NV_STATUS gpu_retain_by_uuid_locked(const NvProcessorUuid *gpu_uuid,
|
||||
const uvm_rm_user_object_t *user_rm_device,
|
||||
uvm_gpu_t **gpu_out)
|
||||
@ -1923,7 +1972,7 @@ static NV_STATUS gpu_retain_by_uuid_locked(const NvProcessorUuid *gpu_uuid,
|
||||
UvmGpuInfo *gpu_info = NULL;
|
||||
UvmGpuClientInfo client_info = {0};
|
||||
UvmGpuPlatformInfo gpu_platform_info = {0};
|
||||
uvm_global_gpu_id_t global_gpu_id;
|
||||
uvm_gpu_id_t gpu_id;
|
||||
|
||||
client_info.hClient = user_rm_device->user_client;
|
||||
client_info.hSmcPartRef = user_rm_device->user_object;
|
||||
@ -1934,7 +1983,7 @@ static NV_STATUS gpu_retain_by_uuid_locked(const NvProcessorUuid *gpu_uuid,
|
||||
|
||||
uvm_assert_mutex_locked(&g_uvm_global.global_lock);
|
||||
|
||||
parent_gpu = uvm_parent_gpu_get_by_uuid(gpu_uuid);
|
||||
parent_gpu = uvm_parent_gpu_get_by_uuid_locked(gpu_uuid);
|
||||
|
||||
if (parent_gpu == NULL) {
|
||||
// If this is the first time the UUID is seen, register it on RM
|
||||
@ -1962,11 +2011,11 @@ static NV_STATUS gpu_retain_by_uuid_locked(const NvProcessorUuid *gpu_uuid,
|
||||
}
|
||||
|
||||
if (gpu == NULL) {
|
||||
status = find_unused_global_gpu_id(parent_gpu, &global_gpu_id);
|
||||
status = find_unused_gpu_id(parent_gpu, &gpu_id);
|
||||
if (status != NV_OK)
|
||||
goto error_unregister;
|
||||
|
||||
status = add_gpu(gpu_uuid, global_gpu_id, gpu_info, &gpu_platform_info, parent_gpu, &gpu);
|
||||
status = add_gpu(gpu_uuid, gpu_id, gpu_info, &gpu_platform_info, parent_gpu, &gpu);
|
||||
if (status != NV_OK)
|
||||
goto error_unregister;
|
||||
}
|
||||
@ -2304,8 +2353,8 @@ static NV_STATUS init_peer_access(uvm_gpu_t *gpu0,
|
||||
|
||||
set_optimal_p2p_write_ces(p2p_caps_params, peer_caps, gpu0, gpu1);
|
||||
|
||||
UVM_ASSERT(uvm_gpu_get(gpu0->global_id) == gpu0);
|
||||
UVM_ASSERT(uvm_gpu_get(gpu1->global_id) == gpu1);
|
||||
UVM_ASSERT(uvm_gpu_get(gpu0->id) == gpu0);
|
||||
UVM_ASSERT(uvm_gpu_get(gpu1->id) == gpu1);
|
||||
|
||||
// In the case of NVLINK peers, this initialization will happen during
|
||||
// add_gpu. As soon as the peer info table is assigned below, the access
|
||||
@ -2444,7 +2493,7 @@ static NV_STATUS discover_nvlink_peers(uvm_gpu_t *gpu)
|
||||
if (gpu->parent->smc.enabled)
|
||||
return NV_OK;
|
||||
|
||||
for_each_global_gpu(other_gpu) {
|
||||
for_each_gpu(other_gpu) {
|
||||
UvmGpuP2PCapsParams p2p_caps_params;
|
||||
|
||||
if ((other_gpu == gpu) || other_gpu->parent->smc.enabled)
|
||||
@ -2486,7 +2535,7 @@ static void destroy_nvlink_peers(uvm_gpu_t *gpu)
|
||||
if (gpu->parent->smc.enabled)
|
||||
return;
|
||||
|
||||
for_each_global_gpu(other_gpu) {
|
||||
for_each_gpu(other_gpu) {
|
||||
uvm_gpu_peer_t *peer_caps;
|
||||
|
||||
if ((other_gpu == gpu) || other_gpu->parent->smc.enabled)
|
||||
@ -2568,8 +2617,8 @@ static void disable_peer_access(uvm_gpu_t *gpu0, uvm_gpu_t *gpu1)
|
||||
|
||||
uvm_rm_locked_call_void(nvUvmInterfaceP2pObjectDestroy(uvm_global_session_handle(), p2p_handle));
|
||||
|
||||
UVM_ASSERT(uvm_gpu_get(gpu0->global_id) == gpu0);
|
||||
UVM_ASSERT(uvm_gpu_get(gpu1->global_id) == gpu1);
|
||||
UVM_ASSERT(uvm_gpu_get(gpu0->id) == gpu0);
|
||||
UVM_ASSERT(uvm_gpu_get(gpu1->id) == gpu1);
|
||||
|
||||
uvm_spin_lock(&gpu0->peer_info.peer_gpus_lock);
|
||||
uvm_processor_mask_clear(&gpu0->peer_info.peer_gpu_mask, gpu1->id);
|
||||
@ -2639,13 +2688,13 @@ uvm_aperture_t uvm_gpu_peer_aperture(uvm_gpu_t *local_gpu, uvm_gpu_t *remote_gpu
|
||||
return uvm_gpu_peer_caps_aperture(peer_caps, local_gpu, remote_gpu);
|
||||
}
|
||||
|
||||
uvm_aperture_t uvm_gpu_page_tree_init_location(const uvm_gpu_t *gpu)
|
||||
uvm_aperture_t uvm_get_page_tree_location(const uvm_parent_gpu_t *parent_gpu)
|
||||
{
|
||||
// See comment in page_tree_set_location
|
||||
if (uvm_gpu_is_virt_mode_sriov_heavy(gpu))
|
||||
if (uvm_parent_gpu_is_virt_mode_sriov_heavy(parent_gpu))
|
||||
return UVM_APERTURE_VID;
|
||||
|
||||
if (uvm_conf_computing_mode_enabled(gpu))
|
||||
if (g_uvm_global.conf_computing_enabled)
|
||||
return UVM_APERTURE_VID;
|
||||
|
||||
return UVM_APERTURE_DEFAULT;
|
||||
@ -2714,7 +2763,8 @@ static NvU64 instance_ptr_to_key(uvm_gpu_phys_address_t instance_ptr)
|
||||
return key;
|
||||
}
|
||||
|
||||
static NV_STATUS gpu_add_user_channel_subctx_info(uvm_gpu_t *gpu, uvm_user_channel_t *user_channel)
|
||||
static NV_STATUS parent_gpu_add_user_channel_subctx_info(uvm_parent_gpu_t *parent_gpu,
|
||||
uvm_user_channel_t *user_channel)
|
||||
{
|
||||
uvm_gpu_phys_address_t instance_ptr = user_channel->instance_ptr.addr;
|
||||
NV_STATUS status = NV_OK;
|
||||
@ -2738,10 +2788,10 @@ static NV_STATUS gpu_add_user_channel_subctx_info(uvm_gpu_t *gpu, uvm_user_chann
|
||||
uvm_kvmalloc_zero(sizeof(*new_channel_subctx_info->subctxs) * user_channel->tsg.max_subctx_count);
|
||||
}
|
||||
|
||||
uvm_spin_lock(&gpu->parent->instance_ptr_table_lock);
|
||||
uvm_spin_lock(&parent_gpu->instance_ptr_table_lock);
|
||||
|
||||
// Check if the subcontext information for the channel already exists
|
||||
channel_tree_node = uvm_rb_tree_find(&gpu->parent->tsg_table, user_channel->tsg.id);
|
||||
channel_tree_node = uvm_rb_tree_find(&parent_gpu->tsg_table, user_channel->tsg.id);
|
||||
|
||||
if (!channel_tree_node) {
|
||||
// We could not allocate the descriptor before taking the lock. Exiting
|
||||
@ -2752,7 +2802,7 @@ static NV_STATUS gpu_add_user_channel_subctx_info(uvm_gpu_t *gpu, uvm_user_chann
|
||||
|
||||
// Insert the new subcontext information descriptor
|
||||
new_channel_subctx_info->node.key = user_channel->tsg.id;
|
||||
status = uvm_rb_tree_insert(&gpu->parent->tsg_table, &new_channel_subctx_info->node);
|
||||
status = uvm_rb_tree_insert(&parent_gpu->tsg_table, &new_channel_subctx_info->node);
|
||||
UVM_ASSERT(status == NV_OK);
|
||||
|
||||
channel_subctx_info = new_channel_subctx_info;
|
||||
@ -2812,7 +2862,7 @@ static NV_STATUS gpu_add_user_channel_subctx_info(uvm_gpu_t *gpu, uvm_user_chann
|
||||
++channel_subctx_info->total_refcount;
|
||||
|
||||
exit_unlock:
|
||||
uvm_spin_unlock(&gpu->parent->instance_ptr_table_lock);
|
||||
uvm_spin_unlock(&parent_gpu->instance_ptr_table_lock);
|
||||
|
||||
// Remove the pre-allocated per-TSG subctx information struct if there was
|
||||
// some error or it was not used
|
||||
@ -2826,12 +2876,13 @@ exit_unlock:
|
||||
return status;
|
||||
}
|
||||
|
||||
static void gpu_remove_user_channel_subctx_info_locked(uvm_gpu_t *gpu, uvm_user_channel_t *user_channel)
|
||||
static void parent_gpu_remove_user_channel_subctx_info_locked(uvm_parent_gpu_t *parent_gpu,
|
||||
uvm_user_channel_t *user_channel)
|
||||
{
|
||||
uvm_gpu_phys_address_t instance_ptr = user_channel->instance_ptr.addr;
|
||||
uvm_va_space_t *va_space = user_channel->gpu_va_space->va_space;
|
||||
|
||||
uvm_assert_spinlock_locked(&gpu->parent->instance_ptr_table_lock);
|
||||
uvm_assert_spinlock_locked(&parent_gpu->instance_ptr_table_lock);
|
||||
|
||||
// Channel subcontext info descriptor may not have been registered in
|
||||
// tsg_table since this function is called in some teardown paths during
|
||||
@ -2840,7 +2891,7 @@ static void gpu_remove_user_channel_subctx_info_locked(uvm_gpu_t *gpu, uvm_user_
|
||||
return;
|
||||
|
||||
UVM_ASSERT_MSG(&user_channel->subctx_info->node ==
|
||||
uvm_rb_tree_find(&gpu->parent->tsg_table, user_channel->subctx_info->node.key),
|
||||
uvm_rb_tree_find(&parent_gpu->tsg_table, user_channel->subctx_info->node.key),
|
||||
"CH %u:%u instance_ptr {0x%llx:%s} SubCTX %u in TSG %u: SubCTX not found in TSG table\n",
|
||||
user_channel->hw_runlist_id,
|
||||
user_channel->hw_channel_id,
|
||||
@ -2883,7 +2934,7 @@ static void gpu_remove_user_channel_subctx_info_locked(uvm_gpu_t *gpu, uvm_user_
|
||||
user_channel->subctx_info->subctxs[user_channel->subctx_id].va_space = NULL;
|
||||
|
||||
if (--user_channel->subctx_info->total_refcount == 0) {
|
||||
uvm_rb_tree_remove(&gpu->parent->tsg_table, &user_channel->subctx_info->node);
|
||||
uvm_rb_tree_remove(&parent_gpu->tsg_table, &user_channel->subctx_info->node);
|
||||
uvm_kvfree(user_channel->subctx_info->subctxs);
|
||||
uvm_kvfree(user_channel->subctx_info);
|
||||
}
|
||||
@ -2891,26 +2942,20 @@ static void gpu_remove_user_channel_subctx_info_locked(uvm_gpu_t *gpu, uvm_user_
|
||||
user_channel->subctx_info = NULL;
|
||||
}
|
||||
|
||||
static void gpu_remove_user_channel_subctx_info(uvm_gpu_t *gpu, uvm_user_channel_t *user_channel)
|
||||
{
|
||||
uvm_spin_lock(&gpu->parent->instance_ptr_table_lock);
|
||||
gpu_remove_user_channel_subctx_info_locked(gpu, user_channel);
|
||||
uvm_spin_unlock(&gpu->parent->instance_ptr_table_lock);
|
||||
}
|
||||
|
||||
static void gpu_add_user_channel_instance_ptr(uvm_gpu_t *gpu, uvm_user_channel_t *user_channel)
|
||||
static void parent_gpu_add_user_channel_instance_ptr(uvm_parent_gpu_t *parent_gpu,
|
||||
uvm_user_channel_t *user_channel)
|
||||
{
|
||||
uvm_gpu_phys_address_t instance_ptr = user_channel->instance_ptr.addr;
|
||||
NvU64 instance_ptr_key = instance_ptr_to_key(instance_ptr);
|
||||
NV_STATUS status;
|
||||
|
||||
uvm_spin_lock(&gpu->parent->instance_ptr_table_lock);
|
||||
uvm_spin_lock(&parent_gpu->instance_ptr_table_lock);
|
||||
|
||||
// Insert the instance_ptr -> user_channel mapping
|
||||
user_channel->instance_ptr.node.key = instance_ptr_key;
|
||||
status = uvm_rb_tree_insert(&gpu->parent->instance_ptr_table, &user_channel->instance_ptr.node);
|
||||
status = uvm_rb_tree_insert(&parent_gpu->instance_ptr_table, &user_channel->instance_ptr.node);
|
||||
|
||||
uvm_spin_unlock(&gpu->parent->instance_ptr_table_lock);
|
||||
uvm_spin_unlock(&parent_gpu->instance_ptr_table_lock);
|
||||
|
||||
UVM_ASSERT_MSG(status == NV_OK, "CH %u:%u instance_ptr {0x%llx:%s} SubCTX %u in TSG %u: error %s\n",
|
||||
user_channel->hw_runlist_id,
|
||||
@ -2922,17 +2967,18 @@ static void gpu_add_user_channel_instance_ptr(uvm_gpu_t *gpu, uvm_user_channel_t
|
||||
nvstatusToString(status));
|
||||
}
|
||||
|
||||
static void gpu_remove_user_channel_instance_ptr_locked(uvm_gpu_t *gpu, uvm_user_channel_t *user_channel)
|
||||
static void parent_gpu_remove_user_channel_instance_ptr_locked(uvm_parent_gpu_t *parent_gpu,
|
||||
uvm_user_channel_t *user_channel)
|
||||
{
|
||||
uvm_assert_spinlock_locked(&gpu->parent->instance_ptr_table_lock);
|
||||
uvm_assert_spinlock_locked(&parent_gpu->instance_ptr_table_lock);
|
||||
|
||||
if (UVM_RB_TREE_EMPTY_NODE(&user_channel->instance_ptr.node))
|
||||
return;
|
||||
|
||||
uvm_rb_tree_remove(&gpu->parent->instance_ptr_table, &user_channel->instance_ptr.node);
|
||||
uvm_rb_tree_remove(&parent_gpu->instance_ptr_table, &user_channel->instance_ptr.node);
|
||||
}
|
||||
|
||||
NV_STATUS uvm_gpu_add_user_channel(uvm_gpu_t *gpu, uvm_user_channel_t *user_channel)
|
||||
NV_STATUS uvm_parent_gpu_add_user_channel(uvm_parent_gpu_t *parent_gpu, uvm_user_channel_t *user_channel)
|
||||
{
|
||||
uvm_va_space_t *va_space;
|
||||
uvm_gpu_va_space_t *gpu_va_space = user_channel->gpu_va_space;
|
||||
@ -2944,23 +2990,24 @@ NV_STATUS uvm_gpu_add_user_channel(uvm_gpu_t *gpu, uvm_user_channel_t *user_chan
|
||||
va_space = gpu_va_space->va_space;
|
||||
uvm_assert_rwsem_locked(&va_space->lock);
|
||||
|
||||
status = gpu_add_user_channel_subctx_info(gpu, user_channel);
|
||||
status = parent_gpu_add_user_channel_subctx_info(parent_gpu, user_channel);
|
||||
if (status != NV_OK)
|
||||
return status;
|
||||
|
||||
gpu_add_user_channel_instance_ptr(gpu, user_channel);
|
||||
parent_gpu_add_user_channel_instance_ptr(parent_gpu, user_channel);
|
||||
|
||||
return NV_OK;
|
||||
}
|
||||
|
||||
static uvm_user_channel_t *instance_ptr_to_user_channel(uvm_gpu_t *gpu, uvm_gpu_phys_address_t instance_ptr)
|
||||
static uvm_user_channel_t *instance_ptr_to_user_channel(uvm_parent_gpu_t *parent_gpu,
|
||||
uvm_gpu_phys_address_t instance_ptr)
|
||||
{
|
||||
NvU64 key = instance_ptr_to_key(instance_ptr);
|
||||
uvm_rb_tree_node_t *instance_node;
|
||||
|
||||
uvm_assert_spinlock_locked(&gpu->parent->instance_ptr_table_lock);
|
||||
uvm_assert_spinlock_locked(&parent_gpu->instance_ptr_table_lock);
|
||||
|
||||
instance_node = uvm_rb_tree_find(&gpu->parent->instance_ptr_table, key);
|
||||
instance_node = uvm_rb_tree_find(&parent_gpu->instance_ptr_table, key);
|
||||
if (!instance_node)
|
||||
return NULL;
|
||||
|
||||
@ -3009,18 +3056,18 @@ static uvm_va_space_t *user_channel_and_subctx_to_va_space(uvm_user_channel_t *u
|
||||
return channel_subctx_info->subctxs[subctx_id].va_space;
|
||||
}
|
||||
|
||||
NV_STATUS uvm_gpu_fault_entry_to_va_space(uvm_gpu_t *gpu,
|
||||
uvm_fault_buffer_entry_t *fault,
|
||||
uvm_va_space_t **out_va_space)
|
||||
NV_STATUS uvm_parent_gpu_fault_entry_to_va_space(uvm_parent_gpu_t *parent_gpu,
|
||||
uvm_fault_buffer_entry_t *fault,
|
||||
uvm_va_space_t **out_va_space)
|
||||
{
|
||||
uvm_user_channel_t *user_channel;
|
||||
NV_STATUS status = NV_OK;
|
||||
|
||||
*out_va_space = NULL;
|
||||
|
||||
uvm_spin_lock(&gpu->parent->instance_ptr_table_lock);
|
||||
uvm_spin_lock(&parent_gpu->instance_ptr_table_lock);
|
||||
|
||||
user_channel = instance_ptr_to_user_channel(gpu, fault->instance_ptr);
|
||||
user_channel = instance_ptr_to_user_channel(parent_gpu, fault->instance_ptr);
|
||||
if (!user_channel) {
|
||||
status = NV_ERR_INVALID_CHANNEL;
|
||||
goto exit_unlock;
|
||||
@ -3057,14 +3104,14 @@ NV_STATUS uvm_gpu_fault_entry_to_va_space(uvm_gpu_t *gpu,
|
||||
}
|
||||
|
||||
exit_unlock:
|
||||
uvm_spin_unlock(&gpu->parent->instance_ptr_table_lock);
|
||||
uvm_spin_unlock(&parent_gpu->instance_ptr_table_lock);
|
||||
|
||||
return status;
|
||||
}
|
||||
|
||||
NV_STATUS uvm_gpu_access_counter_entry_to_va_space(uvm_gpu_t *gpu,
|
||||
uvm_access_counter_buffer_entry_t *entry,
|
||||
uvm_va_space_t **out_va_space)
|
||||
NV_STATUS uvm_parent_gpu_access_counter_entry_to_va_space(uvm_parent_gpu_t *parent_gpu,
|
||||
uvm_access_counter_buffer_entry_t *entry,
|
||||
uvm_va_space_t **out_va_space)
|
||||
{
|
||||
uvm_user_channel_t *user_channel;
|
||||
NV_STATUS status = NV_OK;
|
||||
@ -3072,9 +3119,9 @@ NV_STATUS uvm_gpu_access_counter_entry_to_va_space(uvm_gpu_t *gpu,
|
||||
*out_va_space = NULL;
|
||||
UVM_ASSERT(entry->address.is_virtual);
|
||||
|
||||
uvm_spin_lock(&gpu->parent->instance_ptr_table_lock);
|
||||
uvm_spin_lock(&parent_gpu->instance_ptr_table_lock);
|
||||
|
||||
user_channel = instance_ptr_to_user_channel(gpu, entry->virtual_info.instance_ptr);
|
||||
user_channel = instance_ptr_to_user_channel(parent_gpu, entry->virtual_info.instance_ptr);
|
||||
if (!user_channel) {
|
||||
status = NV_ERR_INVALID_CHANNEL;
|
||||
goto exit_unlock;
|
||||
@ -3095,12 +3142,12 @@ NV_STATUS uvm_gpu_access_counter_entry_to_va_space(uvm_gpu_t *gpu,
|
||||
}
|
||||
|
||||
exit_unlock:
|
||||
uvm_spin_unlock(&gpu->parent->instance_ptr_table_lock);
|
||||
uvm_spin_unlock(&parent_gpu->instance_ptr_table_lock);
|
||||
|
||||
return status;
|
||||
}
|
||||
|
||||
void uvm_gpu_remove_user_channel(uvm_gpu_t *gpu, uvm_user_channel_t *user_channel)
|
||||
void uvm_parent_gpu_remove_user_channel(uvm_parent_gpu_t *parent_gpu, uvm_user_channel_t *user_channel)
|
||||
{
|
||||
uvm_va_space_t *va_space;
|
||||
uvm_gpu_va_space_t *gpu_va_space = user_channel->gpu_va_space;
|
||||
@ -3111,10 +3158,10 @@ void uvm_gpu_remove_user_channel(uvm_gpu_t *gpu, uvm_user_channel_t *user_channe
|
||||
va_space = gpu_va_space->va_space;
|
||||
uvm_assert_rwsem_locked_write(&va_space->lock);
|
||||
|
||||
uvm_spin_lock(&gpu->parent->instance_ptr_table_lock);
|
||||
gpu_remove_user_channel_subctx_info_locked(gpu, user_channel);
|
||||
gpu_remove_user_channel_instance_ptr_locked(gpu, user_channel);
|
||||
uvm_spin_unlock(&gpu->parent->instance_ptr_table_lock);
|
||||
uvm_spin_lock(&parent_gpu->instance_ptr_table_lock);
|
||||
parent_gpu_remove_user_channel_subctx_info_locked(parent_gpu, user_channel);
|
||||
parent_gpu_remove_user_channel_instance_ptr_locked(parent_gpu, user_channel);
|
||||
uvm_spin_unlock(&parent_gpu->instance_ptr_table_lock);
|
||||
}
|
||||
|
||||
static NvU64 gpu_addr_to_dma_addr(uvm_parent_gpu_t *parent_gpu, NvU64 gpu_addr)
|
||||
@ -3147,7 +3194,7 @@ static NvU64 dma_addr_to_gpu_addr(uvm_parent_gpu_t *parent_gpu, NvU64 dma_addr)
|
||||
return gpu_addr;
|
||||
}
|
||||
|
||||
void *uvm_gpu_dma_alloc_page(uvm_parent_gpu_t *parent_gpu, gfp_t gfp_flags, NvU64 *dma_address_out)
|
||||
void *uvm_parent_gpu_dma_alloc_page(uvm_parent_gpu_t *parent_gpu, gfp_t gfp_flags, NvU64 *dma_address_out)
|
||||
{
|
||||
NvU64 dma_addr;
|
||||
void *cpu_addr;
|
||||
@ -3162,14 +3209,14 @@ void *uvm_gpu_dma_alloc_page(uvm_parent_gpu_t *parent_gpu, gfp_t gfp_flags, NvU6
|
||||
return cpu_addr;
|
||||
}
|
||||
|
||||
void uvm_gpu_dma_free_page(uvm_parent_gpu_t *parent_gpu, void *va, NvU64 dma_address)
|
||||
void uvm_parent_gpu_dma_free_page(uvm_parent_gpu_t *parent_gpu, void *va, NvU64 dma_address)
|
||||
{
|
||||
dma_address = gpu_addr_to_dma_addr(parent_gpu, dma_address);
|
||||
dma_free_coherent(&parent_gpu->pci_dev->dev, PAGE_SIZE, va, dma_address);
|
||||
atomic64_sub(PAGE_SIZE, &parent_gpu->mapped_cpu_pages_size);
|
||||
}
|
||||
|
||||
NV_STATUS uvm_gpu_map_cpu_pages(uvm_parent_gpu_t *parent_gpu, struct page *page, size_t size, NvU64 *dma_address_out)
|
||||
NV_STATUS uvm_parent_gpu_map_cpu_pages(uvm_parent_gpu_t *parent_gpu, struct page *page, size_t size, NvU64 *dma_address_out)
|
||||
{
|
||||
NvU64 dma_addr;
|
||||
|
||||
@ -3187,7 +3234,7 @@ NV_STATUS uvm_gpu_map_cpu_pages(uvm_parent_gpu_t *parent_gpu, struct page *page,
|
||||
dma_addr + (NvU64)size,
|
||||
parent_gpu->dma_addressable_start,
|
||||
parent_gpu->dma_addressable_limit + 1,
|
||||
parent_gpu->name);
|
||||
uvm_parent_gpu_name(parent_gpu));
|
||||
return NV_ERR_INVALID_ADDRESS;
|
||||
}
|
||||
|
||||
@ -3197,7 +3244,7 @@ NV_STATUS uvm_gpu_map_cpu_pages(uvm_parent_gpu_t *parent_gpu, struct page *page,
|
||||
return NV_OK;
|
||||
}
|
||||
|
||||
void uvm_gpu_unmap_cpu_pages(uvm_parent_gpu_t *parent_gpu, NvU64 dma_address, size_t size)
|
||||
void uvm_parent_gpu_unmap_cpu_pages(uvm_parent_gpu_t *parent_gpu, NvU64 dma_address, size_t size)
|
||||
{
|
||||
UVM_ASSERT(PAGE_ALIGNED(size));
|
||||
|
||||
|
@ -57,14 +57,16 @@
|
||||
|
||||
typedef struct
|
||||
{
|
||||
// Number of faults from this uTLB that have been fetched but have not been serviced yet
|
||||
// Number of faults from this uTLB that have been fetched but have not been
|
||||
// serviced yet.
|
||||
NvU32 num_pending_faults;
|
||||
|
||||
// Whether the uTLB contains fatal faults
|
||||
bool has_fatal_faults;
|
||||
|
||||
// We have issued a replay of type START_ACK_ALL while containing fatal faults. This puts
|
||||
// the uTLB in lockdown mode and no new translations are accepted
|
||||
// We have issued a replay of type START_ACK_ALL while containing fatal
|
||||
// faults. This puts the uTLB in lockdown mode and no new translations are
|
||||
// accepted.
|
||||
bool in_lockdown;
|
||||
|
||||
// We have issued a cancel on this uTLB
|
||||
@ -126,8 +128,8 @@ struct uvm_service_block_context_struct
|
||||
struct list_head service_context_list;
|
||||
|
||||
// A mask of GPUs that need to be checked for ECC errors before the CPU
|
||||
// fault handler returns, but after the VA space lock has been unlocked to
|
||||
// avoid the RM/UVM VA space lock deadlocks.
|
||||
// fault handler returns, but after the VA space lock has been unlocked
|
||||
// to avoid the RM/UVM VA space lock deadlocks.
|
||||
uvm_processor_mask_t gpus_to_check_for_ecc;
|
||||
|
||||
// This is set to throttle page fault thrashing.
|
||||
@ -160,9 +162,9 @@ struct uvm_service_block_context_struct
|
||||
|
||||
struct
|
||||
{
|
||||
// Per-processor mask with the pages that will be resident after servicing.
|
||||
// We need one mask per processor because we may coalesce faults that
|
||||
// trigger migrations to different processors.
|
||||
// Per-processor mask with the pages that will be resident after
|
||||
// servicing. We need one mask per processor because we may coalesce
|
||||
// faults that trigger migrations to different processors.
|
||||
uvm_page_mask_t new_residency;
|
||||
} per_processor_masks[UVM_ID_MAX_PROCESSORS];
|
||||
|
||||
@ -179,23 +181,28 @@ struct uvm_service_block_context_struct
|
||||
typedef struct
|
||||
{
|
||||
// Mask of read faulted pages in a UVM_VA_BLOCK_SIZE aligned region of a SAM
|
||||
// VMA. Used for batching ATS faults in a vma.
|
||||
// VMA. Used for batching ATS faults in a vma. This is unused for access
|
||||
// counter service requests.
|
||||
uvm_page_mask_t read_fault_mask;
|
||||
|
||||
// Mask of write faulted pages in a UVM_VA_BLOCK_SIZE aligned region of a
|
||||
// SAM VMA. Used for batching ATS faults in a vma.
|
||||
// SAM VMA. Used for batching ATS faults in a vma. This is unused for access
|
||||
// counter service requests.
|
||||
uvm_page_mask_t write_fault_mask;
|
||||
|
||||
// Mask of successfully serviced pages in a UVM_VA_BLOCK_SIZE aligned region
|
||||
// of a SAM VMA. Used to return ATS fault status.
|
||||
// of a SAM VMA. Used to return ATS fault status. This is unused for access
|
||||
// counter service requests.
|
||||
uvm_page_mask_t faults_serviced_mask;
|
||||
|
||||
// Mask of successfully serviced read faults on pages in write_fault_mask.
|
||||
// This is unused for access counter service requests.
|
||||
uvm_page_mask_t reads_serviced_mask;
|
||||
|
||||
// Mask of all faulted pages in a UVM_VA_BLOCK_SIZE aligned region of a
|
||||
// SAM VMA. This is used as input to the prefetcher.
|
||||
uvm_page_mask_t faulted_mask;
|
||||
// Mask of all accessed pages in a UVM_VA_BLOCK_SIZE aligned region of a SAM
|
||||
// VMA. This is used as input for access counter service requests and output
|
||||
// of fault service requests.
|
||||
uvm_page_mask_t accessed_mask;
|
||||
|
||||
// Client type of the service requestor.
|
||||
uvm_fault_client_type_t client_type;
|
||||
@ -294,11 +301,8 @@ struct uvm_fault_service_batch_context_struct
|
||||
|
||||
struct uvm_ats_fault_invalidate_struct
|
||||
{
|
||||
// Whether the TLB batch contains any information
|
||||
bool write_faults_in_batch;
|
||||
|
||||
// Batch of TLB entries to be invalidated
|
||||
uvm_tlb_batch_t write_faults_tlb_batch;
|
||||
bool tlb_batch_pending;
|
||||
uvm_tlb_batch_t tlb_batch;
|
||||
};
|
||||
|
||||
typedef struct
|
||||
@ -443,20 +447,9 @@ struct uvm_access_counter_service_batch_context_struct
|
||||
NvU32 num_notifications;
|
||||
|
||||
// Boolean used to avoid sorting the fault batch by instance_ptr if we
|
||||
// determine at fetch time that all the access counter notifications in the
|
||||
// batch report the same instance_ptr
|
||||
// determine at fetch time that all the access counter notifications in
|
||||
// the batch report the same instance_ptr
|
||||
bool is_single_instance_ptr;
|
||||
|
||||
// Scratch space, used to generate artificial physically addressed notifications.
|
||||
// Virtual address notifications are always aligned to 64k. This means up to 16
|
||||
// different physical locations could have been accessed to trigger one notification.
|
||||
// The sub-granularity mask can correspond to any of them.
|
||||
struct
|
||||
{
|
||||
uvm_processor_id_t resident_processors[16];
|
||||
uvm_gpu_phys_address_t phys_addresses[16];
|
||||
uvm_access_counter_buffer_entry_t phys_entry;
|
||||
} scratch;
|
||||
} virt;
|
||||
|
||||
struct
|
||||
@ -467,8 +460,8 @@ struct uvm_access_counter_service_batch_context_struct
|
||||
NvU32 num_notifications;
|
||||
|
||||
// Boolean used to avoid sorting the fault batch by aperture if we
|
||||
// determine at fetch time that all the access counter notifications in the
|
||||
// batch report the same aperture
|
||||
// determine at fetch time that all the access counter notifications in
|
||||
// the batch report the same aperture
|
||||
bool is_single_aperture;
|
||||
} phys;
|
||||
|
||||
@ -478,6 +471,9 @@ struct uvm_access_counter_service_batch_context_struct
|
||||
// Structure used to coalesce access counter servicing in a VA block
|
||||
uvm_service_block_context_t block_service_context;
|
||||
|
||||
// Structure used to service access counter migrations in an ATS block.
|
||||
uvm_ats_fault_context_t ats_context;
|
||||
|
||||
// Unique id (per-GPU) generated for tools events recording
|
||||
NvU32 batch_id;
|
||||
};
|
||||
@ -610,10 +606,22 @@ typedef enum
|
||||
UVM_GPU_PEER_COPY_MODE_COUNT
|
||||
} uvm_gpu_peer_copy_mode_t;
|
||||
|
||||
// In order to support SMC/MIG GPU partitions, we split UVM GPUs into two
|
||||
// parts: parent GPUs (uvm_parent_gpu_t) which represent unique PCIe devices
|
||||
// (including VFs), and sub/child GPUs (uvm_gpu_t) which represent individual
|
||||
// partitions within the parent. The parent GPU and partition GPU have
|
||||
// different "id" and "uuid".
|
||||
struct uvm_gpu_struct
|
||||
{
|
||||
uvm_parent_gpu_t *parent;
|
||||
|
||||
// The gpu's GI uuid if SMC is enabled; otherwise, a copy of parent->uuid.
|
||||
NvProcessorUuid uuid;
|
||||
|
||||
// Nice printable name in the format: ID: 999: UVM-GPU-<parent_uuid>.
|
||||
// UVM_GPU_UUID_TEXT_BUFFER_LENGTH includes the null character.
|
||||
char name[9 + UVM_GPU_UUID_TEXT_BUFFER_LENGTH];
|
||||
|
||||
// Refcount of the gpu, i.e. how many times it has been retained. This is
|
||||
// roughly a count of how many times it has been registered with a VA space,
|
||||
// except that some paths retain the GPU temporarily without a VA space.
|
||||
@ -632,13 +640,9 @@ struct uvm_gpu_struct
|
||||
// user can create a lot of va spaces and register the gpu with them).
|
||||
atomic64_t retained_count;
|
||||
|
||||
// A unique uvm gpu id in range [1, UVM_ID_MAX_PROCESSORS); this is a copy
|
||||
// of the parent's id.
|
||||
// A unique uvm gpu id in range [1, UVM_ID_MAX_PROCESSORS).
|
||||
uvm_gpu_id_t id;
|
||||
|
||||
// A unique uvm global_gpu id in range [1, UVM_GLOBAL_ID_MAX_PROCESSORS)
|
||||
uvm_global_gpu_id_t global_id;
|
||||
|
||||
// Should be UVM_GPU_MAGIC_VALUE. Used for memory checking.
|
||||
NvU64 magic;
|
||||
|
||||
@ -664,8 +668,8 @@ struct uvm_gpu_struct
|
||||
struct
|
||||
{
|
||||
// Big page size used by the internal UVM VA space
|
||||
// Notably it may be different than the big page size used by a user's VA
|
||||
// space in general.
|
||||
// Notably it may be different than the big page size used by a user's
|
||||
// VA space in general.
|
||||
NvU32 internal_size;
|
||||
} big_page;
|
||||
|
||||
@ -691,8 +695,8 @@ struct uvm_gpu_struct
|
||||
// lazily-populated array of peer GPUs, indexed by the peer's GPU index
|
||||
uvm_gpu_t *peer_gpus[UVM_ID_MAX_GPUS];
|
||||
|
||||
// Leaf spinlock used to synchronize access to the peer_gpus table so that
|
||||
// it can be safely accessed from the access counters bottom half
|
||||
// Leaf spinlock used to synchronize access to the peer_gpus table so
|
||||
// that it can be safely accessed from the access counters bottom half
|
||||
uvm_spinlock_t peer_gpus_lock;
|
||||
} peer_info;
|
||||
|
||||
@ -852,6 +856,11 @@ struct uvm_gpu_struct
|
||||
bool uvm_test_force_upper_pushbuffer_segment;
|
||||
};
|
||||
|
||||
// In order to support SMC/MIG GPU partitions, we split UVM GPUs into two
|
||||
// parts: parent GPUs (uvm_parent_gpu_t) which represent unique PCIe devices
|
||||
// (including VFs), and sub/child GPUs (uvm_gpu_t) which represent individual
|
||||
// partitions within the parent. The parent GPU and partition GPU have
|
||||
// different "id" and "uuid".
|
||||
struct uvm_parent_gpu_struct
|
||||
{
|
||||
// Reference count for how many places are holding on to a parent GPU
|
||||
@ -864,11 +873,11 @@ struct uvm_parent_gpu_struct
|
||||
// The number of uvm_gpu_ts referencing this uvm_parent_gpu_t.
|
||||
NvU32 num_retained_gpus;
|
||||
|
||||
uvm_gpu_t *gpus[UVM_ID_MAX_SUB_PROCESSORS];
|
||||
uvm_gpu_t *gpus[UVM_PARENT_ID_MAX_SUB_PROCESSORS];
|
||||
|
||||
// Bitmap of valid child entries in the gpus[] table. Used to retrieve a
|
||||
// usable child GPU in bottom-halves.
|
||||
DECLARE_BITMAP(valid_gpus, UVM_ID_MAX_SUB_PROCESSORS);
|
||||
DECLARE_BITMAP(valid_gpus, UVM_PARENT_ID_MAX_SUB_PROCESSORS);
|
||||
|
||||
// The gpu's uuid
|
||||
NvProcessorUuid uuid;
|
||||
@ -880,8 +889,8 @@ struct uvm_parent_gpu_struct
|
||||
// hardware classes, etc.).
|
||||
UvmGpuInfo rm_info;
|
||||
|
||||
// A unique uvm gpu id in range [1, UVM_ID_MAX_PROCESSORS)
|
||||
uvm_gpu_id_t id;
|
||||
// A unique uvm gpu id in range [1, UVM_PARENT_ID_MAX_PROCESSORS)
|
||||
uvm_parent_gpu_id_t id;
|
||||
|
||||
// Reference to the Linux PCI device
|
||||
//
|
||||
@ -916,12 +925,13 @@ struct uvm_parent_gpu_struct
|
||||
// dma_addressable_start (in bifSetupDmaWindow_IMPL()) and hence when
|
||||
// referencing sysmem from the GPU, dma_addressable_start should be
|
||||
// subtracted from the physical address. The DMA mapping helpers like
|
||||
// uvm_gpu_map_cpu_pages() and uvm_gpu_dma_alloc_page() take care of that.
|
||||
// uvm_parent_gpu_map_cpu_pages() and uvm_parent_gpu_dma_alloc_page() take
|
||||
// care of that.
|
||||
NvU64 dma_addressable_start;
|
||||
NvU64 dma_addressable_limit;
|
||||
|
||||
// Total size (in bytes) of physically mapped (with uvm_gpu_map_cpu_pages)
|
||||
// sysmem pages, used for leak detection.
|
||||
// Total size (in bytes) of physically mapped (with
|
||||
// uvm_parent_gpu_map_cpu_pages) sysmem pages, used for leak detection.
|
||||
atomic64_t mapped_cpu_pages_size;
|
||||
|
||||
// Hardware Abstraction Layer
|
||||
@ -940,7 +950,11 @@ struct uvm_parent_gpu_struct
|
||||
// Virtualization mode of the GPU.
|
||||
UVM_VIRT_MODE virt_mode;
|
||||
|
||||
// Whether the GPU can trigger faults on prefetch instructions
|
||||
// Pascal+ GPUs can trigger faults on prefetch instructions. If false, this
|
||||
// feature must be disabled at all times in GPUs of the given architecture.
|
||||
// If true, the feature can be toggled at will by SW.
|
||||
//
|
||||
// The field should not be used unless the GPU supports replayable faults.
|
||||
bool prefetch_fault_supported;
|
||||
|
||||
// Number of membars required to flush out HSHUB following a TLB invalidate
|
||||
@ -955,6 +969,11 @@ struct uvm_parent_gpu_struct
|
||||
|
||||
bool access_counters_supported;
|
||||
|
||||
// If this is true, physical address based access counter notifications are
|
||||
// potentially generated. If false, only virtual address based notifications
|
||||
// are generated (assuming access_counters_supported is true too).
|
||||
bool access_counters_can_use_physical_addresses;
|
||||
|
||||
bool fault_cancel_va_supported;
|
||||
|
||||
// True if the GPU has hardware support for scoped atomics
|
||||
@ -981,6 +1000,10 @@ struct uvm_parent_gpu_struct
|
||||
|
||||
bool plc_supported;
|
||||
|
||||
// If true, page_tree initialization pre-populates no_ats_ranges. It only
|
||||
// affects ATS systems.
|
||||
bool no_ats_range_required;
|
||||
|
||||
// Parameters used by the TLB batching API
|
||||
struct
|
||||
{
|
||||
@ -1052,14 +1075,16 @@ struct uvm_parent_gpu_struct
|
||||
// Interrupt handling state and locks
|
||||
uvm_isr_info_t isr;
|
||||
|
||||
// Fault buffer info. This is only valid if supports_replayable_faults is set to true
|
||||
// Fault buffer info. This is only valid if replayable_faults_supported is
|
||||
// set to true.
|
||||
uvm_fault_buffer_info_t fault_buffer_info;
|
||||
|
||||
// PMM lazy free processing queue.
|
||||
// TODO: Bug 3881835: revisit whether to use nv_kthread_q_t or workqueue.
|
||||
nv_kthread_q_t lazy_free_q;
|
||||
|
||||
// Access counter buffer info. This is only valid if supports_access_counters is set to true
|
||||
// Access counter buffer info. This is only valid if
|
||||
// access_counters_supported is set to true.
|
||||
uvm_access_counter_buffer_info_t access_counter_buffer_info;
|
||||
|
||||
// Number of uTLBs per GPC. This information is only valid on Pascal+ GPUs.
|
||||
@ -1109,7 +1134,7 @@ struct uvm_parent_gpu_struct
|
||||
uvm_rb_tree_t instance_ptr_table;
|
||||
uvm_spinlock_t instance_ptr_table_lock;
|
||||
|
||||
// This is set to true if the GPU belongs to an SLI group. Else, set to false.
|
||||
// This is set to true if the GPU belongs to an SLI group.
|
||||
bool sli_enabled;
|
||||
|
||||
struct
|
||||
@ -1136,8 +1161,8 @@ struct uvm_parent_gpu_struct
|
||||
// environment, rather than using the peer-id field of the PTE (which can
|
||||
// only address 8 gpus), all gpus are assigned a 47-bit physical address
|
||||
// space by the fabric manager. Any physical address access to these
|
||||
// physical address spaces are routed through the switch to the corresponding
|
||||
// peer.
|
||||
// physical address spaces are routed through the switch to the
|
||||
// corresponding peer.
|
||||
struct
|
||||
{
|
||||
bool is_nvswitch_connected;
|
||||
@ -1175,9 +1200,14 @@ struct uvm_parent_gpu_struct
|
||||
} smmu_war;
|
||||
};
|
||||
|
||||
static const char *uvm_parent_gpu_name(uvm_parent_gpu_t *parent_gpu)
|
||||
{
|
||||
return parent_gpu->name;
|
||||
}
|
||||
|
||||
static const char *uvm_gpu_name(uvm_gpu_t *gpu)
|
||||
{
|
||||
return gpu->parent->name;
|
||||
return gpu->name;
|
||||
}
|
||||
|
||||
static const NvProcessorUuid *uvm_gpu_uuid(uvm_gpu_t *gpu)
|
||||
@ -1362,7 +1392,8 @@ void uvm_gpu_release_pcie_peer_access(uvm_gpu_t *gpu0, uvm_gpu_t *gpu1);
|
||||
// They must not be the same gpu.
|
||||
uvm_aperture_t uvm_gpu_peer_aperture(uvm_gpu_t *local_gpu, uvm_gpu_t *remote_gpu);
|
||||
|
||||
// Get the processor id accessible by the given GPU for the given physical address
|
||||
// Get the processor id accessible by the given GPU for the given physical
|
||||
// address.
|
||||
uvm_processor_id_t uvm_gpu_get_processor_id_by_address(uvm_gpu_t *gpu, uvm_gpu_phys_address_t addr);
|
||||
|
||||
// Get the P2P capabilities between the gpus with the given indexes
|
||||
@ -1407,10 +1438,11 @@ static bool uvm_gpus_are_indirect_peers(uvm_gpu_t *gpu0, uvm_gpu_t *gpu1)
|
||||
// mapping covering the passed address, has been previously created.
|
||||
static uvm_gpu_address_t uvm_gpu_address_virtual_from_vidmem_phys(uvm_gpu_t *gpu, NvU64 pa)
|
||||
{
|
||||
UVM_ASSERT(uvm_mmu_gpu_needs_static_vidmem_mapping(gpu) || uvm_mmu_gpu_needs_dynamic_vidmem_mapping(gpu));
|
||||
UVM_ASSERT(uvm_mmu_parent_gpu_needs_static_vidmem_mapping(gpu->parent) ||
|
||||
uvm_mmu_parent_gpu_needs_dynamic_vidmem_mapping(gpu->parent));
|
||||
UVM_ASSERT(pa <= gpu->mem_info.max_allocatable_address);
|
||||
|
||||
if (uvm_mmu_gpu_needs_static_vidmem_mapping(gpu))
|
||||
if (uvm_mmu_parent_gpu_needs_static_vidmem_mapping(gpu->parent))
|
||||
UVM_ASSERT(gpu->static_flat_mapping.ready);
|
||||
|
||||
return uvm_gpu_address_virtual(gpu->parent->flat_vidmem_va_base + pa);
|
||||
@ -1422,12 +1454,12 @@ static uvm_gpu_address_t uvm_gpu_address_virtual_from_vidmem_phys(uvm_gpu_t *gpu
|
||||
//
|
||||
// The actual GPU mapping only exists if a linear mapping covering the passed
|
||||
// address has been previously created.
|
||||
static uvm_gpu_address_t uvm_gpu_address_virtual_from_sysmem_phys(uvm_gpu_t *gpu, NvU64 pa)
|
||||
static uvm_gpu_address_t uvm_parent_gpu_address_virtual_from_sysmem_phys(uvm_parent_gpu_t *parent_gpu, NvU64 pa)
|
||||
{
|
||||
UVM_ASSERT(uvm_mmu_gpu_needs_dynamic_sysmem_mapping(gpu));
|
||||
UVM_ASSERT(pa <= (gpu->parent->dma_addressable_limit - gpu->parent->dma_addressable_start));
|
||||
UVM_ASSERT(uvm_mmu_parent_gpu_needs_dynamic_sysmem_mapping(parent_gpu));
|
||||
UVM_ASSERT(pa <= (parent_gpu->dma_addressable_limit - parent_gpu->dma_addressable_start));
|
||||
|
||||
return uvm_gpu_address_virtual(gpu->parent->flat_sysmem_va_base + pa);
|
||||
return uvm_gpu_address_virtual(parent_gpu->flat_sysmem_va_base + pa);
|
||||
}
|
||||
|
||||
// Given a GPU or CPU physical address (not peer), retrieve an address suitable
|
||||
@ -1437,11 +1469,12 @@ static uvm_gpu_address_t uvm_gpu_address_copy(uvm_gpu_t *gpu, uvm_gpu_phys_addre
|
||||
UVM_ASSERT(phys_addr.aperture == UVM_APERTURE_VID || phys_addr.aperture == UVM_APERTURE_SYS);
|
||||
|
||||
if (phys_addr.aperture == UVM_APERTURE_VID) {
|
||||
if (uvm_mmu_gpu_needs_static_vidmem_mapping(gpu) || uvm_mmu_gpu_needs_dynamic_vidmem_mapping(gpu))
|
||||
if (uvm_mmu_parent_gpu_needs_static_vidmem_mapping(gpu->parent) ||
|
||||
uvm_mmu_parent_gpu_needs_dynamic_vidmem_mapping(gpu->parent))
|
||||
return uvm_gpu_address_virtual_from_vidmem_phys(gpu, phys_addr.address);
|
||||
}
|
||||
else if (uvm_mmu_gpu_needs_dynamic_sysmem_mapping(gpu)) {
|
||||
return uvm_gpu_address_virtual_from_sysmem_phys(gpu, phys_addr.address);
|
||||
else if (uvm_mmu_parent_gpu_needs_dynamic_sysmem_mapping(gpu->parent)) {
|
||||
return uvm_parent_gpu_address_virtual_from_sysmem_phys(gpu->parent, phys_addr.address);
|
||||
}
|
||||
|
||||
return uvm_gpu_address_from_phys(phys_addr);
|
||||
@ -1459,9 +1492,9 @@ NV_STATUS uvm_gpu_check_ecc_error(uvm_gpu_t *gpu);
|
||||
|
||||
// Check for ECC errors without calling into RM
|
||||
//
|
||||
// Calling into RM is problematic in many places, this check is always safe to do.
|
||||
// Returns NV_WARN_MORE_PROCESSING_REQUIRED if there might be an ECC error and
|
||||
// it's required to call uvm_gpu_check_ecc_error() to be sure.
|
||||
// Calling into RM is problematic in many places, this check is always safe to
|
||||
// do. Returns NV_WARN_MORE_PROCESSING_REQUIRED if there might be an ECC error
|
||||
// and it's required to call uvm_gpu_check_ecc_error() to be sure.
|
||||
NV_STATUS uvm_gpu_check_ecc_error_no_rm(uvm_gpu_t *gpu);
|
||||
|
||||
// Map size bytes of contiguous sysmem on the GPU for physical access
|
||||
@ -1470,19 +1503,19 @@ NV_STATUS uvm_gpu_check_ecc_error_no_rm(uvm_gpu_t *gpu);
|
||||
//
|
||||
// Returns the physical address of the pages that can be used to access them on
|
||||
// the GPU.
|
||||
NV_STATUS uvm_gpu_map_cpu_pages(uvm_parent_gpu_t *parent_gpu, struct page *page, size_t size, NvU64 *dma_address_out);
|
||||
NV_STATUS uvm_parent_gpu_map_cpu_pages(uvm_parent_gpu_t *parent_gpu, struct page *page, size_t size, NvU64 *dma_address_out);
|
||||
|
||||
// Unmap num_pages pages previously mapped with uvm_gpu_map_cpu_pages().
|
||||
void uvm_gpu_unmap_cpu_pages(uvm_parent_gpu_t *parent_gpu, NvU64 dma_address, size_t size);
|
||||
// Unmap size bytes previously mapped with uvm_parent_gpu_map_cpu_pages().
|
||||
void uvm_parent_gpu_unmap_cpu_pages(uvm_parent_gpu_t *parent_gpu, NvU64 dma_address, size_t size);
|
||||
|
||||
static NV_STATUS uvm_gpu_map_cpu_page(uvm_parent_gpu_t *parent_gpu, struct page *page, NvU64 *dma_address_out)
|
||||
static NV_STATUS uvm_parent_gpu_map_cpu_page(uvm_parent_gpu_t *parent_gpu, struct page *page, NvU64 *dma_address_out)
|
||||
{
|
||||
return uvm_gpu_map_cpu_pages(parent_gpu, page, PAGE_SIZE, dma_address_out);
|
||||
return uvm_parent_gpu_map_cpu_pages(parent_gpu, page, PAGE_SIZE, dma_address_out);
|
||||
}
|
||||
|
||||
static void uvm_gpu_unmap_cpu_page(uvm_parent_gpu_t *parent_gpu, NvU64 dma_address)
|
||||
static void uvm_parent_gpu_unmap_cpu_page(uvm_parent_gpu_t *parent_gpu, NvU64 dma_address)
|
||||
{
|
||||
uvm_gpu_unmap_cpu_pages(parent_gpu, dma_address, PAGE_SIZE);
|
||||
uvm_parent_gpu_unmap_cpu_pages(parent_gpu, dma_address, PAGE_SIZE);
|
||||
}
|
||||
|
||||
// Allocate and map a page of system DMA memory on the GPU for physical access
|
||||
@ -1491,13 +1524,13 @@ static void uvm_gpu_unmap_cpu_page(uvm_parent_gpu_t *parent_gpu, NvU64 dma_addre
|
||||
// - the address of the page that can be used to access them on
|
||||
// the GPU in the dma_address_out parameter.
|
||||
// - the address of allocated memory in CPU virtual address space.
|
||||
void *uvm_gpu_dma_alloc_page(uvm_parent_gpu_t *parent_gpu,
|
||||
gfp_t gfp_flags,
|
||||
NvU64 *dma_address_out);
|
||||
void *uvm_parent_gpu_dma_alloc_page(uvm_parent_gpu_t *parent_gpu,
|
||||
gfp_t gfp_flags,
|
||||
NvU64 *dma_address_out);
|
||||
|
||||
// Unmap and free size bytes of contiguous sysmem DMA previously allocated
|
||||
// with uvm_gpu_map_cpu_pages().
|
||||
void uvm_gpu_dma_free_page(uvm_parent_gpu_t *parent_gpu, void *va, NvU64 dma_address);
|
||||
// with uvm_parent_gpu_dma_alloc_page().
|
||||
void uvm_parent_gpu_dma_free_page(uvm_parent_gpu_t *parent_gpu, void *va, NvU64 dma_address);
|
||||
|
||||
// Returns whether the given range is within the GPU's addressable VA ranges.
|
||||
// It requires the input 'addr' to be in canonical form for platforms compliant
|
||||
@ -1518,6 +1551,8 @@ bool uvm_gpu_can_address(uvm_gpu_t *gpu, NvU64 addr, NvU64 size);
|
||||
// The GPU must be initialized before calling this function.
|
||||
bool uvm_gpu_can_address_kernel(uvm_gpu_t *gpu, NvU64 addr, NvU64 size);
|
||||
|
||||
bool uvm_platform_uses_canonical_form_address(void);
|
||||
|
||||
// Returns addr's canonical form for host systems that use canonical form
|
||||
// addresses.
|
||||
NvU64 uvm_parent_gpu_canonical_address(uvm_parent_gpu_t *parent_gpu, NvU64 addr);
|
||||
@ -1527,47 +1562,49 @@ static bool uvm_parent_gpu_is_coherent(const uvm_parent_gpu_t *parent_gpu)
|
||||
return parent_gpu->system_bus.memory_window_end > parent_gpu->system_bus.memory_window_start;
|
||||
}
|
||||
|
||||
static bool uvm_gpu_has_pushbuffer_segments(uvm_gpu_t *gpu)
|
||||
static bool uvm_parent_gpu_needs_pushbuffer_segments(uvm_parent_gpu_t *parent_gpu)
|
||||
{
|
||||
return gpu->parent->max_host_va > (1ull << 40);
|
||||
return parent_gpu->max_host_va > (1ull << 40);
|
||||
}
|
||||
|
||||
static bool uvm_gpu_supports_eviction(uvm_gpu_t *gpu)
|
||||
static bool uvm_parent_gpu_supports_eviction(uvm_parent_gpu_t *parent_gpu)
|
||||
{
|
||||
// Eviction is supported only if the GPU supports replayable faults
|
||||
return gpu->parent->replayable_faults_supported;
|
||||
return parent_gpu->replayable_faults_supported;
|
||||
}
|
||||
|
||||
static bool uvm_gpu_is_virt_mode_sriov_heavy(const uvm_gpu_t *gpu)
|
||||
static bool uvm_parent_gpu_is_virt_mode_sriov_heavy(const uvm_parent_gpu_t *parent_gpu)
|
||||
{
|
||||
return gpu->parent->virt_mode == UVM_VIRT_MODE_SRIOV_HEAVY;
|
||||
return parent_gpu->virt_mode == UVM_VIRT_MODE_SRIOV_HEAVY;
|
||||
}
|
||||
|
||||
static bool uvm_gpu_is_virt_mode_sriov_standard(const uvm_gpu_t *gpu)
|
||||
static bool uvm_parent_gpu_is_virt_mode_sriov_standard(const uvm_parent_gpu_t *parent_gpu)
|
||||
{
|
||||
return gpu->parent->virt_mode == UVM_VIRT_MODE_SRIOV_STANDARD;
|
||||
return parent_gpu->virt_mode == UVM_VIRT_MODE_SRIOV_STANDARD;
|
||||
}
|
||||
|
||||
// Returns true if the virtualization mode is SR-IOV heavy or SR-IOV standard.
|
||||
static bool uvm_gpu_is_virt_mode_sriov(const uvm_gpu_t *gpu)
|
||||
static bool uvm_parent_gpu_is_virt_mode_sriov(const uvm_parent_gpu_t *parent_gpu)
|
||||
{
|
||||
return uvm_gpu_is_virt_mode_sriov_heavy(gpu) || uvm_gpu_is_virt_mode_sriov_standard(gpu);
|
||||
return uvm_parent_gpu_is_virt_mode_sriov_heavy(parent_gpu) ||
|
||||
uvm_parent_gpu_is_virt_mode_sriov_standard(parent_gpu);
|
||||
}
|
||||
|
||||
static bool uvm_gpu_uses_proxy_channel_pool(const uvm_gpu_t *gpu)
|
||||
static bool uvm_parent_gpu_needs_proxy_channel_pool(const uvm_parent_gpu_t *parent_gpu)
|
||||
{
|
||||
return uvm_gpu_is_virt_mode_sriov_heavy(gpu);
|
||||
return uvm_parent_gpu_is_virt_mode_sriov_heavy(parent_gpu);
|
||||
}
|
||||
|
||||
uvm_aperture_t uvm_gpu_page_tree_init_location(const uvm_gpu_t *gpu);
|
||||
uvm_aperture_t uvm_get_page_tree_location(const uvm_parent_gpu_t *parent_gpu);
|
||||
|
||||
// Debug print of GPU properties
|
||||
void uvm_gpu_print(uvm_gpu_t *gpu);
|
||||
|
||||
// Add the given instance pointer -> user_channel mapping to this GPU. The bottom
|
||||
// half GPU page fault handler uses this to look up the VA space for GPU faults.
|
||||
NV_STATUS uvm_gpu_add_user_channel(uvm_gpu_t *gpu, uvm_user_channel_t *user_channel);
|
||||
void uvm_gpu_remove_user_channel(uvm_gpu_t *gpu, uvm_user_channel_t *user_channel);
|
||||
// Add the given instance pointer -> user_channel mapping to this GPU. The
|
||||
// bottom half GPU page fault handler uses this to look up the VA space for GPU
|
||||
// faults.
|
||||
NV_STATUS uvm_parent_gpu_add_user_channel(uvm_parent_gpu_t *parent_gpu, uvm_user_channel_t *user_channel);
|
||||
void uvm_parent_gpu_remove_user_channel(uvm_parent_gpu_t *parent_gpu, uvm_user_channel_t *user_channel);
|
||||
|
||||
// Looks up an entry added by uvm_gpu_add_user_channel. Return codes:
|
||||
// NV_OK Translation successful
|
||||
@ -1578,13 +1615,13 @@ void uvm_gpu_remove_user_channel(uvm_gpu_t *gpu, uvm_user_channel_t *user_channe
|
||||
// out_va_space is valid if NV_OK is returned, otherwise it's NULL. The caller
|
||||
// is responsible for ensuring that the returned va_space can't be destroyed,
|
||||
// so these functions should only be called from the bottom half.
|
||||
NV_STATUS uvm_gpu_fault_entry_to_va_space(uvm_gpu_t *gpu,
|
||||
uvm_fault_buffer_entry_t *fault,
|
||||
uvm_va_space_t **out_va_space);
|
||||
NV_STATUS uvm_parent_gpu_fault_entry_to_va_space(uvm_parent_gpu_t *parent_gpu,
|
||||
uvm_fault_buffer_entry_t *fault,
|
||||
uvm_va_space_t **out_va_space);
|
||||
|
||||
NV_STATUS uvm_gpu_access_counter_entry_to_va_space(uvm_gpu_t *gpu,
|
||||
uvm_access_counter_buffer_entry_t *entry,
|
||||
uvm_va_space_t **out_va_space);
|
||||
NV_STATUS uvm_parent_gpu_access_counter_entry_to_va_space(uvm_parent_gpu_t *parent_gpu,
|
||||
uvm_access_counter_buffer_entry_t *entry,
|
||||
uvm_va_space_t **out_va_space);
|
||||
|
||||
typedef enum
|
||||
{
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*******************************************************************************
|
||||
Copyright (c) 2017-2022 NVIDIA Corporation
|
||||
Copyright (c) 2017-2023 NVIDIA Corporation
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to
|
||||
@ -33,17 +33,18 @@
|
||||
#include "uvm_va_space_mm.h"
|
||||
#include "uvm_pmm_sysmem.h"
|
||||
#include "uvm_perf_module.h"
|
||||
#include "uvm_ats_ibm.h"
|
||||
#include "uvm_ats_faults.h"
|
||||
|
||||
#define UVM_PERF_ACCESS_COUNTER_BATCH_COUNT_MIN 1
|
||||
#define UVM_PERF_ACCESS_COUNTER_BATCH_COUNT_DEFAULT 256
|
||||
#define UVM_PERF_ACCESS_COUNTER_GRANULARITY_DEFAULT "2m"
|
||||
#define UVM_PERF_ACCESS_COUNTER_GRANULARITY UVM_ACCESS_COUNTER_GRANULARITY_2M
|
||||
#define UVM_PERF_ACCESS_COUNTER_THRESHOLD_MIN 1
|
||||
#define UVM_PERF_ACCESS_COUNTER_THRESHOLD_MAX ((1 << 16) - 1)
|
||||
#define UVM_PERF_ACCESS_COUNTER_THRESHOLD_DEFAULT 256
|
||||
|
||||
#define UVM_ACCESS_COUNTER_ACTION_NOTIFY 0x1
|
||||
#define UVM_ACCESS_COUNTER_ACTION_CLEAR 0x2
|
||||
#define UVM_ACCESS_COUNTER_ON_MANAGED 0x4
|
||||
#define UVM_ACCESS_COUNTER_ACTION_CLEAR 0x1
|
||||
#define UVM_ACCESS_COUNTER_PHYS_ON_MANAGED 0x2
|
||||
|
||||
// Each page in a tracked physical range may belong to a different VA Block. We
|
||||
// preallocate an array of reverse map translations. However, access counter
|
||||
@ -54,12 +55,6 @@
|
||||
#define UVM_MAX_TRANSLATION_SIZE (2 * 1024 * 1024ULL)
|
||||
#define UVM_SUB_GRANULARITY_REGIONS 32
|
||||
|
||||
// The GPU offers the following tracking granularities: 64K, 2M, 16M, 16G
|
||||
//
|
||||
// Use the largest granularity to minimize the number of access counter
|
||||
// notifications. This is fine because we simply drop the notifications during
|
||||
// normal operation, and tests override these values.
|
||||
static UVM_ACCESS_COUNTER_GRANULARITY g_uvm_access_counter_granularity;
|
||||
static unsigned g_uvm_access_counter_threshold;
|
||||
|
||||
// Per-VA space access counters information
|
||||
@ -87,7 +82,6 @@ static int uvm_perf_access_counter_momc_migration_enable = -1;
|
||||
static unsigned uvm_perf_access_counter_batch_count = UVM_PERF_ACCESS_COUNTER_BATCH_COUNT_DEFAULT;
|
||||
|
||||
// See module param documentation below
|
||||
static char *uvm_perf_access_counter_granularity = UVM_PERF_ACCESS_COUNTER_GRANULARITY_DEFAULT;
|
||||
static unsigned uvm_perf_access_counter_threshold = UVM_PERF_ACCESS_COUNTER_THRESHOLD_DEFAULT;
|
||||
|
||||
// Module parameters for the tunables
|
||||
@ -100,10 +94,6 @@ MODULE_PARM_DESC(uvm_perf_access_counter_momc_migration_enable,
|
||||
"Whether MOMC access counters will trigger migrations."
|
||||
"Valid values: <= -1 (default policy), 0 (off), >= 1 (on)");
|
||||
module_param(uvm_perf_access_counter_batch_count, uint, S_IRUGO);
|
||||
module_param(uvm_perf_access_counter_granularity, charp, S_IRUGO);
|
||||
MODULE_PARM_DESC(uvm_perf_access_counter_granularity,
|
||||
"Size of the physical memory region tracked by each counter. Valid values as"
|
||||
"of Volta: 64k, 2m, 16m, 16g");
|
||||
module_param(uvm_perf_access_counter_threshold, uint, S_IRUGO);
|
||||
MODULE_PARM_DESC(uvm_perf_access_counter_threshold,
|
||||
"Number of remote accesses on a region required to trigger a notification."
|
||||
@ -136,7 +126,7 @@ static va_space_access_counters_info_t *va_space_access_counters_info_get(uvm_va
|
||||
|
||||
// Whether access counter migrations are enabled or not. The policy is as
|
||||
// follows:
|
||||
// - MIMC migrations are enabled by default on P9 systems with ATS support
|
||||
// - MIMC migrations are disabled by default on all systems except P9.
|
||||
// - MOMC migrations are disabled by default on all systems
|
||||
// - Users can override this policy by specifying on/off
|
||||
static bool is_migration_enabled(uvm_access_counter_type_t type)
|
||||
@ -159,7 +149,10 @@ static bool is_migration_enabled(uvm_access_counter_type_t type)
|
||||
if (type == UVM_ACCESS_COUNTER_TYPE_MOMC)
|
||||
return false;
|
||||
|
||||
return g_uvm_global.ats.supported;
|
||||
if (UVM_ATS_IBM_SUPPORTED())
|
||||
return g_uvm_global.ats.supported;
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
// Create the access counters tracking struct for the given VA space
|
||||
@ -225,30 +218,18 @@ static NV_STATUS config_granularity_to_bytes(UVM_ACCESS_COUNTER_GRANULARITY gran
|
||||
return NV_OK;
|
||||
}
|
||||
|
||||
// Clear the given access counter and add it to the per-GPU clear tracker
|
||||
static NV_STATUS access_counter_clear_targeted(uvm_gpu_t *gpu,
|
||||
const uvm_access_counter_buffer_entry_t *entry)
|
||||
// Clear the access counter notifications and add it to the per-GPU clear
|
||||
// tracker.
|
||||
static NV_STATUS access_counter_clear_notifications(uvm_gpu_t *gpu,
|
||||
uvm_access_counter_buffer_entry_t **notification_start,
|
||||
NvU32 num_notifications)
|
||||
{
|
||||
NvU32 i;
|
||||
NV_STATUS status;
|
||||
uvm_push_t push;
|
||||
uvm_access_counter_buffer_info_t *access_counters = &gpu->parent->access_counter_buffer_info;
|
||||
|
||||
if (entry->address.is_virtual) {
|
||||
status = uvm_push_begin(gpu->channel_manager,
|
||||
UVM_CHANNEL_TYPE_MEMOPS,
|
||||
&push,
|
||||
"Clear access counter with virtual address: 0x%llx",
|
||||
entry->address.address);
|
||||
}
|
||||
else {
|
||||
status = uvm_push_begin(gpu->channel_manager,
|
||||
UVM_CHANNEL_TYPE_MEMOPS,
|
||||
&push,
|
||||
"Clear access counter with physical address: 0x%llx:%s",
|
||||
entry->address.address,
|
||||
uvm_aperture_string(entry->address.aperture));
|
||||
}
|
||||
|
||||
status = uvm_push_begin(gpu->channel_manager, UVM_CHANNEL_TYPE_MEMOPS, &push, "Clear access counter batch");
|
||||
if (status != NV_OK) {
|
||||
UVM_ERR_PRINT("Error creating push to clear access counters: %s, GPU %s\n",
|
||||
nvstatusToString(status),
|
||||
@ -256,7 +237,8 @@ static NV_STATUS access_counter_clear_targeted(uvm_gpu_t *gpu,
|
||||
return status;
|
||||
}
|
||||
|
||||
gpu->parent->host_hal->access_counter_clear_targeted(&push, entry);
|
||||
for (i = 0; i < num_notifications; i++)
|
||||
gpu->parent->host_hal->access_counter_clear_targeted(&push, notification_start[i]);
|
||||
|
||||
uvm_push_end(&push);
|
||||
|
||||
@ -381,25 +363,6 @@ NV_STATUS uvm_gpu_init_access_counters(uvm_parent_gpu_t *parent_gpu)
|
||||
g_uvm_access_counter_threshold = uvm_perf_access_counter_threshold;
|
||||
}
|
||||
|
||||
if (strcmp(uvm_perf_access_counter_granularity, "64k") == 0) {
|
||||
g_uvm_access_counter_granularity = UVM_ACCESS_COUNTER_GRANULARITY_64K;
|
||||
}
|
||||
else if (strcmp(uvm_perf_access_counter_granularity, "2m") == 0) {
|
||||
g_uvm_access_counter_granularity = UVM_ACCESS_COUNTER_GRANULARITY_2M;
|
||||
}
|
||||
else if (strcmp(uvm_perf_access_counter_granularity, "16m") == 0) {
|
||||
g_uvm_access_counter_granularity = UVM_ACCESS_COUNTER_GRANULARITY_16M;
|
||||
}
|
||||
else if (strcmp(uvm_perf_access_counter_granularity, "16g") == 0) {
|
||||
g_uvm_access_counter_granularity = UVM_ACCESS_COUNTER_GRANULARITY_16G;
|
||||
}
|
||||
else {
|
||||
g_uvm_access_counter_granularity = UVM_ACCESS_COUNTER_GRANULARITY_2M;
|
||||
pr_info("Invalid value '%s' for uvm_perf_access_counter_granularity, using '%s' instead",
|
||||
uvm_perf_access_counter_granularity,
|
||||
UVM_PERF_ACCESS_COUNTER_GRANULARITY_DEFAULT);
|
||||
}
|
||||
|
||||
uvm_assert_mutex_locked(&g_uvm_global.global_lock);
|
||||
UVM_ASSERT(parent_gpu->access_counter_buffer_hal != NULL);
|
||||
|
||||
@ -409,7 +372,7 @@ NV_STATUS uvm_gpu_init_access_counters(uvm_parent_gpu_t *parent_gpu)
|
||||
if (status != NV_OK) {
|
||||
UVM_ERR_PRINT("Failed to init notify buffer info from RM: %s, GPU %s\n",
|
||||
nvstatusToString(status),
|
||||
parent_gpu->name);
|
||||
uvm_parent_gpu_name(parent_gpu));
|
||||
|
||||
// nvUvmInterfaceInitAccessCntrInfo may leave fields in rm_info
|
||||
// populated when it returns an error. Set the buffer handle to zero as
|
||||
@ -422,7 +385,7 @@ NV_STATUS uvm_gpu_init_access_counters(uvm_parent_gpu_t *parent_gpu)
|
||||
UVM_ASSERT(access_counters->rm_info.bufferSize %
|
||||
parent_gpu->access_counter_buffer_hal->entry_size(parent_gpu) == 0);
|
||||
|
||||
status = config_granularity_to_bytes(g_uvm_access_counter_granularity, &granularity_bytes);
|
||||
status = config_granularity_to_bytes(UVM_PERF_ACCESS_COUNTER_GRANULARITY, &granularity_bytes);
|
||||
UVM_ASSERT(status == NV_OK);
|
||||
if (granularity_bytes > UVM_MAX_TRANSLATION_SIZE)
|
||||
UVM_ASSERT(granularity_bytes % UVM_MAX_TRANSLATION_SIZE == 0);
|
||||
@ -443,7 +406,7 @@ NV_STATUS uvm_gpu_init_access_counters(uvm_parent_gpu_t *parent_gpu)
|
||||
|
||||
if (access_counters->max_batch_size != uvm_perf_access_counter_batch_count) {
|
||||
pr_info("Invalid uvm_perf_access_counter_batch_count value on GPU %s: %u. Valid range [%u:%u] Using %u instead\n",
|
||||
parent_gpu->name,
|
||||
uvm_parent_gpu_name(parent_gpu),
|
||||
uvm_perf_access_counter_batch_count,
|
||||
UVM_PERF_ACCESS_COUNTER_BATCH_COUNT_MIN,
|
||||
access_counters->max_notifications,
|
||||
@ -633,7 +596,7 @@ NV_STATUS uvm_gpu_access_counters_enable(uvm_gpu_t *gpu, uvm_va_space_t *va_spac
|
||||
|
||||
UVM_ASSERT(gpu->parent->access_counters_supported);
|
||||
|
||||
uvm_gpu_access_counters_isr_lock(gpu->parent);
|
||||
uvm_parent_gpu_access_counters_isr_lock(gpu->parent);
|
||||
|
||||
if (uvm_processor_mask_test(&va_space->access_counters_enabled_processors, gpu->id)) {
|
||||
status = NV_ERR_INVALID_DEVICE;
|
||||
@ -641,8 +604,8 @@ NV_STATUS uvm_gpu_access_counters_enable(uvm_gpu_t *gpu, uvm_va_space_t *va_spac
|
||||
else {
|
||||
UvmGpuAccessCntrConfig default_config =
|
||||
{
|
||||
.mimcGranularity = g_uvm_access_counter_granularity,
|
||||
.momcGranularity = g_uvm_access_counter_granularity,
|
||||
.mimcGranularity = UVM_PERF_ACCESS_COUNTER_GRANULARITY,
|
||||
.momcGranularity = UVM_PERF_ACCESS_COUNTER_GRANULARITY,
|
||||
.mimcUseLimit = UVM_ACCESS_COUNTER_USE_LIMIT_FULL,
|
||||
.momcUseLimit = UVM_ACCESS_COUNTER_USE_LIMIT_FULL,
|
||||
.threshold = g_uvm_access_counter_threshold,
|
||||
@ -658,7 +621,7 @@ NV_STATUS uvm_gpu_access_counters_enable(uvm_gpu_t *gpu, uvm_va_space_t *va_spac
|
||||
|
||||
// If this is the first reference taken on access counters, dropping the
|
||||
// ISR lock will enable interrupts.
|
||||
uvm_gpu_access_counters_isr_unlock(gpu->parent);
|
||||
uvm_parent_gpu_access_counters_isr_unlock(gpu->parent);
|
||||
|
||||
return status;
|
||||
}
|
||||
@ -667,7 +630,7 @@ void uvm_gpu_access_counters_disable(uvm_gpu_t *gpu, uvm_va_space_t *va_space)
|
||||
{
|
||||
UVM_ASSERT(gpu->parent->access_counters_supported);
|
||||
|
||||
uvm_gpu_access_counters_isr_lock(gpu->parent);
|
||||
uvm_parent_gpu_access_counters_isr_lock(gpu->parent);
|
||||
|
||||
if (uvm_processor_mask_test_and_clear_atomic(&va_space->access_counters_enabled_processors, gpu->id)) {
|
||||
gpu_access_counters_disable(gpu);
|
||||
@ -678,7 +641,7 @@ void uvm_gpu_access_counters_disable(uvm_gpu_t *gpu, uvm_va_space_t *va_space)
|
||||
gpu->parent->access_counter_buffer_info.reconfiguration_owner = NULL;
|
||||
}
|
||||
|
||||
uvm_gpu_access_counters_isr_unlock(gpu->parent);
|
||||
uvm_parent_gpu_access_counters_isr_unlock(gpu->parent);
|
||||
}
|
||||
|
||||
static void write_get(uvm_parent_gpu_t *parent_gpu, NvU32 get)
|
||||
@ -733,12 +696,12 @@ void uvm_gpu_access_counter_buffer_flush(uvm_gpu_t *gpu)
|
||||
UVM_ASSERT(gpu->parent->access_counters_supported);
|
||||
|
||||
// Disables access counter interrupts and notification servicing
|
||||
uvm_gpu_access_counters_isr_lock(gpu->parent);
|
||||
uvm_parent_gpu_access_counters_isr_lock(gpu->parent);
|
||||
|
||||
if (gpu->parent->isr.access_counters.handling_ref_count > 0)
|
||||
access_counter_buffer_flush_locked(gpu, UVM_GPU_BUFFER_FLUSH_MODE_UPDATE_PUT);
|
||||
|
||||
uvm_gpu_access_counters_isr_unlock(gpu->parent);
|
||||
uvm_parent_gpu_access_counters_isr_unlock(gpu->parent);
|
||||
}
|
||||
|
||||
static inline int cmp_access_counter_instance_ptr(const uvm_access_counter_buffer_entry_t *a,
|
||||
@ -767,6 +730,22 @@ static int cmp_sort_virt_notifications_by_instance_ptr(const void *_a, const voi
|
||||
return cmp_access_counter_instance_ptr(a, b);
|
||||
}
|
||||
|
||||
// Sort comparator for pointers to GVA access counter notification buffer
|
||||
// entries that sorts by va_space, then by notification address.
|
||||
static int cmp_sort_virt_notifications_by_va_space_address(const void *_a, const void *_b)
|
||||
{
|
||||
const uvm_access_counter_buffer_entry_t **a = (const uvm_access_counter_buffer_entry_t **)_a;
|
||||
const uvm_access_counter_buffer_entry_t **b = (const uvm_access_counter_buffer_entry_t **)_b;
|
||||
|
||||
int result;
|
||||
|
||||
result = UVM_CMP_DEFAULT((*a)->virtual_info.va_space, (*b)->virtual_info.va_space);
|
||||
if (result != 0)
|
||||
return result;
|
||||
|
||||
return UVM_CMP_DEFAULT((*a)->address.address, (*b)->address.address);
|
||||
}
|
||||
|
||||
// Sort comparator for pointers to GPA access counter notification buffer
|
||||
// entries that sorts by physical address' aperture
|
||||
static int cmp_sort_phys_notifications_by_processor_id(const void *_a, const void *_b)
|
||||
@ -896,7 +875,7 @@ done:
|
||||
return notification_index;
|
||||
}
|
||||
|
||||
static void translate_virt_notifications_instance_ptrs(uvm_gpu_t *gpu,
|
||||
static void translate_virt_notifications_instance_ptrs(uvm_parent_gpu_t *parent_gpu,
|
||||
uvm_access_counter_service_batch_context_t *batch_context)
|
||||
{
|
||||
NvU32 i;
|
||||
@ -910,9 +889,9 @@ static void translate_virt_notifications_instance_ptrs(uvm_gpu_t *gpu,
|
||||
// If instance_ptr is different, make a new translation. If the
|
||||
// translation fails then va_space will be NULL and the entry will
|
||||
// simply be ignored in subsequent processing.
|
||||
status = uvm_gpu_access_counter_entry_to_va_space(gpu,
|
||||
current_entry,
|
||||
¤t_entry->virtual_info.va_space);
|
||||
status = uvm_parent_gpu_access_counter_entry_to_va_space(parent_gpu,
|
||||
current_entry,
|
||||
¤t_entry->virtual_info.va_space);
|
||||
if (status != NV_OK)
|
||||
UVM_ASSERT(current_entry->virtual_info.va_space == NULL);
|
||||
}
|
||||
@ -924,12 +903,11 @@ static void translate_virt_notifications_instance_ptrs(uvm_gpu_t *gpu,
|
||||
|
||||
// GVA notifications provide an instance_ptr and ve_id that can be directly
|
||||
// translated to a VA space. In order to minimize translations, we sort the
|
||||
// entries by instance_ptr.
|
||||
static void preprocess_virt_notifications(uvm_gpu_t *gpu,
|
||||
// entries by instance_ptr, va_space and notification address in that order.
|
||||
static void preprocess_virt_notifications(uvm_parent_gpu_t *parent_gpu,
|
||||
uvm_access_counter_service_batch_context_t *batch_context)
|
||||
{
|
||||
if (!batch_context->virt.is_single_instance_ptr) {
|
||||
// Sort by instance_ptr
|
||||
sort(batch_context->virt.notifications,
|
||||
batch_context->virt.num_notifications,
|
||||
sizeof(*batch_context->virt.notifications),
|
||||
@ -937,7 +915,13 @@ static void preprocess_virt_notifications(uvm_gpu_t *gpu,
|
||||
NULL);
|
||||
}
|
||||
|
||||
translate_virt_notifications_instance_ptrs(gpu, batch_context);
|
||||
translate_virt_notifications_instance_ptrs(parent_gpu, batch_context);
|
||||
|
||||
sort(batch_context->virt.notifications,
|
||||
batch_context->virt.num_notifications,
|
||||
sizeof(*batch_context->virt.notifications),
|
||||
cmp_sort_virt_notifications_by_va_space_address,
|
||||
NULL);
|
||||
}
|
||||
|
||||
// GPA notifications provide a physical address and an aperture. Sort
|
||||
@ -946,7 +930,6 @@ static void preprocess_virt_notifications(uvm_gpu_t *gpu,
|
||||
static void preprocess_phys_notifications(uvm_access_counter_service_batch_context_t *batch_context)
|
||||
{
|
||||
if (!batch_context->phys.is_single_aperture) {
|
||||
// Sort by instance_ptr
|
||||
sort(batch_context->phys.notifications,
|
||||
batch_context->phys.num_notifications,
|
||||
sizeof(*batch_context->phys.notifications),
|
||||
@ -955,6 +938,28 @@ static void preprocess_phys_notifications(uvm_access_counter_service_batch_conte
|
||||
}
|
||||
}
|
||||
|
||||
static NV_STATUS notify_tools_and_process_flags(uvm_gpu_t *gpu,
|
||||
uvm_access_counter_buffer_entry_t **notification_start,
|
||||
NvU32 num_entries,
|
||||
NvU32 flags)
|
||||
{
|
||||
NV_STATUS status = NV_OK;
|
||||
|
||||
if (uvm_enable_builtin_tests) {
|
||||
// TODO: Bug 4310744: [UVM][TOOLS] Attribute access counter tools events
|
||||
// to va_space instead of broadcasting.
|
||||
NvU32 i;
|
||||
|
||||
for (i = 0; i < num_entries; i++)
|
||||
uvm_tools_broadcast_access_counter(gpu, notification_start[i], flags & UVM_ACCESS_COUNTER_PHYS_ON_MANAGED);
|
||||
}
|
||||
|
||||
if (flags & UVM_ACCESS_COUNTER_ACTION_CLEAR)
|
||||
status = access_counter_clear_notifications(gpu, notification_start, num_entries);
|
||||
|
||||
return status;
|
||||
}
|
||||
|
||||
static NV_STATUS service_va_block_locked(uvm_processor_id_t processor,
|
||||
uvm_va_block_t *va_block,
|
||||
uvm_va_block_retry_t *va_block_retry,
|
||||
@ -969,6 +974,7 @@ static NV_STATUS service_va_block_locked(uvm_processor_id_t processor,
|
||||
uvm_page_index_t last_page_index;
|
||||
NvU32 page_count = 0;
|
||||
const uvm_page_mask_t *residency_mask;
|
||||
const bool hmm_migratable = true;
|
||||
|
||||
uvm_assert_mutex_locked(&va_block->lock);
|
||||
|
||||
@ -1055,6 +1061,7 @@ static NV_STATUS service_va_block_locked(uvm_processor_id_t processor,
|
||||
policy,
|
||||
&thrashing_hint,
|
||||
UVM_SERVICE_OPERATION_ACCESS_COUNTERS,
|
||||
hmm_migratable,
|
||||
&read_duplicate);
|
||||
|
||||
if (!uvm_processor_mask_test_and_set(&service_context->resident_processors, new_residency))
|
||||
@ -1163,7 +1170,7 @@ static NV_STATUS service_phys_single_va_block(uvm_gpu_t *gpu,
|
||||
const uvm_access_counter_buffer_entry_t *current_entry,
|
||||
const uvm_reverse_map_t *reverse_mappings,
|
||||
size_t num_reverse_mappings,
|
||||
unsigned *out_flags)
|
||||
NvU32 *out_flags)
|
||||
{
|
||||
size_t index;
|
||||
uvm_va_block_t *va_block = reverse_mappings[0].va_block;
|
||||
@ -1190,7 +1197,6 @@ static NV_STATUS service_phys_single_va_block(uvm_gpu_t *gpu,
|
||||
// If an mm is registered with the VA space, we have to retain it
|
||||
// in order to lock it before locking the VA space.
|
||||
mm = uvm_va_space_mm_retain_lock(va_space);
|
||||
|
||||
uvm_va_space_down_read(va_space);
|
||||
|
||||
// Re-check that the VA block is valid after taking the VA block lock.
|
||||
@ -1208,10 +1214,8 @@ static NV_STATUS service_phys_single_va_block(uvm_gpu_t *gpu,
|
||||
service_context->num_retries = 0;
|
||||
service_context->block_context->mm = mm;
|
||||
|
||||
if (uvm_va_block_is_hmm(va_block)) {
|
||||
uvm_hmm_service_context_init(service_context);
|
||||
if (uvm_va_block_is_hmm(va_block))
|
||||
uvm_hmm_migrate_begin_wait(va_block);
|
||||
}
|
||||
|
||||
uvm_mutex_lock(&va_block->lock);
|
||||
|
||||
@ -1226,9 +1230,15 @@ static NV_STATUS service_phys_single_va_block(uvm_gpu_t *gpu,
|
||||
|
||||
uvm_mutex_unlock(&va_block->lock);
|
||||
|
||||
if (uvm_va_block_is_hmm(va_block))
|
||||
if (uvm_va_block_is_hmm(va_block)) {
|
||||
uvm_hmm_migrate_finish(va_block);
|
||||
|
||||
// If the pages could not be migrated, no need to try again,
|
||||
// this is best effort only.
|
||||
if (status == NV_WARN_MORE_PROCESSING_REQUIRED || status == NV_WARN_MISMATCHED_TARGET)
|
||||
status = NV_OK;
|
||||
}
|
||||
|
||||
if (status == NV_OK)
|
||||
*out_flags |= UVM_ACCESS_COUNTER_ACTION_CLEAR;
|
||||
}
|
||||
@ -1251,7 +1261,7 @@ static NV_STATUS service_phys_va_blocks(uvm_gpu_t *gpu,
|
||||
const uvm_access_counter_buffer_entry_t *current_entry,
|
||||
const uvm_reverse_map_t *reverse_mappings,
|
||||
size_t num_reverse_mappings,
|
||||
unsigned *out_flags)
|
||||
NvU32 *out_flags)
|
||||
{
|
||||
NV_STATUS status = NV_OK;
|
||||
size_t index;
|
||||
@ -1259,7 +1269,7 @@ static NV_STATUS service_phys_va_blocks(uvm_gpu_t *gpu,
|
||||
*out_flags &= ~UVM_ACCESS_COUNTER_ACTION_CLEAR;
|
||||
|
||||
for (index = 0; index < num_reverse_mappings; ++index) {
|
||||
unsigned out_flags_local = 0;
|
||||
NvU32 out_flags_local = 0;
|
||||
status = service_phys_single_va_block(gpu,
|
||||
batch_context,
|
||||
current_entry,
|
||||
@ -1318,7 +1328,7 @@ static NV_STATUS service_phys_notification_translation(uvm_gpu_t *gpu,
|
||||
NvU64 address,
|
||||
unsigned long sub_granularity,
|
||||
size_t *num_reverse_mappings,
|
||||
unsigned *out_flags)
|
||||
NvU32 *out_flags)
|
||||
{
|
||||
NV_STATUS status;
|
||||
NvU32 region_start, region_end;
|
||||
@ -1327,7 +1337,10 @@ static NV_STATUS service_phys_notification_translation(uvm_gpu_t *gpu,
|
||||
|
||||
// Get the reverse_map translations for all the regions set in the
|
||||
// sub_granularity field of the counter.
|
||||
for_each_sub_granularity_region(region_start, region_end, sub_granularity, config->sub_granularity_regions_per_translation) {
|
||||
for_each_sub_granularity_region(region_start,
|
||||
region_end,
|
||||
sub_granularity,
|
||||
config->sub_granularity_regions_per_translation) {
|
||||
NvU64 local_address = address + region_start * config->sub_granularity_region_size;
|
||||
NvU32 local_translation_size = (region_end - region_start) * config->sub_granularity_region_size;
|
||||
uvm_reverse_map_t *local_reverse_mappings = batch_context->phys.translations + *num_reverse_mappings;
|
||||
@ -1376,7 +1389,7 @@ static NV_STATUS service_phys_notification_translation(uvm_gpu_t *gpu,
|
||||
static NV_STATUS service_phys_notification(uvm_gpu_t *gpu,
|
||||
uvm_access_counter_service_batch_context_t *batch_context,
|
||||
const uvm_access_counter_buffer_entry_t *current_entry,
|
||||
unsigned *out_flags)
|
||||
NvU32 *out_flags)
|
||||
{
|
||||
NvU64 address;
|
||||
NvU64 translation_index;
|
||||
@ -1387,7 +1400,7 @@ static NV_STATUS service_phys_notification(uvm_gpu_t *gpu,
|
||||
size_t total_reverse_mappings = 0;
|
||||
uvm_gpu_t *resident_gpu = NULL;
|
||||
NV_STATUS status = NV_OK;
|
||||
unsigned flags = 0;
|
||||
NvU32 flags = 0;
|
||||
|
||||
address = current_entry->address.address;
|
||||
UVM_ASSERT(address % config->translation_size == 0);
|
||||
@ -1397,7 +1410,7 @@ static NV_STATUS service_phys_notification(uvm_gpu_t *gpu,
|
||||
sub_granularity = 1;
|
||||
|
||||
if (UVM_ID_IS_GPU(current_entry->physical_info.resident_id)) {
|
||||
resident_gpu = uvm_gpu_get_by_processor_id(current_entry->physical_info.resident_id);
|
||||
resident_gpu = uvm_gpu_get(current_entry->physical_info.resident_id);
|
||||
UVM_ASSERT(resident_gpu != NULL);
|
||||
|
||||
if (gpu != resident_gpu && uvm_gpus_are_nvswitch_connected(gpu, resident_gpu)) {
|
||||
@ -1415,7 +1428,7 @@ static NV_STATUS service_phys_notification(uvm_gpu_t *gpu,
|
||||
|
||||
for (translation_index = 0; translation_index < config->translations_per_counter; ++translation_index) {
|
||||
size_t num_reverse_mappings;
|
||||
unsigned out_flags_local = 0;
|
||||
NvU32 out_flags_local = 0;
|
||||
status = service_phys_notification_translation(gpu,
|
||||
resident_gpu,
|
||||
batch_context,
|
||||
@ -1437,11 +1450,8 @@ static NV_STATUS service_phys_notification(uvm_gpu_t *gpu,
|
||||
sub_granularity = sub_granularity >> config->sub_granularity_regions_per_translation;
|
||||
}
|
||||
|
||||
// Currently we only report events for our tests, not for tools
|
||||
if (uvm_enable_builtin_tests) {
|
||||
*out_flags |= UVM_ACCESS_COUNTER_ACTION_NOTIFY;
|
||||
*out_flags |= ((total_reverse_mappings != 0) ? UVM_ACCESS_COUNTER_ON_MANAGED : 0);
|
||||
}
|
||||
if (uvm_enable_builtin_tests)
|
||||
*out_flags |= ((total_reverse_mappings != 0) ? UVM_ACCESS_COUNTER_PHYS_ON_MANAGED : 0);
|
||||
|
||||
if (status == NV_OK && (flags & UVM_ACCESS_COUNTER_ACTION_CLEAR))
|
||||
*out_flags |= UVM_ACCESS_COUNTER_ACTION_CLEAR;
|
||||
@ -1454,22 +1464,23 @@ static NV_STATUS service_phys_notifications(uvm_gpu_t *gpu,
|
||||
uvm_access_counter_service_batch_context_t *batch_context)
|
||||
{
|
||||
NvU32 i;
|
||||
uvm_access_counter_buffer_entry_t **notifications = batch_context->phys.notifications;
|
||||
|
||||
UVM_ASSERT(gpu->parent->access_counters_can_use_physical_addresses);
|
||||
|
||||
preprocess_phys_notifications(batch_context);
|
||||
|
||||
for (i = 0; i < batch_context->phys.num_notifications; ++i) {
|
||||
NV_STATUS status;
|
||||
uvm_access_counter_buffer_entry_t *current_entry = batch_context->phys.notifications[i];
|
||||
unsigned flags = 0;
|
||||
uvm_access_counter_buffer_entry_t *current_entry = notifications[i];
|
||||
NvU32 flags = 0;
|
||||
|
||||
if (!UVM_ID_IS_VALID(current_entry->physical_info.resident_id))
|
||||
continue;
|
||||
|
||||
status = service_phys_notification(gpu, batch_context, current_entry, &flags);
|
||||
if (flags & UVM_ACCESS_COUNTER_ACTION_NOTIFY)
|
||||
uvm_tools_broadcast_access_counter(gpu, current_entry, flags & UVM_ACCESS_COUNTER_ON_MANAGED);
|
||||
|
||||
if (status == NV_OK && (flags & UVM_ACCESS_COUNTER_ACTION_CLEAR))
|
||||
status = access_counter_clear_targeted(gpu, current_entry);
|
||||
notify_tools_and_process_flags(gpu, &notifications[i], 1, flags);
|
||||
|
||||
if (status != NV_OK)
|
||||
return status;
|
||||
@ -1478,187 +1489,373 @@ static NV_STATUS service_phys_notifications(uvm_gpu_t *gpu,
|
||||
return NV_OK;
|
||||
}
|
||||
|
||||
static int cmp_sort_gpu_phys_addr(const void *_a, const void *_b)
|
||||
static NV_STATUS service_notification_va_block_helper(struct mm_struct *mm,
|
||||
uvm_va_block_t *va_block,
|
||||
uvm_processor_id_t processor,
|
||||
uvm_access_counter_service_batch_context_t *batch_context)
|
||||
{
|
||||
return uvm_gpu_phys_addr_cmp(*(uvm_gpu_phys_address_t*)_a,
|
||||
*(uvm_gpu_phys_address_t*)_b);
|
||||
}
|
||||
uvm_va_block_retry_t va_block_retry;
|
||||
uvm_page_mask_t *accessed_pages = &batch_context->accessed_pages;
|
||||
uvm_service_block_context_t *service_context = &batch_context->block_service_context;
|
||||
|
||||
static bool gpu_phys_same_region(uvm_gpu_phys_address_t a, uvm_gpu_phys_address_t b, NvU64 granularity)
|
||||
{
|
||||
if (a.aperture != b.aperture)
|
||||
return false;
|
||||
|
||||
UVM_ASSERT(is_power_of_2(granularity));
|
||||
|
||||
return UVM_ALIGN_DOWN(a.address, granularity) == UVM_ALIGN_DOWN(b.address, granularity);
|
||||
}
|
||||
|
||||
static bool phys_address_in_accessed_sub_region(uvm_gpu_phys_address_t address,
|
||||
NvU64 region_size,
|
||||
NvU64 sub_region_size,
|
||||
NvU32 accessed_mask)
|
||||
{
|
||||
const unsigned accessed_index = (address.address % region_size) / sub_region_size;
|
||||
|
||||
// accessed_mask is only filled for tracking granularities larger than 64K
|
||||
if (region_size == UVM_PAGE_SIZE_64K)
|
||||
return true;
|
||||
|
||||
UVM_ASSERT(accessed_index < 32);
|
||||
return ((1 << accessed_index) & accessed_mask) != 0;
|
||||
}
|
||||
|
||||
static NV_STATUS service_virt_notification(uvm_gpu_t *gpu,
|
||||
uvm_access_counter_service_batch_context_t *batch_context,
|
||||
const uvm_access_counter_buffer_entry_t *current_entry,
|
||||
unsigned *out_flags)
|
||||
{
|
||||
NV_STATUS status = NV_OK;
|
||||
NvU64 notification_size;
|
||||
NvU64 address;
|
||||
uvm_processor_id_t *resident_processors = batch_context->virt.scratch.resident_processors;
|
||||
uvm_gpu_phys_address_t *phys_addresses = batch_context->virt.scratch.phys_addresses;
|
||||
int num_addresses = 0;
|
||||
int i;
|
||||
|
||||
// Virtual address notifications are always 64K aligned
|
||||
NvU64 region_start = current_entry->address.address;
|
||||
NvU64 region_end = current_entry->address.address + UVM_PAGE_SIZE_64K;
|
||||
|
||||
|
||||
uvm_access_counter_buffer_info_t *access_counters = &gpu->parent->access_counter_buffer_info;
|
||||
uvm_access_counter_type_t counter_type = current_entry->counter_type;
|
||||
|
||||
const uvm_gpu_access_counter_type_config_t *config = get_config_for_type(access_counters, counter_type);
|
||||
|
||||
uvm_va_space_t *va_space = current_entry->virtual_info.va_space;
|
||||
|
||||
UVM_ASSERT(counter_type == UVM_ACCESS_COUNTER_TYPE_MIMC);
|
||||
|
||||
// Entries with NULL va_space are simply dropped.
|
||||
if (!va_space)
|
||||
if (uvm_page_mask_empty(accessed_pages))
|
||||
return NV_OK;
|
||||
|
||||
status = config_granularity_to_bytes(config->rm.granularity, &notification_size);
|
||||
if (status != NV_OK)
|
||||
return status;
|
||||
uvm_assert_mutex_locked(&va_block->lock);
|
||||
|
||||
// Collect physical locations that could have been touched
|
||||
// in the reported 64K VA region. The notification mask can
|
||||
// correspond to any of them.
|
||||
uvm_va_space_down_read(va_space);
|
||||
for (address = region_start; address < region_end;) {
|
||||
uvm_va_block_t *va_block;
|
||||
service_context->operation = UVM_SERVICE_OPERATION_ACCESS_COUNTERS;
|
||||
service_context->num_retries = 0;
|
||||
|
||||
NV_STATUS local_status = uvm_va_block_find(va_space, address, &va_block);
|
||||
if (local_status == NV_ERR_INVALID_ADDRESS || local_status == NV_ERR_OBJECT_NOT_FOUND) {
|
||||
address += PAGE_SIZE;
|
||||
continue;
|
||||
}
|
||||
uvm_va_block_context_init(service_context->block_context, mm);
|
||||
|
||||
uvm_mutex_lock(&va_block->lock);
|
||||
while (address < va_block->end && address < region_end) {
|
||||
const unsigned page_index = uvm_va_block_cpu_page_index(va_block, address);
|
||||
return UVM_VA_BLOCK_RETRY_LOCKED(va_block,
|
||||
&va_block_retry,
|
||||
service_va_block_locked(processor,
|
||||
va_block,
|
||||
&va_block_retry,
|
||||
service_context,
|
||||
accessed_pages));
|
||||
}
|
||||
|
||||
// UVM va_block always maps the closest resident location to processor
|
||||
const uvm_processor_id_t res_id = uvm_va_block_page_get_closest_resident(va_block, page_index, gpu->id);
|
||||
static void expand_notification_block(uvm_gpu_va_space_t *gpu_va_space,
|
||||
uvm_va_block_t *va_block,
|
||||
uvm_page_mask_t *accessed_pages,
|
||||
const uvm_access_counter_buffer_entry_t *current_entry)
|
||||
{
|
||||
NvU64 addr;
|
||||
NvU64 granularity = 0;
|
||||
uvm_gpu_t *resident_gpu = NULL;
|
||||
uvm_processor_id_t resident_id;
|
||||
uvm_page_index_t page_index;
|
||||
uvm_gpu_t *gpu = gpu_va_space->gpu;
|
||||
const uvm_access_counter_buffer_info_t *access_counters = &gpu->parent->access_counter_buffer_info;
|
||||
const uvm_gpu_access_counter_type_config_t *config = get_config_for_type(access_counters,
|
||||
UVM_ACCESS_COUNTER_TYPE_MIMC);
|
||||
|
||||
// Add physical location if it's valid and not local vidmem
|
||||
if (UVM_ID_IS_VALID(res_id) && !uvm_id_equal(res_id, gpu->id)) {
|
||||
uvm_gpu_phys_address_t phys_address = uvm_va_block_res_phys_page_address(va_block, page_index, res_id, gpu);
|
||||
if (phys_address_in_accessed_sub_region(phys_address,
|
||||
notification_size,
|
||||
config->sub_granularity_region_size,
|
||||
current_entry->sub_granularity)) {
|
||||
resident_processors[num_addresses] = res_id;
|
||||
phys_addresses[num_addresses] = phys_address;
|
||||
++num_addresses;
|
||||
}
|
||||
else {
|
||||
UVM_DBG_PRINT_RL("Skipping phys address %llx:%s, because it couldn't have been accessed in mask %x",
|
||||
phys_address.address,
|
||||
uvm_aperture_string(phys_address.aperture),
|
||||
current_entry->sub_granularity);
|
||||
}
|
||||
}
|
||||
config_granularity_to_bytes(config->rm.granularity, &granularity);
|
||||
|
||||
address += PAGE_SIZE;
|
||||
}
|
||||
uvm_mutex_unlock(&va_block->lock);
|
||||
// Granularities other than 2MB can only be enabled by UVM tests. Do nothing
|
||||
// in that case.
|
||||
if (granularity != UVM_PAGE_SIZE_2M)
|
||||
return;
|
||||
|
||||
addr = current_entry->address.address;
|
||||
|
||||
uvm_assert_rwsem_locked(&gpu_va_space->va_space->lock);
|
||||
uvm_assert_mutex_locked(&va_block->lock);
|
||||
|
||||
page_index = uvm_va_block_cpu_page_index(va_block, addr);
|
||||
|
||||
resident_id = uvm_va_block_page_get_closest_resident(va_block, page_index, gpu->id);
|
||||
|
||||
// resident_id might be invalid or might already be the same as the GPU
|
||||
// which received the notification if the memory was already migrated before
|
||||
// acquiring the locks either during the servicing of previous notifications
|
||||
// or during faults or because of explicit migrations or if the VA range was
|
||||
// freed after receiving the notification. Do nothing in such cases.
|
||||
if (!UVM_ID_IS_VALID(resident_id) || uvm_id_equal(resident_id, gpu->id))
|
||||
return;
|
||||
|
||||
if (UVM_ID_IS_GPU(resident_id))
|
||||
resident_gpu = uvm_va_space_get_gpu(gpu_va_space->va_space, resident_id);
|
||||
|
||||
if (uvm_va_block_get_physical_size(va_block, resident_id, page_index) != granularity) {
|
||||
uvm_page_mask_set(accessed_pages, page_index);
|
||||
}
|
||||
uvm_va_space_up_read(va_space);
|
||||
else {
|
||||
NvU32 region_start;
|
||||
NvU32 region_end;
|
||||
unsigned long sub_granularity = current_entry->sub_granularity;
|
||||
NvU32 num_regions = config->sub_granularity_regions_per_translation;
|
||||
NvU32 num_sub_pages = config->sub_granularity_region_size / PAGE_SIZE;
|
||||
uvm_page_mask_t *resident_mask = uvm_va_block_resident_mask_get(va_block, resident_id, NUMA_NO_NODE);
|
||||
|
||||
// The addresses need to be sorted to aid coalescing.
|
||||
sort(phys_addresses,
|
||||
num_addresses,
|
||||
sizeof(*phys_addresses),
|
||||
cmp_sort_gpu_phys_addr,
|
||||
NULL);
|
||||
UVM_ASSERT(num_sub_pages >= 1);
|
||||
|
||||
for (i = 0; i < num_addresses; ++i) {
|
||||
uvm_access_counter_buffer_entry_t *fake_entry = &batch_context->virt.scratch.phys_entry;
|
||||
|
||||
// Skip the current pointer if the physical region was already handled
|
||||
if (i > 0 && gpu_phys_same_region(phys_addresses[i - 1], phys_addresses[i], notification_size)) {
|
||||
UVM_ASSERT(uvm_id_equal(resident_processors[i - 1], resident_processors[i]));
|
||||
continue;
|
||||
// region_start and region_end refer to sub_granularity indices, not
|
||||
// page_indices.
|
||||
for_each_sub_granularity_region(region_start, region_end, sub_granularity, num_regions) {
|
||||
uvm_page_mask_region_fill(accessed_pages,
|
||||
uvm_va_block_region(region_start * num_sub_pages,
|
||||
region_end * num_sub_pages));
|
||||
}
|
||||
UVM_DBG_PRINT_RL("Faking MIMC address[%i/%i]: %llx (granularity mask: %llx) in aperture %s on device %s\n",
|
||||
i,
|
||||
num_addresses,
|
||||
phys_addresses[i].address,
|
||||
notification_size - 1,
|
||||
uvm_aperture_string(phys_addresses[i].aperture),
|
||||
uvm_gpu_name(gpu));
|
||||
|
||||
// Construct a fake phys addr AC entry
|
||||
fake_entry->counter_type = current_entry->counter_type;
|
||||
fake_entry->address.address = UVM_ALIGN_DOWN(phys_addresses[i].address, notification_size);
|
||||
fake_entry->address.aperture = phys_addresses[i].aperture;
|
||||
fake_entry->address.is_virtual = false;
|
||||
fake_entry->physical_info.resident_id = resident_processors[i];
|
||||
fake_entry->counter_value = current_entry->counter_value;
|
||||
fake_entry->sub_granularity = current_entry->sub_granularity;
|
||||
// Remove pages in the va_block which are not resident on resident_id.
|
||||
// If the GPU is heavily accessing those pages, future access counter
|
||||
// migrations will migrate them to the GPU.
|
||||
uvm_page_mask_and(accessed_pages, accessed_pages, resident_mask);
|
||||
}
|
||||
}
|
||||
|
||||
status = service_phys_notification(gpu, batch_context, fake_entry, out_flags);
|
||||
if (status != NV_OK)
|
||||
static NV_STATUS service_virt_notifications_in_block(uvm_gpu_va_space_t *gpu_va_space,
|
||||
struct mm_struct *mm,
|
||||
uvm_va_block_t *va_block,
|
||||
uvm_access_counter_service_batch_context_t *batch_context,
|
||||
NvU32 index,
|
||||
NvU32 *out_index)
|
||||
{
|
||||
NvU32 i;
|
||||
NvU32 flags = 0;
|
||||
NV_STATUS status = NV_OK;
|
||||
NV_STATUS flags_status;
|
||||
uvm_gpu_t *gpu = gpu_va_space->gpu;
|
||||
uvm_va_space_t *va_space = gpu_va_space->va_space;
|
||||
uvm_page_mask_t *accessed_pages = &batch_context->accessed_pages;
|
||||
uvm_access_counter_buffer_entry_t **notifications = batch_context->virt.notifications;
|
||||
|
||||
UVM_ASSERT(va_block);
|
||||
UVM_ASSERT(index < batch_context->virt.num_notifications);
|
||||
|
||||
uvm_assert_rwsem_locked(&va_space->lock);
|
||||
|
||||
uvm_page_mask_zero(accessed_pages);
|
||||
|
||||
uvm_mutex_lock(&va_block->lock);
|
||||
|
||||
for (i = index; i < batch_context->virt.num_notifications; i++) {
|
||||
uvm_access_counter_buffer_entry_t *current_entry = notifications[i];
|
||||
NvU64 address = current_entry->address.address;
|
||||
|
||||
if ((current_entry->virtual_info.va_space == va_space) && (address <= va_block->end))
|
||||
expand_notification_block(gpu_va_space, va_block, accessed_pages, current_entry);
|
||||
else
|
||||
break;
|
||||
}
|
||||
|
||||
*out_index = i;
|
||||
|
||||
// Atleast one notification should have been processed.
|
||||
UVM_ASSERT(index < *out_index);
|
||||
|
||||
status = service_notification_va_block_helper(mm, va_block, gpu->id, batch_context);
|
||||
|
||||
uvm_mutex_unlock(&va_block->lock);
|
||||
|
||||
if (status == NV_OK)
|
||||
flags |= UVM_ACCESS_COUNTER_ACTION_CLEAR;
|
||||
|
||||
flags_status = notify_tools_and_process_flags(gpu, ¬ifications[index], *out_index - index, flags);
|
||||
|
||||
if ((status == NV_OK) && (flags_status != NV_OK))
|
||||
status = flags_status;
|
||||
|
||||
return status;
|
||||
}
|
||||
|
||||
static NV_STATUS service_virt_notification_ats(uvm_gpu_va_space_t *gpu_va_space,
|
||||
struct mm_struct *mm,
|
||||
uvm_access_counter_service_batch_context_t *batch_context,
|
||||
NvU32 index,
|
||||
NvU32 *out_index)
|
||||
{
|
||||
|
||||
NvU32 i;
|
||||
NvU64 base;
|
||||
NvU64 end;
|
||||
NvU64 address;
|
||||
NvU32 flags = UVM_ACCESS_COUNTER_ACTION_CLEAR;
|
||||
NV_STATUS status = NV_OK;
|
||||
NV_STATUS flags_status;
|
||||
struct vm_area_struct *vma = NULL;
|
||||
uvm_gpu_t *gpu = gpu_va_space->gpu;
|
||||
uvm_va_space_t *va_space = gpu_va_space->va_space;
|
||||
uvm_ats_fault_context_t *ats_context = &batch_context->ats_context;
|
||||
uvm_access_counter_buffer_entry_t **notifications = batch_context->virt.notifications;
|
||||
|
||||
UVM_ASSERT(index < batch_context->virt.num_notifications);
|
||||
|
||||
uvm_assert_mmap_lock_locked(mm);
|
||||
uvm_assert_rwsem_locked(&va_space->lock);
|
||||
|
||||
address = notifications[index]->address.address;
|
||||
|
||||
vma = find_vma_intersection(mm, address, address + 1);
|
||||
if (!vma) {
|
||||
// Clear the notification entry to continue receiving access counter
|
||||
// notifications when a new VMA is allocated in this range.
|
||||
status = notify_tools_and_process_flags(gpu, ¬ifications[index], 1, flags);
|
||||
*out_index = index + 1;
|
||||
return status;
|
||||
}
|
||||
|
||||
base = UVM_VA_BLOCK_ALIGN_DOWN(address);
|
||||
end = min(base + UVM_VA_BLOCK_SIZE, (NvU64)vma->vm_end);
|
||||
|
||||
uvm_page_mask_zero(&ats_context->accessed_mask);
|
||||
|
||||
for (i = index; i < batch_context->virt.num_notifications; i++) {
|
||||
uvm_access_counter_buffer_entry_t *current_entry = notifications[i];
|
||||
address = current_entry->address.address;
|
||||
|
||||
if ((current_entry->virtual_info.va_space == va_space) && (address < end))
|
||||
uvm_page_mask_set(&ats_context->accessed_mask, (address - base) / PAGE_SIZE);
|
||||
else
|
||||
break;
|
||||
}
|
||||
|
||||
*out_index = i;
|
||||
|
||||
// Atleast one notification should have been processed.
|
||||
UVM_ASSERT(index < *out_index);
|
||||
|
||||
status = uvm_ats_service_access_counters(gpu_va_space, vma, base, ats_context);
|
||||
if (status != NV_OK)
|
||||
flags &= ~UVM_ACCESS_COUNTER_ACTION_CLEAR;
|
||||
|
||||
flags_status = notify_tools_and_process_flags(gpu, ¬ifications[index], *out_index - index, flags);
|
||||
if ((status == NV_OK) && (flags_status != NV_OK))
|
||||
status = flags_status;
|
||||
|
||||
return status;
|
||||
}
|
||||
|
||||
static NV_STATUS service_virt_notifications_batch(uvm_gpu_va_space_t *gpu_va_space,
|
||||
struct mm_struct *mm,
|
||||
uvm_access_counter_service_batch_context_t *batch_context,
|
||||
NvU32 index,
|
||||
NvU32 *out_index)
|
||||
{
|
||||
NV_STATUS status;
|
||||
uvm_va_range_t *va_range;
|
||||
uvm_va_space_t *va_space = gpu_va_space->va_space;
|
||||
uvm_access_counter_buffer_entry_t *current_entry = batch_context->virt.notifications[index];
|
||||
NvU64 address = current_entry->address.address;
|
||||
|
||||
UVM_ASSERT(va_space);
|
||||
|
||||
if (mm)
|
||||
uvm_assert_mmap_lock_locked(mm);
|
||||
|
||||
uvm_assert_rwsem_locked(&va_space->lock);
|
||||
|
||||
// Virtual address notifications are always 64K aligned
|
||||
UVM_ASSERT(IS_ALIGNED(address, UVM_PAGE_SIZE_64K));
|
||||
|
||||
va_range = uvm_va_range_find(va_space, address);
|
||||
if (va_range) {
|
||||
// Avoid clearing the entry by default.
|
||||
NvU32 flags = 0;
|
||||
uvm_va_block_t *va_block = NULL;
|
||||
|
||||
if (va_range->type == UVM_VA_RANGE_TYPE_MANAGED) {
|
||||
size_t index = uvm_va_range_block_index(va_range, address);
|
||||
|
||||
va_block = uvm_va_range_block(va_range, index);
|
||||
|
||||
// If the va_range is a managed range, the notification belongs to a
|
||||
// recently freed va_range if va_block is NULL. If va_block is not
|
||||
// NULL, service_virt_notifications_in_block will process flags.
|
||||
// Clear the notification entry to continue receiving notifications
|
||||
// when a new va_range is allocated in that region.
|
||||
flags = UVM_ACCESS_COUNTER_ACTION_CLEAR;
|
||||
}
|
||||
|
||||
if (va_block) {
|
||||
status = service_virt_notifications_in_block(gpu_va_space, mm, va_block, batch_context, index, out_index);
|
||||
}
|
||||
else {
|
||||
status = notify_tools_and_process_flags(gpu_va_space->gpu, batch_context->virt.notifications, 1, flags);
|
||||
*out_index = index + 1;
|
||||
}
|
||||
}
|
||||
else if (uvm_ats_can_service_faults(gpu_va_space, mm)) {
|
||||
status = service_virt_notification_ats(gpu_va_space, mm, batch_context, index, out_index);
|
||||
}
|
||||
else {
|
||||
NvU32 flags;
|
||||
uvm_va_block_t *va_block = NULL;
|
||||
|
||||
status = uvm_hmm_va_block_find(va_space, address, &va_block);
|
||||
|
||||
// TODO: Bug 4309292: [UVM][HMM] Re-enable access counter HMM block
|
||||
// migrations for virtual notifications
|
||||
//
|
||||
// - If the va_block is HMM, don't clear the notification since HMM
|
||||
// migrations are currently disabled.
|
||||
//
|
||||
// - If the va_block isn't HMM, the notification belongs to a recently
|
||||
// freed va_range. Clear the notification entry to continue receiving
|
||||
// notifications when a new va_range is allocated in this region.
|
||||
flags = va_block ? 0 : UVM_ACCESS_COUNTER_ACTION_CLEAR;
|
||||
|
||||
UVM_ASSERT((status == NV_ERR_OBJECT_NOT_FOUND) ||
|
||||
(status == NV_ERR_INVALID_ADDRESS) ||
|
||||
uvm_va_block_is_hmm(va_block));
|
||||
|
||||
// Clobber status to continue processing the rest of the notifications
|
||||
// in the batch.
|
||||
status = notify_tools_and_process_flags(gpu_va_space->gpu, batch_context->virt.notifications, 1, flags);
|
||||
|
||||
*out_index = index + 1;
|
||||
}
|
||||
|
||||
return status;
|
||||
}
|
||||
|
||||
static NV_STATUS service_virt_notifications(uvm_gpu_t *gpu,
|
||||
uvm_access_counter_service_batch_context_t *batch_context)
|
||||
{
|
||||
NvU32 i;
|
||||
NvU32 i = 0;
|
||||
NV_STATUS status = NV_OK;
|
||||
preprocess_virt_notifications(gpu, batch_context);
|
||||
struct mm_struct *mm = NULL;
|
||||
uvm_va_space_t *va_space = NULL;
|
||||
uvm_va_space_t *prev_va_space = NULL;
|
||||
uvm_gpu_va_space_t *gpu_va_space = NULL;
|
||||
|
||||
for (i = 0; i < batch_context->virt.num_notifications; ++i) {
|
||||
unsigned flags = 0;
|
||||
// TODO: Bug 4299018 : Add support for virtual access counter migrations on
|
||||
// 4K page sizes.
|
||||
if (PAGE_SIZE == UVM_PAGE_SIZE_4K) {
|
||||
return notify_tools_and_process_flags(gpu,
|
||||
batch_context->virt.notifications,
|
||||
batch_context->virt.num_notifications,
|
||||
0);
|
||||
}
|
||||
|
||||
preprocess_virt_notifications(gpu->parent, batch_context);
|
||||
|
||||
while (i < batch_context->virt.num_notifications) {
|
||||
uvm_access_counter_buffer_entry_t *current_entry = batch_context->virt.notifications[i];
|
||||
va_space = current_entry->virtual_info.va_space;
|
||||
|
||||
status = service_virt_notification(gpu, batch_context, current_entry, &flags);
|
||||
if (va_space != prev_va_space) {
|
||||
|
||||
UVM_DBG_PRINT_RL("Processed virt access counter (%d/%d): %sMANAGED (status: %d) clear: %s\n",
|
||||
i + 1,
|
||||
batch_context->virt.num_notifications,
|
||||
(flags & UVM_ACCESS_COUNTER_ON_MANAGED) ? "" : "NOT ",
|
||||
status,
|
||||
(flags & UVM_ACCESS_COUNTER_ACTION_CLEAR) ? "YES" : "NO");
|
||||
// New va_space detected, drop locks of the old va_space.
|
||||
if (prev_va_space) {
|
||||
uvm_va_space_up_read(prev_va_space);
|
||||
uvm_va_space_mm_release_unlock(prev_va_space, mm);
|
||||
|
||||
if (uvm_enable_builtin_tests)
|
||||
uvm_tools_broadcast_access_counter(gpu, current_entry, flags & UVM_ACCESS_COUNTER_ON_MANAGED);
|
||||
mm = NULL;
|
||||
gpu_va_space = NULL;
|
||||
}
|
||||
|
||||
if (status == NV_OK && (flags & UVM_ACCESS_COUNTER_ACTION_CLEAR))
|
||||
status = access_counter_clear_targeted(gpu, current_entry);
|
||||
// Acquire locks for the new va_space.
|
||||
if (va_space) {
|
||||
mm = uvm_va_space_mm_retain_lock(va_space);
|
||||
uvm_va_space_down_read(va_space);
|
||||
|
||||
gpu_va_space = uvm_gpu_va_space_get_by_parent_gpu(va_space, gpu->parent);
|
||||
}
|
||||
|
||||
prev_va_space = va_space;
|
||||
}
|
||||
|
||||
if (va_space && gpu_va_space && uvm_va_space_has_access_counter_migrations(va_space)) {
|
||||
status = service_virt_notifications_batch(gpu_va_space, mm, batch_context, i, &i);
|
||||
}
|
||||
else {
|
||||
status = notify_tools_and_process_flags(gpu, &batch_context->virt.notifications[i], 1, 0);
|
||||
i++;
|
||||
}
|
||||
|
||||
if (status != NV_OK)
|
||||
break;
|
||||
}
|
||||
|
||||
if (va_space) {
|
||||
uvm_va_space_up_read(va_space);
|
||||
uvm_va_space_mm_release_unlock(va_space, mm);
|
||||
}
|
||||
|
||||
return status;
|
||||
}
|
||||
|
||||
@ -1682,13 +1879,17 @@ void uvm_gpu_service_access_counters(uvm_gpu_t *gpu)
|
||||
|
||||
++batch_context->batch_id;
|
||||
|
||||
status = service_virt_notifications(gpu, batch_context);
|
||||
if (status != NV_OK)
|
||||
break;
|
||||
if (batch_context->virt.num_notifications) {
|
||||
status = service_virt_notifications(gpu, batch_context);
|
||||
if (status != NV_OK)
|
||||
break;
|
||||
}
|
||||
|
||||
status = service_phys_notifications(gpu, batch_context);
|
||||
if (status != NV_OK)
|
||||
break;
|
||||
if (batch_context->phys.num_notifications) {
|
||||
status = service_phys_notifications(gpu, batch_context);
|
||||
if (status != NV_OK)
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (status != NV_OK) {
|
||||
@ -1816,7 +2017,7 @@ NV_STATUS uvm_test_reconfigure_access_counters(UVM_TEST_RECONFIGURE_ACCESS_COUNT
|
||||
// ISR lock ensures that we own GET/PUT registers. It disables interrupts
|
||||
// and ensures that no other thread (nor the top half) will be able to
|
||||
// re-enable interrupts during reconfiguration.
|
||||
uvm_gpu_access_counters_isr_lock(gpu->parent);
|
||||
uvm_parent_gpu_access_counters_isr_lock(gpu->parent);
|
||||
|
||||
uvm_va_space_down_read_rm(va_space);
|
||||
|
||||
@ -1896,7 +2097,7 @@ exit_isr_unlock:
|
||||
if (status != NV_OK)
|
||||
uvm_va_space_up_read_rm(va_space);
|
||||
|
||||
uvm_gpu_access_counters_isr_unlock(gpu->parent);
|
||||
uvm_parent_gpu_access_counters_isr_unlock(gpu->parent);
|
||||
|
||||
exit_release_gpu:
|
||||
uvm_gpu_release(gpu);
|
||||
@ -1928,7 +2129,7 @@ NV_STATUS uvm_test_reset_access_counters(UVM_TEST_RESET_ACCESS_COUNTERS_PARAMS *
|
||||
goto exit_release_gpu;
|
||||
}
|
||||
|
||||
uvm_gpu_access_counters_isr_lock(gpu->parent);
|
||||
uvm_parent_gpu_access_counters_isr_lock(gpu->parent);
|
||||
|
||||
// Access counters not enabled. Nothing to reset
|
||||
if (gpu->parent->isr.access_counters.handling_ref_count == 0)
|
||||
@ -1941,6 +2142,7 @@ NV_STATUS uvm_test_reset_access_counters(UVM_TEST_RESET_ACCESS_COUNTERS_PARAMS *
|
||||
}
|
||||
else {
|
||||
uvm_access_counter_buffer_entry_t entry = { 0 };
|
||||
uvm_access_counter_buffer_entry_t *notification = &entry;
|
||||
|
||||
if (params->counter_type == UVM_TEST_ACCESS_COUNTER_TYPE_MIMC)
|
||||
entry.counter_type = UVM_ACCESS_COUNTER_TYPE_MIMC;
|
||||
@ -1950,14 +2152,14 @@ NV_STATUS uvm_test_reset_access_counters(UVM_TEST_RESET_ACCESS_COUNTERS_PARAMS *
|
||||
entry.bank = params->bank;
|
||||
entry.tag = params->tag;
|
||||
|
||||
status = access_counter_clear_targeted(gpu, &entry);
|
||||
status = access_counter_clear_notifications(gpu, ¬ification, 1);
|
||||
}
|
||||
|
||||
if (status == NV_OK)
|
||||
status = uvm_tracker_wait(&access_counters->clear_tracker);
|
||||
|
||||
exit_isr_unlock:
|
||||
uvm_gpu_access_counters_isr_unlock(gpu->parent);
|
||||
uvm_parent_gpu_access_counters_isr_unlock(gpu->parent);
|
||||
|
||||
exit_release_gpu:
|
||||
uvm_gpu_release(gpu);
|
||||
@ -1972,7 +2174,7 @@ void uvm_gpu_access_counters_set_ignore(uvm_gpu_t *gpu, bool do_ignore)
|
||||
if (!gpu->parent->access_counters_supported)
|
||||
return;
|
||||
|
||||
uvm_gpu_access_counters_isr_lock(gpu->parent);
|
||||
uvm_parent_gpu_access_counters_isr_lock(gpu->parent);
|
||||
|
||||
if (do_ignore) {
|
||||
if (gpu->parent->access_counter_buffer_info.notifications_ignored_count++ == 0)
|
||||
@ -1990,9 +2192,9 @@ void uvm_gpu_access_counters_set_ignore(uvm_gpu_t *gpu, bool do_ignore)
|
||||
uvm_spin_lock_irqsave(&gpu->parent->isr.interrupts_lock);
|
||||
|
||||
if (do_ignore)
|
||||
uvm_gpu_access_counters_intr_disable(gpu->parent);
|
||||
uvm_parent_gpu_access_counters_intr_disable(gpu->parent);
|
||||
else
|
||||
uvm_gpu_access_counters_intr_enable(gpu->parent);
|
||||
uvm_parent_gpu_access_counters_intr_enable(gpu->parent);
|
||||
|
||||
uvm_spin_unlock_irqrestore(&gpu->parent->isr.interrupts_lock);
|
||||
|
||||
@ -2000,7 +2202,7 @@ void uvm_gpu_access_counters_set_ignore(uvm_gpu_t *gpu, bool do_ignore)
|
||||
access_counter_buffer_flush_locked(gpu, UVM_GPU_BUFFER_FLUSH_MODE_CACHED_PUT);
|
||||
}
|
||||
|
||||
uvm_gpu_access_counters_isr_unlock(gpu->parent);
|
||||
uvm_parent_gpu_access_counters_isr_unlock(gpu->parent);
|
||||
}
|
||||
|
||||
NV_STATUS uvm_test_set_ignore_access_counters(UVM_TEST_SET_IGNORE_ACCESS_COUNTERS_PARAMS *params, struct file *filp)
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*******************************************************************************
|
||||
Copyright (c) 2016-2021 NVIDIA Corporation
|
||||
Copyright (c) 2016-2023 NVIDIA Corporation
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to
|
||||
@ -67,21 +67,21 @@ static void access_counters_isr_bottom_half_entry(void *args);
|
||||
// interrupts should be disabled. The caller is guaranteed that replayable page
|
||||
// faults are disabled upon return. Interrupts might already be disabled prior
|
||||
// to making this call. Each call is ref-counted, so this must be paired with a
|
||||
// call to uvm_gpu_replayable_faults_intr_enable().
|
||||
// call to uvm_parent_gpu_replayable_faults_intr_enable().
|
||||
//
|
||||
// parent_gpu->isr.interrupts_lock must be held to call this function.
|
||||
static void uvm_gpu_replayable_faults_intr_disable(uvm_parent_gpu_t *parent_gpu);
|
||||
static void uvm_parent_gpu_replayable_faults_intr_disable(uvm_parent_gpu_t *parent_gpu);
|
||||
|
||||
// Decrements the reference count tracking whether replayable page fault
|
||||
// interrupts should be disabled. Only once the count reaches 0 are the HW
|
||||
// interrupts actually enabled, so this call does not guarantee that the
|
||||
// interrupts have been re-enabled upon return.
|
||||
//
|
||||
// uvm_gpu_replayable_faults_intr_disable() must have been called prior to
|
||||
// calling this function.
|
||||
// uvm_parent_gpu_replayable_faults_intr_disable() must have been called prior
|
||||
// to calling this function.
|
||||
//
|
||||
// parent_gpu->isr.interrupts_lock must be held to call this function.
|
||||
static void uvm_gpu_replayable_faults_intr_enable(uvm_parent_gpu_t *parent_gpu);
|
||||
static void uvm_parent_gpu_replayable_faults_intr_enable(uvm_parent_gpu_t *parent_gpu);
|
||||
|
||||
static unsigned schedule_replayable_faults_handler(uvm_parent_gpu_t *parent_gpu)
|
||||
{
|
||||
@ -108,7 +108,7 @@ static unsigned schedule_replayable_faults_handler(uvm_parent_gpu_t *parent_gpu)
|
||||
nv_kref_get(&parent_gpu->gpu_kref);
|
||||
|
||||
// Interrupts need to be disabled here to avoid an interrupt storm
|
||||
uvm_gpu_replayable_faults_intr_disable(parent_gpu);
|
||||
uvm_parent_gpu_replayable_faults_intr_disable(parent_gpu);
|
||||
|
||||
// Schedule a bottom half, but do *not* release the GPU ISR lock. The bottom
|
||||
// half releases the GPU ISR lock as part of its cleanup.
|
||||
@ -175,7 +175,7 @@ static unsigned schedule_access_counters_handler(uvm_parent_gpu_t *parent_gpu)
|
||||
nv_kref_get(&parent_gpu->gpu_kref);
|
||||
|
||||
// Interrupts need to be disabled to avoid an interrupt storm
|
||||
uvm_gpu_access_counters_intr_disable(parent_gpu);
|
||||
uvm_parent_gpu_access_counters_intr_disable(parent_gpu);
|
||||
|
||||
nv_kthread_q_schedule_q_item(&parent_gpu->isr.bottom_half_q,
|
||||
&parent_gpu->isr.access_counters.bottom_half_q_item);
|
||||
@ -288,7 +288,7 @@ static NV_STATUS init_queue_on_node(nv_kthread_q_t *queue, const char *name, int
|
||||
return errno_to_nv_status(nv_kthread_q_init(queue, name));
|
||||
}
|
||||
|
||||
NV_STATUS uvm_gpu_init_isr(uvm_parent_gpu_t *parent_gpu)
|
||||
NV_STATUS uvm_parent_gpu_init_isr(uvm_parent_gpu_t *parent_gpu)
|
||||
{
|
||||
NV_STATUS status = NV_OK;
|
||||
char kthread_name[TASK_COMM_LEN + 1];
|
||||
@ -299,7 +299,7 @@ NV_STATUS uvm_gpu_init_isr(uvm_parent_gpu_t *parent_gpu)
|
||||
if (status != NV_OK) {
|
||||
UVM_ERR_PRINT("Failed to initialize GPU fault buffer: %s, GPU: %s\n",
|
||||
nvstatusToString(status),
|
||||
parent_gpu->name);
|
||||
uvm_parent_gpu_name(parent_gpu));
|
||||
return status;
|
||||
}
|
||||
|
||||
@ -320,12 +320,12 @@ NV_STATUS uvm_gpu_init_isr(uvm_parent_gpu_t *parent_gpu)
|
||||
|
||||
parent_gpu->isr.replayable_faults.handling = true;
|
||||
|
||||
snprintf(kthread_name, sizeof(kthread_name), "UVM GPU%u BH", uvm_id_value(parent_gpu->id));
|
||||
snprintf(kthread_name, sizeof(kthread_name), "UVM GPU%u BH", uvm_parent_id_value(parent_gpu->id));
|
||||
status = init_queue_on_node(&parent_gpu->isr.bottom_half_q, kthread_name, parent_gpu->closest_cpu_numa_node);
|
||||
if (status != NV_OK) {
|
||||
UVM_ERR_PRINT("Failed in nv_kthread_q_init for bottom_half_q: %s, GPU %s\n",
|
||||
nvstatusToString(status),
|
||||
parent_gpu->name);
|
||||
uvm_parent_gpu_name(parent_gpu));
|
||||
return status;
|
||||
}
|
||||
|
||||
@ -348,14 +348,14 @@ NV_STATUS uvm_gpu_init_isr(uvm_parent_gpu_t *parent_gpu)
|
||||
|
||||
parent_gpu->isr.non_replayable_faults.handling = true;
|
||||
|
||||
snprintf(kthread_name, sizeof(kthread_name), "UVM GPU%u KC", uvm_id_value(parent_gpu->id));
|
||||
snprintf(kthread_name, sizeof(kthread_name), "UVM GPU%u KC", uvm_parent_id_value(parent_gpu->id));
|
||||
status = init_queue_on_node(&parent_gpu->isr.kill_channel_q,
|
||||
kthread_name,
|
||||
parent_gpu->closest_cpu_numa_node);
|
||||
if (status != NV_OK) {
|
||||
UVM_ERR_PRINT("Failed in nv_kthread_q_init for kill_channel_q: %s, GPU %s\n",
|
||||
nvstatusToString(status),
|
||||
parent_gpu->name);
|
||||
uvm_parent_gpu_name(parent_gpu));
|
||||
return status;
|
||||
}
|
||||
}
|
||||
@ -365,7 +365,7 @@ NV_STATUS uvm_gpu_init_isr(uvm_parent_gpu_t *parent_gpu)
|
||||
if (status != NV_OK) {
|
||||
UVM_ERR_PRINT("Failed to initialize GPU access counters: %s, GPU: %s\n",
|
||||
nvstatusToString(status),
|
||||
parent_gpu->name);
|
||||
uvm_parent_gpu_name(parent_gpu));
|
||||
return status;
|
||||
}
|
||||
|
||||
@ -393,13 +393,13 @@ NV_STATUS uvm_gpu_init_isr(uvm_parent_gpu_t *parent_gpu)
|
||||
return NV_OK;
|
||||
}
|
||||
|
||||
void uvm_gpu_flush_bottom_halves(uvm_parent_gpu_t *parent_gpu)
|
||||
void uvm_parent_gpu_flush_bottom_halves(uvm_parent_gpu_t *parent_gpu)
|
||||
{
|
||||
nv_kthread_q_flush(&parent_gpu->isr.bottom_half_q);
|
||||
nv_kthread_q_flush(&parent_gpu->isr.kill_channel_q);
|
||||
}
|
||||
|
||||
void uvm_gpu_disable_isr(uvm_parent_gpu_t *parent_gpu)
|
||||
void uvm_parent_gpu_disable_isr(uvm_parent_gpu_t *parent_gpu)
|
||||
{
|
||||
UVM_ASSERT(parent_gpu->isr.access_counters.handling_ref_count == 0);
|
||||
|
||||
@ -408,7 +408,7 @@ void uvm_gpu_disable_isr(uvm_parent_gpu_t *parent_gpu)
|
||||
// any more bottom halves.
|
||||
uvm_spin_lock_irqsave(&parent_gpu->isr.interrupts_lock);
|
||||
|
||||
uvm_gpu_replayable_faults_intr_disable(parent_gpu);
|
||||
uvm_parent_gpu_replayable_faults_intr_disable(parent_gpu);
|
||||
|
||||
parent_gpu->isr.replayable_faults.was_handling = parent_gpu->isr.replayable_faults.handling;
|
||||
parent_gpu->isr.non_replayable_faults.was_handling = parent_gpu->isr.non_replayable_faults.handling;
|
||||
@ -428,7 +428,7 @@ void uvm_gpu_disable_isr(uvm_parent_gpu_t *parent_gpu)
|
||||
nv_kthread_q_stop(&parent_gpu->isr.kill_channel_q);
|
||||
}
|
||||
|
||||
void uvm_gpu_deinit_isr(uvm_parent_gpu_t *parent_gpu)
|
||||
void uvm_parent_gpu_deinit_isr(uvm_parent_gpu_t *parent_gpu)
|
||||
{
|
||||
uvm_va_block_context_t *block_context;
|
||||
|
||||
@ -436,22 +436,23 @@ void uvm_gpu_deinit_isr(uvm_parent_gpu_t *parent_gpu)
|
||||
if (parent_gpu->isr.replayable_faults.was_handling) {
|
||||
// No user threads could have anything left on
|
||||
// replayable_faults.disable_intr_ref_count since they must retain the
|
||||
// GPU across uvm_gpu_replayable_faults_isr_lock/
|
||||
// uvm_gpu_replayable_faults_isr_unlock. This means the
|
||||
// GPU across uvm_parent_gpu_replayable_faults_isr_lock/
|
||||
// uvm_parent_gpu_replayable_faults_isr_unlock. This means the
|
||||
// uvm_parent_gpu_replayable_faults_intr_disable above could only have raced
|
||||
// with bottom halves.
|
||||
//
|
||||
// If we cleared replayable_faults.handling before the bottom half got
|
||||
// to its uvm_gpu_replayable_faults_isr_unlock, when it eventually
|
||||
// reached uvm_gpu_replayable_faults_isr_unlock it would have skipped
|
||||
// the disable, leaving us with extra ref counts here.
|
||||
// to its uvm_parent_gpu_replayable_faults_isr_unlock, when it
|
||||
// eventually reached uvm_parent_gpu_replayable_faults_isr_unlock it
|
||||
// would have skipped the disable, leaving us with extra ref counts
|
||||
// here.
|
||||
//
|
||||
// In any case we're guaranteed that replayable faults interrupts are
|
||||
// disabled and can't get re-enabled, so we can safely ignore the ref
|
||||
// count value and just clean things up.
|
||||
UVM_ASSERT_MSG(parent_gpu->isr.replayable_faults.disable_intr_ref_count > 0,
|
||||
"%s replayable_faults.disable_intr_ref_count: %llu\n",
|
||||
parent_gpu->name,
|
||||
uvm_parent_gpu_name(parent_gpu),
|
||||
parent_gpu->isr.replayable_faults.disable_intr_ref_count);
|
||||
|
||||
uvm_gpu_fault_buffer_deinit(parent_gpu);
|
||||
@ -480,7 +481,6 @@ void uvm_gpu_deinit_isr(uvm_parent_gpu_t *parent_gpu)
|
||||
|
||||
static uvm_gpu_t *find_first_valid_gpu(uvm_parent_gpu_t *parent_gpu)
|
||||
{
|
||||
uvm_global_gpu_id_t global_gpu_id = uvm_global_gpu_id_from_gpu_id(parent_gpu->id);
|
||||
uvm_gpu_t *gpu;
|
||||
|
||||
// When SMC is enabled, there's no longer a 1:1 relationship between the
|
||||
@ -495,10 +495,10 @@ static uvm_gpu_t *find_first_valid_gpu(uvm_parent_gpu_t *parent_gpu)
|
||||
|
||||
uvm_spin_lock_irqsave(&g_uvm_global.gpu_table_lock);
|
||||
|
||||
sub_processor_index = find_first_bit(parent_gpu->valid_gpus, UVM_ID_MAX_SUB_PROCESSORS);
|
||||
sub_processor_index = find_first_bit(parent_gpu->valid_gpus, UVM_PARENT_ID_MAX_SUB_PROCESSORS);
|
||||
|
||||
if (sub_processor_index < UVM_ID_MAX_SUB_PROCESSORS) {
|
||||
gpu = uvm_gpu_get(uvm_global_id_from_value(uvm_global_id_value(global_gpu_id) + sub_processor_index));
|
||||
if (sub_processor_index < UVM_PARENT_ID_MAX_SUB_PROCESSORS) {
|
||||
gpu = parent_gpu->gpus[sub_processor_index];
|
||||
UVM_ASSERT(gpu != NULL);
|
||||
}
|
||||
else {
|
||||
@ -508,7 +508,7 @@ static uvm_gpu_t *find_first_valid_gpu(uvm_parent_gpu_t *parent_gpu)
|
||||
uvm_spin_unlock_irqrestore(&g_uvm_global.gpu_table_lock);
|
||||
}
|
||||
else {
|
||||
gpu = uvm_gpu_get(global_gpu_id);
|
||||
gpu = parent_gpu->gpus[0];
|
||||
UVM_ASSERT(gpu != NULL);
|
||||
}
|
||||
|
||||
@ -547,12 +547,12 @@ static void replayable_faults_isr_bottom_half(void *args)
|
||||
|
||||
uvm_gpu_service_replayable_faults(gpu);
|
||||
|
||||
uvm_gpu_replayable_faults_isr_unlock(parent_gpu);
|
||||
uvm_parent_gpu_replayable_faults_isr_unlock(parent_gpu);
|
||||
|
||||
put_kref:
|
||||
// It is OK to drop a reference on the parent GPU if a bottom half has
|
||||
// been retriggered within uvm_gpu_replayable_faults_isr_unlock, because the
|
||||
// rescheduling added an additional reference.
|
||||
// been retriggered within uvm_parent_gpu_replayable_faults_isr_unlock,
|
||||
// because the rescheduling added an additional reference.
|
||||
uvm_parent_gpu_kref_put(parent_gpu);
|
||||
}
|
||||
|
||||
@ -573,7 +573,7 @@ static void non_replayable_faults_isr_bottom_half(void *args)
|
||||
|
||||
UVM_ASSERT(parent_gpu->non_replayable_faults_supported);
|
||||
|
||||
uvm_gpu_non_replayable_faults_isr_lock(parent_gpu);
|
||||
uvm_parent_gpu_non_replayable_faults_isr_lock(parent_gpu);
|
||||
|
||||
// Multiple bottom halves for non-replayable faults can be running
|
||||
// concurrently, but only one can enter this section for a given GPU
|
||||
@ -586,7 +586,7 @@ static void non_replayable_faults_isr_bottom_half(void *args)
|
||||
|
||||
uvm_gpu_service_non_replayable_fault_buffer(gpu);
|
||||
|
||||
uvm_gpu_non_replayable_faults_isr_unlock(parent_gpu);
|
||||
uvm_parent_gpu_non_replayable_faults_isr_unlock(parent_gpu);
|
||||
|
||||
put_kref:
|
||||
uvm_parent_gpu_kref_put(parent_gpu);
|
||||
@ -622,7 +622,7 @@ static void access_counters_isr_bottom_half(void *args)
|
||||
|
||||
uvm_gpu_service_access_counters(gpu);
|
||||
|
||||
uvm_gpu_access_counters_isr_unlock(parent_gpu);
|
||||
uvm_parent_gpu_access_counters_isr_unlock(parent_gpu);
|
||||
|
||||
put_kref:
|
||||
uvm_parent_gpu_kref_put(parent_gpu);
|
||||
@ -651,7 +651,7 @@ static void replayable_faults_retrigger_bottom_half(uvm_parent_gpu_t *parent_gpu
|
||||
//
|
||||
// (1) UVM didn't process all the entries up to cached PUT
|
||||
//
|
||||
// (2) UVM did process all the entries up to cached PUT, but GPS-RM
|
||||
// (2) UVM did process all the entries up to cached PUT, but GSP-RM
|
||||
// added new entries such that cached PUT is out-of-date
|
||||
//
|
||||
// In both cases, re-enablement of interrupts would have caused the
|
||||
@ -663,7 +663,7 @@ static void replayable_faults_retrigger_bottom_half(uvm_parent_gpu_t *parent_gpu
|
||||
// While in the typical case the retriggering happens within a replayable
|
||||
// fault bottom half, it can also happen within a non-interrupt path such as
|
||||
// uvm_gpu_fault_buffer_flush.
|
||||
if (uvm_conf_computing_mode_enabled_parent(parent_gpu))
|
||||
if (g_uvm_global.conf_computing_enabled)
|
||||
retrigger = true;
|
||||
|
||||
if (!retrigger)
|
||||
@ -678,7 +678,7 @@ static void replayable_faults_retrigger_bottom_half(uvm_parent_gpu_t *parent_gpu
|
||||
uvm_spin_unlock_irqrestore(&parent_gpu->isr.interrupts_lock);
|
||||
}
|
||||
|
||||
void uvm_gpu_replayable_faults_isr_lock(uvm_parent_gpu_t *parent_gpu)
|
||||
void uvm_parent_gpu_replayable_faults_isr_lock(uvm_parent_gpu_t *parent_gpu)
|
||||
{
|
||||
UVM_ASSERT(nv_kref_read(&parent_gpu->gpu_kref) > 0);
|
||||
|
||||
@ -687,7 +687,7 @@ void uvm_gpu_replayable_faults_isr_lock(uvm_parent_gpu_t *parent_gpu)
|
||||
// Bump the disable ref count. This guarantees that the bottom half or
|
||||
// another thread trying to take the replayable_faults.service_lock won't
|
||||
// inadvertently re-enable interrupts during this locking sequence.
|
||||
uvm_gpu_replayable_faults_intr_disable(parent_gpu);
|
||||
uvm_parent_gpu_replayable_faults_intr_disable(parent_gpu);
|
||||
|
||||
uvm_spin_unlock_irqrestore(&parent_gpu->isr.interrupts_lock);
|
||||
|
||||
@ -696,7 +696,7 @@ void uvm_gpu_replayable_faults_isr_lock(uvm_parent_gpu_t *parent_gpu)
|
||||
uvm_down(&parent_gpu->isr.replayable_faults.service_lock);
|
||||
}
|
||||
|
||||
void uvm_gpu_replayable_faults_isr_unlock(uvm_parent_gpu_t *parent_gpu)
|
||||
void uvm_parent_gpu_replayable_faults_isr_unlock(uvm_parent_gpu_t *parent_gpu)
|
||||
{
|
||||
UVM_ASSERT(nv_kref_read(&parent_gpu->gpu_kref) > 0);
|
||||
|
||||
@ -733,9 +733,10 @@ void uvm_gpu_replayable_faults_isr_unlock(uvm_parent_gpu_t *parent_gpu)
|
||||
// Note that if we're in the bottom half and the GPU was removed before
|
||||
// we checked replayable_faults.handling, we won't drop our interrupt
|
||||
// disable ref count from the corresponding top-half call to
|
||||
// uvm_gpu_replayable_faults_intr_disable. That's ok because remove_gpu
|
||||
// ignores the refcount after waiting for the bottom half to finish.
|
||||
uvm_gpu_replayable_faults_intr_enable(parent_gpu);
|
||||
// uvm_parent_gpu_replayable_faults_intr_disable. That's ok because
|
||||
// remove_gpu ignores the refcount after waiting for the bottom half to
|
||||
// finish.
|
||||
uvm_parent_gpu_replayable_faults_intr_enable(parent_gpu);
|
||||
|
||||
// Rearm pulse interrupts. This guarantees that the state of the pending
|
||||
// interrupt is current and the top level rearm performed by RM is only
|
||||
@ -762,42 +763,42 @@ void uvm_gpu_replayable_faults_isr_unlock(uvm_parent_gpu_t *parent_gpu)
|
||||
replayable_faults_retrigger_bottom_half(parent_gpu);
|
||||
}
|
||||
|
||||
void uvm_gpu_non_replayable_faults_isr_lock(uvm_parent_gpu_t *parent_gpu)
|
||||
void uvm_parent_gpu_non_replayable_faults_isr_lock(uvm_parent_gpu_t *parent_gpu)
|
||||
{
|
||||
UVM_ASSERT(nv_kref_read(&parent_gpu->gpu_kref) > 0);
|
||||
|
||||
uvm_down(&parent_gpu->isr.non_replayable_faults.service_lock);
|
||||
}
|
||||
|
||||
void uvm_gpu_non_replayable_faults_isr_unlock(uvm_parent_gpu_t *parent_gpu)
|
||||
void uvm_parent_gpu_non_replayable_faults_isr_unlock(uvm_parent_gpu_t *parent_gpu)
|
||||
{
|
||||
UVM_ASSERT(nv_kref_read(&parent_gpu->gpu_kref) > 0);
|
||||
|
||||
uvm_up(&parent_gpu->isr.non_replayable_faults.service_lock);
|
||||
}
|
||||
|
||||
void uvm_gpu_access_counters_isr_lock(uvm_parent_gpu_t *parent_gpu)
|
||||
void uvm_parent_gpu_access_counters_isr_lock(uvm_parent_gpu_t *parent_gpu)
|
||||
{
|
||||
// See comments in uvm_gpu_replayable_faults_isr_lock
|
||||
// See comments in uvm_parent_gpu_replayable_faults_isr_lock
|
||||
|
||||
uvm_spin_lock_irqsave(&parent_gpu->isr.interrupts_lock);
|
||||
|
||||
uvm_gpu_access_counters_intr_disable(parent_gpu);
|
||||
uvm_parent_gpu_access_counters_intr_disable(parent_gpu);
|
||||
|
||||
uvm_spin_unlock_irqrestore(&parent_gpu->isr.interrupts_lock);
|
||||
|
||||
uvm_down(&parent_gpu->isr.access_counters.service_lock);
|
||||
}
|
||||
|
||||
void uvm_gpu_access_counters_isr_unlock(uvm_parent_gpu_t *parent_gpu)
|
||||
void uvm_parent_gpu_access_counters_isr_unlock(uvm_parent_gpu_t *parent_gpu)
|
||||
{
|
||||
UVM_ASSERT(nv_kref_read(&parent_gpu->gpu_kref) > 0);
|
||||
|
||||
// See comments in uvm_gpu_replayable_faults_isr_unlock
|
||||
// See comments in uvm_parent_gpu_replayable_faults_isr_unlock
|
||||
|
||||
uvm_spin_lock_irqsave(&parent_gpu->isr.interrupts_lock);
|
||||
|
||||
uvm_gpu_access_counters_intr_enable(parent_gpu);
|
||||
uvm_parent_gpu_access_counters_intr_enable(parent_gpu);
|
||||
|
||||
if (parent_gpu->isr.access_counters.handling_ref_count > 0) {
|
||||
parent_gpu->access_counter_buffer_hal->clear_access_counter_notifications(parent_gpu,
|
||||
@ -811,7 +812,7 @@ void uvm_gpu_access_counters_isr_unlock(uvm_parent_gpu_t *parent_gpu)
|
||||
uvm_spin_unlock_irqrestore(&parent_gpu->isr.interrupts_lock);
|
||||
}
|
||||
|
||||
static void uvm_gpu_replayable_faults_intr_disable(uvm_parent_gpu_t *parent_gpu)
|
||||
static void uvm_parent_gpu_replayable_faults_intr_disable(uvm_parent_gpu_t *parent_gpu)
|
||||
{
|
||||
uvm_assert_spinlock_locked(&parent_gpu->isr.interrupts_lock);
|
||||
|
||||
@ -821,7 +822,7 @@ static void uvm_gpu_replayable_faults_intr_disable(uvm_parent_gpu_t *parent_gpu)
|
||||
++parent_gpu->isr.replayable_faults.disable_intr_ref_count;
|
||||
}
|
||||
|
||||
static void uvm_gpu_replayable_faults_intr_enable(uvm_parent_gpu_t *parent_gpu)
|
||||
static void uvm_parent_gpu_replayable_faults_intr_enable(uvm_parent_gpu_t *parent_gpu)
|
||||
{
|
||||
uvm_assert_spinlock_locked(&parent_gpu->isr.interrupts_lock);
|
||||
UVM_ASSERT(parent_gpu->isr.replayable_faults.disable_intr_ref_count > 0);
|
||||
@ -831,7 +832,7 @@ static void uvm_gpu_replayable_faults_intr_enable(uvm_parent_gpu_t *parent_gpu)
|
||||
parent_gpu->fault_buffer_hal->enable_replayable_faults(parent_gpu);
|
||||
}
|
||||
|
||||
void uvm_gpu_access_counters_intr_disable(uvm_parent_gpu_t *parent_gpu)
|
||||
void uvm_parent_gpu_access_counters_intr_disable(uvm_parent_gpu_t *parent_gpu)
|
||||
{
|
||||
uvm_assert_spinlock_locked(&parent_gpu->isr.interrupts_lock);
|
||||
|
||||
@ -848,7 +849,7 @@ void uvm_gpu_access_counters_intr_disable(uvm_parent_gpu_t *parent_gpu)
|
||||
++parent_gpu->isr.access_counters.disable_intr_ref_count;
|
||||
}
|
||||
|
||||
void uvm_gpu_access_counters_intr_enable(uvm_parent_gpu_t *parent_gpu)
|
||||
void uvm_parent_gpu_access_counters_intr_enable(uvm_parent_gpu_t *parent_gpu)
|
||||
{
|
||||
uvm_assert_spinlock_locked(&parent_gpu->isr.interrupts_lock);
|
||||
UVM_ASSERT(uvm_sem_is_locked(&parent_gpu->isr.access_counters.service_lock));
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*******************************************************************************
|
||||
Copyright (c) 2016-2019 NVIDIA Corporation
|
||||
Copyright (c) 2016-2023 NVIDIA Corporation
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to
|
||||
@ -131,19 +131,19 @@ typedef struct
|
||||
NV_STATUS uvm_isr_top_half_entry(const NvProcessorUuid *gpu_uuid);
|
||||
|
||||
// Initialize ISR handling state
|
||||
NV_STATUS uvm_gpu_init_isr(uvm_parent_gpu_t *parent_gpu);
|
||||
NV_STATUS uvm_parent_gpu_init_isr(uvm_parent_gpu_t *parent_gpu);
|
||||
|
||||
// Flush any currently scheduled bottom halves. This is called during GPU
|
||||
// removal.
|
||||
void uvm_gpu_flush_bottom_halves(uvm_parent_gpu_t *parent_gpu);
|
||||
void uvm_parent_gpu_flush_bottom_halves(uvm_parent_gpu_t *parent_gpu);
|
||||
|
||||
// Prevent new bottom halves from being scheduled. This is called during parent
|
||||
// GPU removal.
|
||||
void uvm_gpu_disable_isr(uvm_parent_gpu_t *parent_gpu);
|
||||
void uvm_parent_gpu_disable_isr(uvm_parent_gpu_t *parent_gpu);
|
||||
|
||||
// Destroy ISR handling state and return interrupt ownership to RM. This is
|
||||
// called during parent GPU removal
|
||||
void uvm_gpu_deinit_isr(uvm_parent_gpu_t *parent_gpu);
|
||||
void uvm_parent_gpu_deinit_isr(uvm_parent_gpu_t *parent_gpu);
|
||||
|
||||
// Take parent_gpu->isr.replayable_faults.service_lock from a non-top/bottom
|
||||
// half thread. This will also disable replayable page fault interrupts (if
|
||||
@ -151,46 +151,46 @@ void uvm_gpu_deinit_isr(uvm_parent_gpu_t *parent_gpu);
|
||||
// would cause an interrupt storm if we didn't disable them first.
|
||||
//
|
||||
// At least one GPU under the parent must have been previously retained.
|
||||
void uvm_gpu_replayable_faults_isr_lock(uvm_parent_gpu_t *parent_gpu);
|
||||
void uvm_parent_gpu_replayable_faults_isr_lock(uvm_parent_gpu_t *parent_gpu);
|
||||
|
||||
// Unlock parent_gpu->isr.replayable_faults.service_lock. This call may
|
||||
// re-enable replayable page fault interrupts. Unlike
|
||||
// uvm_gpu_replayable_faults_isr_lock(), which should only called from
|
||||
// uvm_parent_gpu_replayable_faults_isr_lock(), which should only called from
|
||||
// non-top/bottom half threads, this can be called by any thread.
|
||||
void uvm_gpu_replayable_faults_isr_unlock(uvm_parent_gpu_t *parent_gpu);
|
||||
void uvm_parent_gpu_replayable_faults_isr_unlock(uvm_parent_gpu_t *parent_gpu);
|
||||
|
||||
// Lock/unlock routines for non-replayable faults. These do not need to prevent
|
||||
// interrupt storms since the GPU fault buffers for non-replayable faults are
|
||||
// managed by RM. Unlike uvm_gpu_replayable_faults_isr_lock, no GPUs under
|
||||
// the parent need to have been previously retained.
|
||||
void uvm_gpu_non_replayable_faults_isr_lock(uvm_parent_gpu_t *parent_gpu);
|
||||
void uvm_gpu_non_replayable_faults_isr_unlock(uvm_parent_gpu_t *parent_gpu);
|
||||
// managed by RM. Unlike uvm_parent_gpu_replayable_faults_isr_lock, no GPUs
|
||||
// under the parent need to have been previously retained.
|
||||
void uvm_parent_gpu_non_replayable_faults_isr_lock(uvm_parent_gpu_t *parent_gpu);
|
||||
void uvm_parent_gpu_non_replayable_faults_isr_unlock(uvm_parent_gpu_t *parent_gpu);
|
||||
|
||||
// See uvm_gpu_replayable_faults_isr_lock/unlock
|
||||
void uvm_gpu_access_counters_isr_lock(uvm_parent_gpu_t *parent_gpu);
|
||||
void uvm_gpu_access_counters_isr_unlock(uvm_parent_gpu_t *parent_gpu);
|
||||
// See uvm_parent_gpu_replayable_faults_isr_lock/unlock
|
||||
void uvm_parent_gpu_access_counters_isr_lock(uvm_parent_gpu_t *parent_gpu);
|
||||
void uvm_parent_gpu_access_counters_isr_unlock(uvm_parent_gpu_t *parent_gpu);
|
||||
|
||||
// Increments the reference count tracking whether access counter interrupts
|
||||
// should be disabled. The caller is guaranteed that access counter interrupts
|
||||
// are disabled upon return. Interrupts might already be disabled prior to
|
||||
// making this call. Each call is ref-counted, so this must be paired with a
|
||||
// call to uvm_gpu_access_counters_intr_enable().
|
||||
// call to uvm_parent_gpu_access_counters_intr_enable().
|
||||
//
|
||||
// parent_gpu->isr.interrupts_lock must be held to call this function.
|
||||
void uvm_gpu_access_counters_intr_disable(uvm_parent_gpu_t *parent_gpu);
|
||||
void uvm_parent_gpu_access_counters_intr_disable(uvm_parent_gpu_t *parent_gpu);
|
||||
|
||||
// Decrements the reference count tracking whether access counter interrupts
|
||||
// should be disabled. Only once the count reaches 0 are the HW interrupts
|
||||
// actually enabled, so this call does not guarantee that the interrupts have
|
||||
// been re-enabled upon return.
|
||||
//
|
||||
// uvm_gpu_access_counters_intr_disable() must have been called prior to calling
|
||||
// this function.
|
||||
// uvm_parent_gpu_access_counters_intr_disable() must have been called prior to
|
||||
// calling this function.
|
||||
//
|
||||
// NOTE: For pulse-based interrupts, the caller is responsible for re-arming
|
||||
// the interrupt.
|
||||
//
|
||||
// parent_gpu->isr.interrupts_lock must be held to call this function.
|
||||
void uvm_gpu_access_counters_intr_enable(uvm_parent_gpu_t *parent_gpu);
|
||||
void uvm_parent_gpu_access_counters_intr_enable(uvm_parent_gpu_t *parent_gpu);
|
||||
|
||||
#endif // __UVM_GPU_ISR_H__
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*******************************************************************************
|
||||
Copyright (c) 2017-2022 NVIDIA Corporation
|
||||
Copyright (c) 2017-2023 NVIDIA Corporation
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to
|
||||
@ -196,7 +196,7 @@ static NV_STATUS fetch_non_replayable_fault_buffer_entries(uvm_parent_gpu_t *par
|
||||
if (status != NV_OK) {
|
||||
UVM_ERR_PRINT("nvUvmInterfaceGetNonReplayableFaults() failed: %s, GPU %s\n",
|
||||
nvstatusToString(status),
|
||||
parent_gpu->name);
|
||||
uvm_parent_gpu_name(parent_gpu));
|
||||
|
||||
uvm_global_set_fatal_error(status);
|
||||
return status;
|
||||
@ -235,17 +235,27 @@ static NV_STATUS fetch_non_replayable_fault_buffer_entries(uvm_parent_gpu_t *par
|
||||
return NV_OK;
|
||||
}
|
||||
|
||||
// In SRIOV, the UVM (guest) driver does not have access to the privileged
|
||||
// registers used to clear the faulted bit. Instead, UVM requests host RM to do
|
||||
// the clearing on its behalf, using a SW method.
|
||||
static bool use_clear_faulted_channel_sw_method(uvm_gpu_t *gpu)
|
||||
{
|
||||
if (uvm_gpu_is_virt_mode_sriov(gpu)) {
|
||||
UVM_ASSERT(gpu->parent->has_clear_faulted_channel_sw_method);
|
||||
return true;
|
||||
}
|
||||
// If true, UVM uses a SW method to request RM to do the clearing on its
|
||||
// behalf.
|
||||
bool use_sw_method = false;
|
||||
|
||||
return false;
|
||||
// In SRIOV, the UVM (guest) driver does not have access to the privileged
|
||||
// registers used to clear the faulted bit.
|
||||
if (uvm_parent_gpu_is_virt_mode_sriov(gpu->parent))
|
||||
use_sw_method = true;
|
||||
|
||||
// In Confidential Computing access to the privileged registers is blocked,
|
||||
// in order to prevent interference between guests, or between the
|
||||
// (untrusted) host and the guests.
|
||||
if (g_uvm_global.conf_computing_enabled)
|
||||
use_sw_method = true;
|
||||
|
||||
if (use_sw_method)
|
||||
UVM_ASSERT(gpu->parent->has_clear_faulted_channel_sw_method);
|
||||
|
||||
return use_sw_method;
|
||||
}
|
||||
|
||||
static NV_STATUS clear_faulted_method_on_gpu(uvm_gpu_t *gpu,
|
||||
@ -334,7 +344,8 @@ static NV_STATUS service_managed_fault_in_block_locked(uvm_gpu_t *gpu,
|
||||
uvm_va_block_t *va_block,
|
||||
uvm_va_block_retry_t *va_block_retry,
|
||||
uvm_fault_buffer_entry_t *fault_entry,
|
||||
uvm_service_block_context_t *service_context)
|
||||
uvm_service_block_context_t *service_context,
|
||||
const bool hmm_migratable)
|
||||
{
|
||||
NV_STATUS status = NV_OK;
|
||||
uvm_page_index_t page_index;
|
||||
@ -400,6 +411,7 @@ static NV_STATUS service_managed_fault_in_block_locked(uvm_gpu_t *gpu,
|
||||
policy,
|
||||
&thrashing_hint,
|
||||
UVM_SERVICE_OPERATION_NON_REPLAYABLE_FAULTS,
|
||||
hmm_migratable,
|
||||
&read_duplicate);
|
||||
|
||||
// Initialize the minimum necessary state in the fault service context
|
||||
@ -431,7 +443,8 @@ static NV_STATUS service_managed_fault_in_block_locked(uvm_gpu_t *gpu,
|
||||
|
||||
static NV_STATUS service_managed_fault_in_block(uvm_gpu_t *gpu,
|
||||
uvm_va_block_t *va_block,
|
||||
uvm_fault_buffer_entry_t *fault_entry)
|
||||
uvm_fault_buffer_entry_t *fault_entry,
|
||||
const bool hmm_migratable)
|
||||
{
|
||||
NV_STATUS status, tracker_status;
|
||||
uvm_va_block_retry_t va_block_retry;
|
||||
@ -440,10 +453,8 @@ static NV_STATUS service_managed_fault_in_block(uvm_gpu_t *gpu,
|
||||
service_context->operation = UVM_SERVICE_OPERATION_NON_REPLAYABLE_FAULTS;
|
||||
service_context->num_retries = 0;
|
||||
|
||||
if (uvm_va_block_is_hmm(va_block)) {
|
||||
uvm_hmm_service_context_init(service_context);
|
||||
if (uvm_va_block_is_hmm(va_block))
|
||||
uvm_hmm_migrate_begin_wait(va_block);
|
||||
}
|
||||
|
||||
uvm_mutex_lock(&va_block->lock);
|
||||
|
||||
@ -452,7 +463,8 @@ static NV_STATUS service_managed_fault_in_block(uvm_gpu_t *gpu,
|
||||
va_block,
|
||||
&va_block_retry,
|
||||
fault_entry,
|
||||
service_context));
|
||||
service_context,
|
||||
hmm_migratable));
|
||||
|
||||
tracker_status = uvm_tracker_add_tracker_safe(&gpu->parent->fault_buffer_info.non_replayable.fault_service_tracker,
|
||||
&va_block->tracker);
|
||||
@ -570,7 +582,7 @@ static NV_STATUS service_non_managed_fault(uvm_gpu_va_space_t *gpu_va_space,
|
||||
|
||||
ats_context->client_type = UVM_FAULT_CLIENT_TYPE_HUB;
|
||||
|
||||
ats_invalidate->write_faults_in_batch = false;
|
||||
ats_invalidate->tlb_batch_pending = false;
|
||||
|
||||
va_range_next = uvm_va_space_iter_first(gpu_va_space->va_space, fault_entry->fault_address, ~0ULL);
|
||||
|
||||
@ -619,7 +631,7 @@ static NV_STATUS service_non_managed_fault(uvm_gpu_va_space_t *gpu_va_space,
|
||||
return status;
|
||||
}
|
||||
|
||||
static NV_STATUS service_fault(uvm_gpu_t *gpu, uvm_fault_buffer_entry_t *fault_entry)
|
||||
static NV_STATUS service_fault_once(uvm_gpu_t *gpu, uvm_fault_buffer_entry_t *fault_entry, const bool hmm_migratable)
|
||||
{
|
||||
NV_STATUS status;
|
||||
uvm_user_channel_t *user_channel;
|
||||
@ -631,7 +643,7 @@ static NV_STATUS service_fault(uvm_gpu_t *gpu, uvm_fault_buffer_entry_t *fault_e
|
||||
uvm_va_block_context_t *va_block_context =
|
||||
gpu->parent->fault_buffer_info.non_replayable.block_service_context.block_context;
|
||||
|
||||
status = uvm_gpu_fault_entry_to_va_space(gpu, fault_entry, &va_space);
|
||||
status = uvm_parent_gpu_fault_entry_to_va_space(gpu->parent, fault_entry, &va_space);
|
||||
if (status != NV_OK) {
|
||||
// The VA space lookup will fail if we're running concurrently with
|
||||
// removal of the channel from the VA space (channel unregister, GPU VA
|
||||
@ -691,7 +703,7 @@ static NV_STATUS service_fault(uvm_gpu_t *gpu, uvm_fault_buffer_entry_t *fault_e
|
||||
&va_block);
|
||||
}
|
||||
if (status == NV_OK)
|
||||
status = service_managed_fault_in_block(gpu_va_space->gpu, va_block, fault_entry);
|
||||
status = service_managed_fault_in_block(gpu_va_space->gpu, va_block, fault_entry, hmm_migratable);
|
||||
else
|
||||
status = service_non_managed_fault(gpu_va_space, mm, fault_entry, status);
|
||||
|
||||
@ -708,21 +720,46 @@ static NV_STATUS service_fault(uvm_gpu_t *gpu, uvm_fault_buffer_entry_t *fault_e
|
||||
}
|
||||
|
||||
if (fault_entry->is_fatal)
|
||||
uvm_tools_record_gpu_fatal_fault(gpu->parent->id, fault_entry->va_space, fault_entry, fault_entry->fatal_reason);
|
||||
uvm_tools_record_gpu_fatal_fault(gpu->id, fault_entry->va_space, fault_entry, fault_entry->fatal_reason);
|
||||
|
||||
if (status != NV_OK || fault_entry->is_fatal)
|
||||
if (fault_entry->is_fatal ||
|
||||
(status != NV_OK &&
|
||||
status != NV_WARN_MORE_PROCESSING_REQUIRED &&
|
||||
status != NV_WARN_MISMATCHED_TARGET))
|
||||
schedule_kill_channel(gpu, fault_entry, user_channel);
|
||||
|
||||
exit_no_channel:
|
||||
uvm_va_space_up_read(va_space);
|
||||
uvm_va_space_mm_release_unlock(va_space, mm);
|
||||
|
||||
if (status != NV_OK)
|
||||
if (status != NV_OK &&
|
||||
status != NV_WARN_MORE_PROCESSING_REQUIRED &&
|
||||
status != NV_WARN_MISMATCHED_TARGET)
|
||||
UVM_DBG_PRINT("Error servicing non-replayable faults on GPU: %s\n", uvm_gpu_name(gpu));
|
||||
|
||||
return status;
|
||||
}
|
||||
|
||||
static NV_STATUS service_fault(uvm_gpu_t *gpu, uvm_fault_buffer_entry_t *fault_entry)
|
||||
{
|
||||
uvm_service_block_context_t *service_context =
|
||||
&gpu->parent->fault_buffer_info.non_replayable.block_service_context;
|
||||
NV_STATUS status;
|
||||
bool hmm_migratable = true;
|
||||
|
||||
service_context->num_retries = 0;
|
||||
|
||||
do {
|
||||
status = service_fault_once(gpu, fault_entry, hmm_migratable);
|
||||
if (status == NV_WARN_MISMATCHED_TARGET) {
|
||||
hmm_migratable = false;
|
||||
status = NV_WARN_MORE_PROCESSING_REQUIRED;
|
||||
}
|
||||
} while (status == NV_WARN_MORE_PROCESSING_REQUIRED);
|
||||
|
||||
return status;
|
||||
}
|
||||
|
||||
void uvm_gpu_service_non_replayable_fault_buffer(uvm_gpu_t *gpu)
|
||||
{
|
||||
NvU32 cached_faults;
|
||||
|
@ -139,7 +139,7 @@ static NV_STATUS fault_buffer_init_replayable_faults(uvm_parent_gpu_t *parent_gp
|
||||
|
||||
if (parent_gpu->fault_buffer_info.max_batch_size != uvm_perf_fault_batch_count) {
|
||||
pr_info("Invalid uvm_perf_fault_batch_count value on GPU %s: %u. Valid range [%u:%u] Using %u instead\n",
|
||||
parent_gpu->name,
|
||||
uvm_parent_gpu_name(parent_gpu),
|
||||
uvm_perf_fault_batch_count,
|
||||
UVM_PERF_FAULT_BATCH_COUNT_MIN,
|
||||
replayable_faults->max_faults,
|
||||
@ -171,7 +171,7 @@ static NV_STATUS fault_buffer_init_replayable_faults(uvm_parent_gpu_t *parent_gp
|
||||
if (status != NV_OK) {
|
||||
UVM_ERR_PRINT("Failed to take page fault ownership from RM: %s, GPU %s\n",
|
||||
nvstatusToString(status),
|
||||
parent_gpu->name);
|
||||
uvm_parent_gpu_name(parent_gpu));
|
||||
return status;
|
||||
}
|
||||
|
||||
@ -181,7 +181,7 @@ static NV_STATUS fault_buffer_init_replayable_faults(uvm_parent_gpu_t *parent_gp
|
||||
|
||||
if (replayable_faults->replay_policy != uvm_perf_fault_replay_policy) {
|
||||
pr_info("Invalid uvm_perf_fault_replay_policy value on GPU %s: %d. Using %d instead\n",
|
||||
parent_gpu->name,
|
||||
uvm_parent_gpu_name(parent_gpu),
|
||||
uvm_perf_fault_replay_policy,
|
||||
replayable_faults->replay_policy);
|
||||
}
|
||||
@ -189,7 +189,7 @@ static NV_STATUS fault_buffer_init_replayable_faults(uvm_parent_gpu_t *parent_gp
|
||||
replayable_faults->replay_update_put_ratio = min(uvm_perf_fault_replay_update_put_ratio, 100u);
|
||||
if (replayable_faults->replay_update_put_ratio != uvm_perf_fault_replay_update_put_ratio) {
|
||||
pr_info("Invalid uvm_perf_fault_replay_update_put_ratio value on GPU %s: %u. Using %u instead\n",
|
||||
parent_gpu->name,
|
||||
uvm_parent_gpu_name(parent_gpu),
|
||||
uvm_perf_fault_replay_update_put_ratio,
|
||||
replayable_faults->replay_update_put_ratio);
|
||||
}
|
||||
@ -238,7 +238,7 @@ NV_STATUS uvm_gpu_fault_buffer_init(uvm_parent_gpu_t *parent_gpu)
|
||||
if (status != NV_OK) {
|
||||
UVM_ERR_PRINT("Failed to init fault buffer info from RM: %s, GPU %s\n",
|
||||
nvstatusToString(status),
|
||||
parent_gpu->name);
|
||||
uvm_parent_gpu_name(parent_gpu));
|
||||
|
||||
// nvUvmInterfaceInitFaultInfo may leave fields in rm_info populated
|
||||
// when it returns an error. Set the buffer handle to zero as it is
|
||||
@ -297,19 +297,6 @@ void uvm_gpu_fault_buffer_deinit(uvm_parent_gpu_t *parent_gpu)
|
||||
}
|
||||
}
|
||||
|
||||
// TODO: Bug 4098289: this function can be removed, and the calls to it replaced
|
||||
// with calls to uvm_conf_computing_mode_enabled_parent, once UVM ownership is
|
||||
// dictated by Confidential Computing enablement. Currently we support a
|
||||
// non-production scenario in which Confidential Computing is enabled, but
|
||||
// UVM still owns the replayable fault buffer.
|
||||
bool uvm_parent_gpu_replayable_fault_buffer_is_uvm_owned(uvm_parent_gpu_t *parent_gpu)
|
||||
{
|
||||
if (uvm_conf_computing_mode_enabled_parent(parent_gpu))
|
||||
return parent_gpu->fault_buffer_info.rm_info.replayable.bUvmOwnsHwFaultBuffer;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool uvm_gpu_replayable_faults_pending(uvm_parent_gpu_t *parent_gpu)
|
||||
{
|
||||
uvm_replayable_fault_buffer_info_t *replayable_faults = &parent_gpu->fault_buffer_info.replayable;
|
||||
@ -362,7 +349,8 @@ static NV_STATUS push_cancel_on_gpu(uvm_gpu_t *gpu,
|
||||
"Cancel targeting instance_ptr {0x%llx:%s}\n",
|
||||
instance_ptr.address,
|
||||
uvm_aperture_string(instance_ptr.aperture));
|
||||
} else {
|
||||
}
|
||||
else {
|
||||
status = uvm_push_begin_acquire(gpu->channel_manager,
|
||||
UVM_CHANNEL_TYPE_MEMOPS,
|
||||
&replayable_faults->replay_tracker,
|
||||
@ -559,13 +547,9 @@ static NV_STATUS hw_fault_buffer_flush_locked(uvm_parent_gpu_t *parent_gpu)
|
||||
// fault_buffer_flush_locked is called, are now either flushed from the HW
|
||||
// buffer, or are present in the shadow buffer and are about to be discarded
|
||||
// too.
|
||||
if (!uvm_conf_computing_mode_enabled_parent(parent_gpu))
|
||||
if (!g_uvm_global.conf_computing_enabled)
|
||||
return NV_OK;
|
||||
|
||||
// nvUvmInterfaceFlushReplayableFaultBuffer relies on the caller to ensure
|
||||
// serialization for a given GPU.
|
||||
UVM_ASSERT(uvm_sem_is_locked(&parent_gpu->isr.replayable_faults.service_lock));
|
||||
|
||||
// Flush the HW replayable buffer owned by GSP-RM.
|
||||
status = nvUvmInterfaceFlushReplayableFaultBuffer(parent_gpu->rm_device);
|
||||
|
||||
@ -581,7 +565,7 @@ static void fault_buffer_skip_replayable_entry(uvm_parent_gpu_t *parent_gpu, NvU
|
||||
// Flushed faults are never decrypted, but the decryption IV associated with
|
||||
// replayable faults still requires manual adjustment so it is kept in sync
|
||||
// with the encryption IV on the GSP-RM's side.
|
||||
if (!uvm_parent_gpu_replayable_fault_buffer_is_uvm_owned(parent_gpu))
|
||||
if (g_uvm_global.conf_computing_enabled)
|
||||
uvm_conf_computing_fault_increment_decrypt_iv(parent_gpu, 1);
|
||||
|
||||
parent_gpu->fault_buffer_hal->entry_clear_valid(parent_gpu, index);
|
||||
@ -644,7 +628,7 @@ NV_STATUS uvm_gpu_fault_buffer_flush(uvm_gpu_t *gpu)
|
||||
UVM_ASSERT(gpu->parent->replayable_faults_supported);
|
||||
|
||||
// Disables replayable fault interrupts and fault servicing
|
||||
uvm_gpu_replayable_faults_isr_lock(gpu->parent);
|
||||
uvm_parent_gpu_replayable_faults_isr_lock(gpu->parent);
|
||||
|
||||
status = fault_buffer_flush_locked(gpu,
|
||||
UVM_GPU_BUFFER_FLUSH_MODE_WAIT_UPDATE_PUT,
|
||||
@ -653,7 +637,7 @@ NV_STATUS uvm_gpu_fault_buffer_flush(uvm_gpu_t *gpu)
|
||||
|
||||
// This will trigger the top half to start servicing faults again, if the
|
||||
// replay brought any back in
|
||||
uvm_gpu_replayable_faults_isr_unlock(gpu->parent);
|
||||
uvm_parent_gpu_replayable_faults_isr_unlock(gpu->parent);
|
||||
return status;
|
||||
}
|
||||
|
||||
@ -990,7 +974,7 @@ static NV_STATUS translate_instance_ptrs(uvm_gpu_t *gpu,
|
||||
continue;
|
||||
}
|
||||
|
||||
status = uvm_gpu_fault_entry_to_va_space(gpu, current_entry, ¤t_entry->va_space);
|
||||
status = uvm_parent_gpu_fault_entry_to_va_space(gpu->parent, current_entry, ¤t_entry->va_space);
|
||||
if (status != NV_OK) {
|
||||
if (status == NV_ERR_PAGE_TABLE_NOT_AVAIL) {
|
||||
// The channel is valid but the subcontext is not. This can only
|
||||
@ -1310,6 +1294,7 @@ static NV_STATUS service_fault_batch_block_locked(uvm_gpu_t *gpu,
|
||||
uvm_va_block_retry_t *va_block_retry,
|
||||
uvm_fault_service_batch_context_t *batch_context,
|
||||
NvU32 first_fault_index,
|
||||
const bool hmm_migratable,
|
||||
NvU32 *block_faults)
|
||||
{
|
||||
NV_STATUS status = NV_OK;
|
||||
@ -1480,6 +1465,7 @@ static NV_STATUS service_fault_batch_block_locked(uvm_gpu_t *gpu,
|
||||
policy,
|
||||
&thrashing_hint,
|
||||
UVM_SERVICE_OPERATION_REPLAYABLE_FAULTS,
|
||||
hmm_migratable,
|
||||
&read_duplicate);
|
||||
|
||||
if (!uvm_processor_mask_test_and_set(&block_context->resident_processors, new_residency))
|
||||
@ -1532,6 +1518,7 @@ static NV_STATUS service_fault_batch_block(uvm_gpu_t *gpu,
|
||||
uvm_va_block_t *va_block,
|
||||
uvm_fault_service_batch_context_t *batch_context,
|
||||
NvU32 first_fault_index,
|
||||
const bool hmm_migratable,
|
||||
NvU32 *block_faults)
|
||||
{
|
||||
NV_STATUS status;
|
||||
@ -1553,6 +1540,7 @@ static NV_STATUS service_fault_batch_block(uvm_gpu_t *gpu,
|
||||
&va_block_retry,
|
||||
batch_context,
|
||||
first_fault_index,
|
||||
hmm_migratable,
|
||||
block_faults));
|
||||
|
||||
tracker_status = uvm_tracker_add_tracker_safe(&batch_context->tracker, &va_block->tracker);
|
||||
@ -1631,23 +1619,23 @@ static NV_STATUS service_fault_batch_ats_sub_vma(uvm_gpu_va_space_t *gpu_va_spac
|
||||
const uvm_page_mask_t *write_fault_mask = &ats_context->write_fault_mask;
|
||||
const uvm_page_mask_t *reads_serviced_mask = &ats_context->reads_serviced_mask;
|
||||
uvm_page_mask_t *faults_serviced_mask = &ats_context->faults_serviced_mask;
|
||||
uvm_page_mask_t *faulted_mask = &ats_context->faulted_mask;
|
||||
uvm_page_mask_t *accessed_mask = &ats_context->accessed_mask;
|
||||
|
||||
UVM_ASSERT(vma);
|
||||
|
||||
ats_context->client_type = UVM_FAULT_CLIENT_TYPE_GPC;
|
||||
|
||||
uvm_page_mask_or(faulted_mask, write_fault_mask, read_fault_mask);
|
||||
uvm_page_mask_or(accessed_mask, write_fault_mask, read_fault_mask);
|
||||
|
||||
status = uvm_ats_service_faults(gpu_va_space, vma, base, &batch_context->ats_context);
|
||||
|
||||
// Remove prefetched pages from the serviced mask since fault servicing
|
||||
// failures belonging to prefetch pages need to be ignored.
|
||||
uvm_page_mask_and(faults_serviced_mask, faults_serviced_mask, faulted_mask);
|
||||
uvm_page_mask_and(faults_serviced_mask, faults_serviced_mask, accessed_mask);
|
||||
|
||||
UVM_ASSERT(uvm_page_mask_subset(faults_serviced_mask, faulted_mask));
|
||||
UVM_ASSERT(uvm_page_mask_subset(faults_serviced_mask, accessed_mask));
|
||||
|
||||
if ((status != NV_OK) || uvm_page_mask_equal(faults_serviced_mask, faulted_mask)) {
|
||||
if ((status != NV_OK) || uvm_page_mask_equal(faults_serviced_mask, accessed_mask)) {
|
||||
(*block_faults) += (fault_index_end - fault_index_start);
|
||||
return status;
|
||||
}
|
||||
@ -1679,7 +1667,8 @@ static NV_STATUS service_fault_batch_ats_sub_vma(uvm_gpu_va_space_t *gpu_va_spac
|
||||
if (access_type <= UVM_FAULT_ACCESS_TYPE_READ) {
|
||||
cancel_va_mode = UVM_FAULT_CANCEL_VA_MODE_ALL;
|
||||
}
|
||||
else if (access_type >= UVM_FAULT_ACCESS_TYPE_WRITE) {
|
||||
else {
|
||||
UVM_ASSERT(access_type >= UVM_FAULT_ACCESS_TYPE_WRITE);
|
||||
if (uvm_fault_access_type_mask_test(current_entry->access_type_mask, UVM_FAULT_ACCESS_TYPE_READ) &&
|
||||
!uvm_page_mask_test(reads_serviced_mask, page_index))
|
||||
cancel_va_mode = UVM_FAULT_CANCEL_VA_MODE_ALL;
|
||||
@ -1856,7 +1845,8 @@ static NV_STATUS service_fault_batch_dispatch(uvm_va_space_t *va_space,
|
||||
uvm_fault_service_batch_context_t *batch_context,
|
||||
NvU32 fault_index,
|
||||
NvU32 *block_faults,
|
||||
bool replay_per_va_block)
|
||||
bool replay_per_va_block,
|
||||
const bool hmm_migratable)
|
||||
{
|
||||
NV_STATUS status;
|
||||
uvm_va_range_t *va_range = NULL;
|
||||
@ -1887,7 +1877,7 @@ static NV_STATUS service_fault_batch_dispatch(uvm_va_space_t *va_space,
|
||||
status = NV_ERR_INVALID_ADDRESS;
|
||||
|
||||
if (status == NV_OK) {
|
||||
status = service_fault_batch_block(gpu, va_block, batch_context, fault_index, block_faults);
|
||||
status = service_fault_batch_block(gpu, va_block, batch_context, fault_index, hmm_migratable, block_faults);
|
||||
}
|
||||
else if ((status == NV_ERR_INVALID_ADDRESS) && uvm_ats_can_service_faults(gpu_va_space, mm)) {
|
||||
NvU64 outer = ~0ULL;
|
||||
@ -1991,9 +1981,6 @@ static NV_STATUS service_fault_batch_for_cancel(uvm_gpu_t *gpu, uvm_fault_servic
|
||||
// in the HW buffer. When GSP owns the HW buffer, we also have to wait for
|
||||
// GSP to copy all available faults from the HW buffer into the shadow
|
||||
// buffer.
|
||||
//
|
||||
// TODO: Bug 2533557: This flush does not actually guarantee that GSP will
|
||||
// copy over all faults.
|
||||
status = hw_fault_buffer_flush_locked(gpu->parent);
|
||||
if (status != NV_OK)
|
||||
goto done;
|
||||
@ -2064,19 +2051,19 @@ static NV_STATUS service_fault_batch_for_cancel(uvm_gpu_t *gpu, uvm_fault_servic
|
||||
else {
|
||||
uvm_ats_fault_invalidate_t *ats_invalidate = &gpu->parent->fault_buffer_info.replayable.ats_invalidate;
|
||||
NvU32 block_faults;
|
||||
const bool hmm_migratable = true;
|
||||
|
||||
ats_invalidate->write_faults_in_batch = false;
|
||||
uvm_hmm_service_context_init(service_context);
|
||||
ats_invalidate->tlb_batch_pending = false;
|
||||
|
||||
// Service all the faults that we can. We only really need to search
|
||||
// for fatal faults, but attempting to service all is the easiest
|
||||
// way to do that.
|
||||
status = service_fault_batch_dispatch(va_space, gpu_va_space, batch_context, i, &block_faults, false);
|
||||
status = service_fault_batch_dispatch(va_space, gpu_va_space, batch_context, i, &block_faults, false, hmm_migratable);
|
||||
if (status != NV_OK) {
|
||||
// TODO: Bug 3900733: clean up locking in service_fault_batch().
|
||||
// We need to drop lock and retry. That means flushing and
|
||||
// starting over.
|
||||
if (status == NV_WARN_MORE_PROCESSING_REQUIRED)
|
||||
if (status == NV_WARN_MORE_PROCESSING_REQUIRED || status == NV_WARN_MISMATCHED_TARGET)
|
||||
status = NV_OK;
|
||||
|
||||
break;
|
||||
@ -2148,11 +2135,11 @@ static NV_STATUS service_fault_batch(uvm_gpu_t *gpu,
|
||||
uvm_service_block_context_t *service_context =
|
||||
&gpu->parent->fault_buffer_info.replayable.block_service_context;
|
||||
uvm_va_block_context_t *va_block_context = service_context->block_context;
|
||||
bool hmm_migratable = true;
|
||||
|
||||
UVM_ASSERT(gpu->parent->replayable_faults_supported);
|
||||
|
||||
ats_invalidate->write_faults_in_batch = false;
|
||||
uvm_hmm_service_context_init(service_context);
|
||||
ats_invalidate->tlb_batch_pending = false;
|
||||
|
||||
for (i = 0; i < batch_context->num_coalesced_faults;) {
|
||||
NvU32 block_faults;
|
||||
@ -2215,9 +2202,12 @@ static NV_STATUS service_fault_batch(uvm_gpu_t *gpu,
|
||||
batch_context,
|
||||
i,
|
||||
&block_faults,
|
||||
replay_per_va_block);
|
||||
replay_per_va_block,
|
||||
hmm_migratable);
|
||||
// TODO: Bug 3900733: clean up locking in service_fault_batch().
|
||||
if (status == NV_WARN_MORE_PROCESSING_REQUIRED) {
|
||||
if (status == NV_WARN_MORE_PROCESSING_REQUIRED || status == NV_WARN_MISMATCHED_TARGET) {
|
||||
if (status == NV_WARN_MISMATCHED_TARGET)
|
||||
hmm_migratable = false;
|
||||
uvm_va_space_up_read(va_space);
|
||||
uvm_va_space_mm_release_unlock(va_space, mm);
|
||||
mm = NULL;
|
||||
@ -2229,6 +2219,7 @@ static NV_STATUS service_fault_batch(uvm_gpu_t *gpu,
|
||||
if (status != NV_OK)
|
||||
goto fail;
|
||||
|
||||
hmm_migratable = true;
|
||||
i += block_faults;
|
||||
|
||||
// Don't issue replays in cancel mode
|
||||
@ -2348,7 +2339,7 @@ static void record_fatal_fault_helper(uvm_gpu_t *gpu, uvm_fault_buffer_entry_t *
|
||||
UVM_ASSERT(va_space);
|
||||
uvm_va_space_down_read(va_space);
|
||||
// Record fatal fault event
|
||||
uvm_tools_record_gpu_fatal_fault(gpu->parent->id, va_space, entry, reason);
|
||||
uvm_tools_record_gpu_fatal_fault(gpu->id, va_space, entry, reason);
|
||||
uvm_va_space_up_read(va_space);
|
||||
}
|
||||
|
||||
@ -2538,10 +2529,10 @@ static void cancel_fault_batch_tlb(uvm_gpu_t *gpu,
|
||||
// The list iteration below skips the entry used as 'head'.
|
||||
// Report the 'head' entry explicitly.
|
||||
uvm_va_space_down_read(current_entry->va_space);
|
||||
uvm_tools_record_gpu_fatal_fault(gpu->parent->id, current_entry->va_space, current_entry, reason);
|
||||
uvm_tools_record_gpu_fatal_fault(gpu->id, current_entry->va_space, current_entry, reason);
|
||||
|
||||
list_for_each_entry(coalesced_entry, ¤t_entry->merged_instances_list, merged_instances_list)
|
||||
uvm_tools_record_gpu_fatal_fault(gpu->parent->id, current_entry->va_space, coalesced_entry, reason);
|
||||
uvm_tools_record_gpu_fatal_fault(gpu->id, current_entry->va_space, coalesced_entry, reason);
|
||||
uvm_va_space_up_read(current_entry->va_space);
|
||||
|
||||
// We need to cancel each instance pointer to correctly handle faults from multiple contexts.
|
||||
@ -2948,9 +2939,9 @@ NV_STATUS uvm_test_drain_replayable_faults(UVM_TEST_DRAIN_REPLAYABLE_FAULTS_PARA
|
||||
uvm_spin_loop_init(&spin);
|
||||
|
||||
do {
|
||||
uvm_gpu_replayable_faults_isr_lock(gpu->parent);
|
||||
uvm_parent_gpu_replayable_faults_isr_lock(gpu->parent);
|
||||
pending = uvm_gpu_replayable_faults_pending(gpu->parent);
|
||||
uvm_gpu_replayable_faults_isr_unlock(gpu->parent);
|
||||
uvm_parent_gpu_replayable_faults_isr_unlock(gpu->parent);
|
||||
|
||||
if (!pending)
|
||||
break;
|
||||
|
@ -74,8 +74,4 @@ void uvm_gpu_disable_prefetch_faults(uvm_parent_gpu_t *parent_gpu);
|
||||
// Service pending replayable faults on the given GPU. This function must be
|
||||
// only called from the ISR bottom half
|
||||
void uvm_gpu_service_replayable_faults(uvm_gpu_t *gpu);
|
||||
|
||||
// Returns true if UVM owns the hardware replayable fault buffer
|
||||
bool uvm_parent_gpu_replayable_fault_buffer_is_uvm_owned(uvm_parent_gpu_t *parent_gpu);
|
||||
|
||||
#endif // __UVM_GPU_PAGE_FAULT_H__
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*******************************************************************************
|
||||
Copyright (c) 2015-2022 NVIDIA Corporation
|
||||
Copyright (c) 2015-2023 NVIDIA Corporation
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to
|
||||
@ -72,7 +72,7 @@ struct uvm_gpu_semaphore_pool_page_struct
|
||||
|
||||
static bool gpu_semaphore_pool_is_secure(uvm_gpu_semaphore_pool_t *pool)
|
||||
{
|
||||
return uvm_conf_computing_mode_enabled(pool->gpu) && (pool->aperture == UVM_APERTURE_VID);
|
||||
return g_uvm_global.conf_computing_enabled && (pool->aperture == UVM_APERTURE_VID);
|
||||
}
|
||||
|
||||
static bool gpu_semaphore_is_secure(uvm_gpu_semaphore_t *semaphore)
|
||||
@ -366,7 +366,7 @@ NV_STATUS uvm_gpu_semaphore_secure_pool_create(uvm_gpu_t *gpu, uvm_gpu_semaphore
|
||||
{
|
||||
NV_STATUS status;
|
||||
|
||||
UVM_ASSERT(uvm_conf_computing_mode_enabled(gpu));
|
||||
UVM_ASSERT(g_uvm_global.conf_computing_enabled);
|
||||
|
||||
status = uvm_gpu_semaphore_pool_create(gpu, pool_out);
|
||||
if (status == NV_OK)
|
||||
@ -498,7 +498,7 @@ static bool tracking_semaphore_check_gpu(uvm_gpu_tracking_semaphore_t *tracking_
|
||||
// those cases.
|
||||
//
|
||||
// But if a pointer is in the table it must match.
|
||||
table_gpu = uvm_gpu_get(gpu->global_id);
|
||||
table_gpu = uvm_gpu_get(gpu->id);
|
||||
if (table_gpu)
|
||||
UVM_ASSERT(table_gpu == gpu);
|
||||
|
||||
@ -509,16 +509,11 @@ static bool tracking_semaphore_check_gpu(uvm_gpu_tracking_semaphore_t *tracking_
|
||||
|
||||
bool tracking_semaphore_uses_mutex(uvm_gpu_tracking_semaphore_t *tracking_semaphore)
|
||||
{
|
||||
uvm_gpu_t *gpu = tracking_semaphore->semaphore.page->pool->gpu;
|
||||
|
||||
UVM_ASSERT(tracking_semaphore_check_gpu(tracking_semaphore));
|
||||
if (uvm_conf_computing_mode_enabled(gpu))
|
||||
return true;
|
||||
|
||||
return false;
|
||||
return g_uvm_global.conf_computing_enabled;
|
||||
}
|
||||
|
||||
|
||||
NV_STATUS uvm_gpu_tracking_semaphore_alloc(uvm_gpu_semaphore_pool_t *pool, uvm_gpu_tracking_semaphore_t *tracking_sem)
|
||||
{
|
||||
NV_STATUS status;
|
||||
@ -532,7 +527,7 @@ NV_STATUS uvm_gpu_tracking_semaphore_alloc(uvm_gpu_semaphore_pool_t *pool, uvm_g
|
||||
|
||||
UVM_ASSERT(uvm_gpu_semaphore_get_payload(&tracking_sem->semaphore) == 0);
|
||||
|
||||
if (uvm_conf_computing_mode_enabled(pool->gpu))
|
||||
if (g_uvm_global.conf_computing_enabled)
|
||||
order = UVM_LOCK_ORDER_SECURE_SEMAPHORE;
|
||||
|
||||
if (tracking_semaphore_uses_mutex(tracking_sem))
|
||||
@ -579,9 +574,8 @@ static void uvm_gpu_semaphore_encrypted_payload_update(uvm_channel_t *channel, u
|
||||
void *auth_tag_cpu_addr = uvm_rm_mem_get_cpu_va(semaphore->conf_computing.auth_tag);
|
||||
NvU32 *gpu_notifier_cpu_addr = (NvU32 *)uvm_rm_mem_get_cpu_va(semaphore->conf_computing.notifier);
|
||||
NvU32 *payload_cpu_addr = (NvU32 *)uvm_rm_mem_get_cpu_va(semaphore->conf_computing.encrypted_payload);
|
||||
uvm_gpu_t *gpu = uvm_channel_get_gpu(channel);
|
||||
|
||||
UVM_ASSERT(uvm_conf_computing_mode_enabled(gpu));
|
||||
UVM_ASSERT(g_uvm_global.conf_computing_enabled);
|
||||
UVM_ASSERT(uvm_channel_is_ce(channel));
|
||||
|
||||
last_observed_notifier = semaphore->conf_computing.last_observed_notifier;
|
||||
@ -695,7 +689,7 @@ static NvU64 update_completed_value_locked(uvm_gpu_tracking_semaphore_t *trackin
|
||||
// Check for unexpected large jumps of the semaphore value
|
||||
UVM_ASSERT_MSG_RELEASE(new_value - old_value <= UVM_GPU_SEMAPHORE_MAX_JUMP,
|
||||
"GPU %s unexpected semaphore (CPU VA 0x%llx) jump from 0x%llx to 0x%llx\n",
|
||||
tracking_semaphore->semaphore.page->pool->gpu->parent->name,
|
||||
uvm_gpu_name(tracking_semaphore->semaphore.page->pool->gpu),
|
||||
(NvU64)(uintptr_t)tracking_semaphore->semaphore.payload,
|
||||
old_value, new_value);
|
||||
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*******************************************************************************
|
||||
Copyright (c) 2015-2019 NVIDIA Corporation
|
||||
Copyright (c) 2015-2023 NVIDIA Corporation
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to
|
||||
@ -185,7 +185,7 @@ static NV_STATUS test_alloc(uvm_va_space_t *va_space)
|
||||
|
||||
// In SR-IOV heavy, there should be a mapping in the proxy VA space
|
||||
// too.
|
||||
if (uvm_gpu_uses_proxy_channel_pool(gpu)) {
|
||||
if (uvm_parent_gpu_needs_proxy_channel_pool(gpu->parent)) {
|
||||
gpu_va = uvm_gpu_semaphore_get_gpu_proxy_va(&semaphores[i], gpu);
|
||||
TEST_CHECK_GOTO(gpu_va != 0, done);
|
||||
}
|
||||
|
@ -22,6 +22,7 @@
|
||||
*******************************************************************************/
|
||||
|
||||
#include "uvm_hal.h"
|
||||
#include "uvm_global.h"
|
||||
#include "uvm_kvmalloc.h"
|
||||
|
||||
#include "cla16f.h"
|
||||
@ -680,7 +681,9 @@ NV_STATUS uvm_hal_init_gpu(uvm_parent_gpu_t *parent_gpu)
|
||||
|
||||
class_ops = ops_find_by_id(ce_table, ARRAY_SIZE(ce_table), gpu_info->ceClass);
|
||||
if (class_ops == NULL) {
|
||||
UVM_ERR_PRINT("Unsupported ce class: 0x%X, GPU %s\n", gpu_info->ceClass, parent_gpu->name);
|
||||
UVM_ERR_PRINT("Unsupported ce class: 0x%X, GPU %s\n",
|
||||
gpu_info->ceClass,
|
||||
uvm_parent_gpu_name(parent_gpu));
|
||||
return NV_ERR_INVALID_CLASS;
|
||||
}
|
||||
|
||||
@ -688,7 +691,9 @@ NV_STATUS uvm_hal_init_gpu(uvm_parent_gpu_t *parent_gpu)
|
||||
|
||||
class_ops = ops_find_by_id(host_table, ARRAY_SIZE(host_table), gpu_info->hostClass);
|
||||
if (class_ops == NULL) {
|
||||
UVM_ERR_PRINT("Unsupported host class: 0x%X, GPU %s\n", gpu_info->hostClass, parent_gpu->name);
|
||||
UVM_ERR_PRINT("Unsupported host class: 0x%X, GPU %s\n",
|
||||
gpu_info->hostClass,
|
||||
uvm_parent_gpu_name(parent_gpu));
|
||||
return NV_ERR_INVALID_CLASS;
|
||||
}
|
||||
|
||||
@ -696,7 +701,9 @@ NV_STATUS uvm_hal_init_gpu(uvm_parent_gpu_t *parent_gpu)
|
||||
|
||||
class_ops = ops_find_by_id(arch_table, ARRAY_SIZE(arch_table), gpu_info->gpuArch);
|
||||
if (class_ops == NULL) {
|
||||
UVM_ERR_PRINT("Unsupported GPU architecture: 0x%X, GPU %s\n", gpu_info->gpuArch, parent_gpu->name);
|
||||
UVM_ERR_PRINT("Unsupported GPU architecture: 0x%X, GPU %s\n",
|
||||
gpu_info->gpuArch,
|
||||
uvm_parent_gpu_name(parent_gpu));
|
||||
return NV_ERR_INVALID_CLASS;
|
||||
}
|
||||
|
||||
@ -704,7 +711,9 @@ NV_STATUS uvm_hal_init_gpu(uvm_parent_gpu_t *parent_gpu)
|
||||
|
||||
class_ops = ops_find_by_id(fault_buffer_table, ARRAY_SIZE(fault_buffer_table), gpu_info->gpuArch);
|
||||
if (class_ops == NULL) {
|
||||
UVM_ERR_PRINT("Fault buffer HAL not found, GPU %s, arch: 0x%X\n", parent_gpu->name, gpu_info->gpuArch);
|
||||
UVM_ERR_PRINT("Fault buffer HAL not found, GPU %s, arch: 0x%X\n",
|
||||
uvm_parent_gpu_name(parent_gpu),
|
||||
gpu_info->gpuArch);
|
||||
return NV_ERR_INVALID_CLASS;
|
||||
}
|
||||
|
||||
@ -714,7 +723,9 @@ NV_STATUS uvm_hal_init_gpu(uvm_parent_gpu_t *parent_gpu)
|
||||
ARRAY_SIZE(access_counter_buffer_table),
|
||||
gpu_info->gpuArch);
|
||||
if (class_ops == NULL) {
|
||||
UVM_ERR_PRINT("Access counter HAL not found, GPU %s, arch: 0x%X\n", parent_gpu->name, gpu_info->gpuArch);
|
||||
UVM_ERR_PRINT("Access counter HAL not found, GPU %s, arch: 0x%X\n",
|
||||
uvm_parent_gpu_name(parent_gpu),
|
||||
gpu_info->gpuArch);
|
||||
return NV_ERR_INVALID_CLASS;
|
||||
}
|
||||
|
||||
@ -722,7 +733,9 @@ NV_STATUS uvm_hal_init_gpu(uvm_parent_gpu_t *parent_gpu)
|
||||
|
||||
class_ops = ops_find_by_id(sec2_table, ARRAY_SIZE(sec2_table), gpu_info->gpuArch);
|
||||
if (class_ops == NULL) {
|
||||
UVM_ERR_PRINT("SEC2 HAL not found, GPU %s, arch: 0x%X\n", parent_gpu->name, gpu_info->gpuArch);
|
||||
UVM_ERR_PRINT("SEC2 HAL not found, GPU %s, arch: 0x%X\n",
|
||||
uvm_parent_gpu_name(parent_gpu),
|
||||
gpu_info->gpuArch);
|
||||
return NV_ERR_INVALID_CLASS;
|
||||
}
|
||||
|
||||
@ -736,11 +749,16 @@ static void hal_override_properties(uvm_parent_gpu_t *parent_gpu)
|
||||
// Access counters are currently not supported in vGPU.
|
||||
//
|
||||
// TODO: Bug 200692962: Add support for access counters in vGPU
|
||||
if (parent_gpu->virt_mode != UVM_VIRT_MODE_NONE)
|
||||
if (parent_gpu->virt_mode != UVM_VIRT_MODE_NONE) {
|
||||
parent_gpu->access_counters_supported = false;
|
||||
// Access counters are not supported in CC.
|
||||
else if (uvm_conf_computing_mode_enabled_parent(parent_gpu))
|
||||
parent_gpu->access_counters_can_use_physical_addresses = false;
|
||||
}
|
||||
|
||||
// Access counters are not supported in Confidential Computing.
|
||||
else if (g_uvm_global.conf_computing_enabled) {
|
||||
parent_gpu->access_counters_supported = false;
|
||||
parent_gpu->access_counters_can_use_physical_addresses = false;
|
||||
}
|
||||
}
|
||||
|
||||
void uvm_hal_init_properties(uvm_parent_gpu_t *parent_gpu)
|
||||
|
@ -292,9 +292,9 @@ typedef NvU32 (*uvm_hal_ce_plc_mode_t)(void);
|
||||
NvU32 uvm_hal_maxwell_ce_plc_mode(void);
|
||||
NvU32 uvm_hal_ampere_ce_plc_mode_c7b5(void);
|
||||
|
||||
typedef NvU32 (*uvm_hal_ce_memcopy_type_t)(uvm_push_t *push, uvm_gpu_address_t dst, uvm_gpu_address_t src);
|
||||
NvU32 uvm_hal_maxwell_ce_memcopy_copy_type(uvm_push_t *push, uvm_gpu_address_t dst, uvm_gpu_address_t src);
|
||||
NvU32 uvm_hal_hopper_ce_memcopy_copy_type(uvm_push_t *push, uvm_gpu_address_t dst, uvm_gpu_address_t src);
|
||||
typedef NvU32 (*uvm_hal_ce_memcopy_type_t)(uvm_gpu_address_t dst, uvm_gpu_address_t src);
|
||||
NvU32 uvm_hal_maxwell_ce_memcopy_copy_type(uvm_gpu_address_t dst, uvm_gpu_address_t src);
|
||||
NvU32 uvm_hal_hopper_ce_memcopy_copy_type(uvm_gpu_address_t dst, uvm_gpu_address_t src);
|
||||
|
||||
// CE method validation
|
||||
typedef bool (*uvm_hal_ce_method_is_valid)(uvm_push_t *push, NvU32 method_address, NvU32 method_data);
|
||||
|
@ -112,7 +112,20 @@ typedef struct
|
||||
|
||||
bool uvm_hmm_is_enabled_system_wide(void)
|
||||
{
|
||||
return !uvm_disable_hmm && !g_uvm_global.ats.enabled && uvm_va_space_mm_enabled_system();
|
||||
if (uvm_disable_hmm)
|
||||
return false;
|
||||
|
||||
if (g_uvm_global.ats.enabled)
|
||||
return false;
|
||||
|
||||
// Confidential Computing and HMM impose mutually exclusive constraints. In
|
||||
// Confidential Computing the GPU can only access pages resident in vidmem,
|
||||
// but in HMM pages may be required to be resident in sysmem: file backed
|
||||
// VMAs, huge pages, etc.
|
||||
if (g_uvm_global.conf_computing_enabled)
|
||||
return false;
|
||||
|
||||
return uvm_va_space_mm_enabled_system();
|
||||
}
|
||||
|
||||
bool uvm_hmm_is_enabled(uvm_va_space_t *va_space)
|
||||
@ -132,8 +145,9 @@ static uvm_va_block_t *hmm_va_block_from_node(uvm_range_tree_node_t *node)
|
||||
// Copies the contents of the source device-private page to the
|
||||
// destination CPU page. This will invalidate mappings, so cannot be
|
||||
// called while holding any va_block locks.
|
||||
static NV_STATUS uvm_hmm_copy_devmem_page(struct page *dst_page, struct page *src_page, uvm_tracker_t *tracker)
|
||||
static void hmm_copy_devmem_page(struct page *dst_page, struct page *src_page)
|
||||
{
|
||||
uvm_tracker_t tracker = UVM_TRACKER_INIT();
|
||||
uvm_gpu_phys_address_t src_addr;
|
||||
uvm_gpu_phys_address_t dst_addr;
|
||||
uvm_gpu_chunk_t *gpu_chunk;
|
||||
@ -152,9 +166,9 @@ static NV_STATUS uvm_hmm_copy_devmem_page(struct page *dst_page, struct page *sr
|
||||
gpu = uvm_gpu_chunk_get_gpu(gpu_chunk);
|
||||
status = uvm_mmu_chunk_map(gpu_chunk);
|
||||
if (status != NV_OK)
|
||||
return status;
|
||||
goto out_zero;
|
||||
|
||||
status = uvm_gpu_map_cpu_pages(gpu->parent, dst_page, PAGE_SIZE, &dma_addr);
|
||||
status = uvm_parent_gpu_map_cpu_pages(gpu->parent, dst_page, PAGE_SIZE, &dma_addr);
|
||||
if (status != NV_OK)
|
||||
goto out_unmap_gpu;
|
||||
|
||||
@ -162,7 +176,7 @@ static NV_STATUS uvm_hmm_copy_devmem_page(struct page *dst_page, struct page *sr
|
||||
src_addr = uvm_gpu_phys_address(UVM_APERTURE_VID, gpu_chunk->address);
|
||||
status = uvm_push_begin_acquire(gpu->channel_manager,
|
||||
UVM_CHANNEL_TYPE_GPU_TO_CPU,
|
||||
tracker,
|
||||
&tracker,
|
||||
&push,
|
||||
"Copy for remote process fault");
|
||||
if (status != NV_OK)
|
||||
@ -173,15 +187,23 @@ static NV_STATUS uvm_hmm_copy_devmem_page(struct page *dst_page, struct page *sr
|
||||
uvm_gpu_address_copy(gpu, src_addr),
|
||||
PAGE_SIZE);
|
||||
uvm_push_end(&push);
|
||||
status = uvm_tracker_add_push_safe(tracker, &push);
|
||||
status = uvm_tracker_add_push_safe(&tracker, &push);
|
||||
if (status == NV_OK)
|
||||
uvm_tracker_wait_deinit(&tracker);
|
||||
|
||||
out_unmap_cpu:
|
||||
uvm_gpu_unmap_cpu_pages(gpu->parent, dma_addr, PAGE_SIZE);
|
||||
uvm_parent_gpu_unmap_cpu_pages(gpu->parent, dma_addr, PAGE_SIZE);
|
||||
|
||||
out_unmap_gpu:
|
||||
uvm_mmu_chunk_unmap(gpu_chunk, NULL);
|
||||
|
||||
return status;
|
||||
out_zero:
|
||||
// We can't fail eviction because we need to free the device-private pages
|
||||
// so the GPU can be unregistered. So the best we can do is warn on any
|
||||
// failures and zero the uninitialised page. This could result in data loss
|
||||
// in the application but failures are not expected.
|
||||
if (WARN_ON(status != NV_OK))
|
||||
memzero_page(dst_page, 0, PAGE_SIZE);
|
||||
}
|
||||
|
||||
static NV_STATUS uvm_hmm_pmm_gpu_evict_pfn(unsigned long pfn)
|
||||
@ -197,7 +219,6 @@ static NV_STATUS uvm_hmm_pmm_gpu_evict_pfn(unsigned long pfn)
|
||||
return errno_to_nv_status(ret);
|
||||
|
||||
if (src_pfn & MIGRATE_PFN_MIGRATE) {
|
||||
uvm_tracker_t tracker = UVM_TRACKER_INIT();
|
||||
|
||||
dst_page = alloc_page(GFP_HIGHUSER_MOVABLE);
|
||||
if (!dst_page) {
|
||||
@ -206,12 +227,9 @@ static NV_STATUS uvm_hmm_pmm_gpu_evict_pfn(unsigned long pfn)
|
||||
}
|
||||
|
||||
lock_page(dst_page);
|
||||
if (WARN_ON(uvm_hmm_copy_devmem_page(dst_page, migrate_pfn_to_page(src_pfn), &tracker) != NV_OK))
|
||||
memzero_page(dst_page, 0, PAGE_SIZE);
|
||||
|
||||
hmm_copy_devmem_page(dst_page, migrate_pfn_to_page(src_pfn));
|
||||
dst_pfn = migrate_pfn(page_to_pfn(dst_page));
|
||||
migrate_device_pages(&src_pfn, &dst_pfn, 1);
|
||||
uvm_tracker_wait_deinit(&tracker);
|
||||
}
|
||||
|
||||
out:
|
||||
@ -674,12 +692,6 @@ bool uvm_hmm_check_context_vma_is_valid(uvm_va_block_t *va_block,
|
||||
return true;
|
||||
}
|
||||
|
||||
void uvm_hmm_service_context_init(uvm_service_block_context_t *service_context)
|
||||
{
|
||||
// TODO: Bug 4050579: Remove this when swap cached pages can be migrated.
|
||||
service_context->block_context->hmm.swap_cached = false;
|
||||
}
|
||||
|
||||
NV_STATUS uvm_hmm_migrate_begin(uvm_va_block_t *va_block)
|
||||
{
|
||||
if (uvm_mutex_trylock(&va_block->hmm.migrate_lock))
|
||||
@ -1072,6 +1084,7 @@ done:
|
||||
static NV_STATUS hmm_set_preferred_location_locked(uvm_va_block_t *va_block,
|
||||
uvm_va_block_context_t *va_block_context,
|
||||
uvm_processor_id_t preferred_location,
|
||||
int preferred_cpu_nid,
|
||||
NvU64 addr,
|
||||
NvU64 end,
|
||||
uvm_tracker_t *out_tracker)
|
||||
@ -1085,10 +1098,10 @@ static NV_STATUS hmm_set_preferred_location_locked(uvm_va_block_t *va_block,
|
||||
|
||||
// Note that we can't just call uvm_va_policy_set_range() for the whole
|
||||
// range [addr end] because we need to examine the old value of
|
||||
// policy->preferred_location before setting it. Thus we iterate over
|
||||
// the existing policy nodes.
|
||||
// policy->preferred_location and policy->preferred_nid before setting it.
|
||||
// Thus we iterate over the existing policy nodes.
|
||||
uvm_for_each_va_policy_in(old_policy, va_block, addr, end, node, region) {
|
||||
if (uvm_id_equal(old_policy->preferred_location, preferred_location))
|
||||
if (uvm_va_policy_preferred_location_equal(old_policy, preferred_location, preferred_cpu_nid))
|
||||
continue;
|
||||
|
||||
// If the old preferred location is a valid processor ID, remote
|
||||
@ -1100,7 +1113,11 @@ static NV_STATUS hmm_set_preferred_location_locked(uvm_va_block_t *va_block,
|
||||
uvm_processor_mask_test(&old_policy->accessed_by, old_policy->preferred_location))
|
||||
uvm_processor_mask_set(&set_accessed_by_processors, old_policy->preferred_location);
|
||||
|
||||
if (!uvm_va_policy_set_preferred_location(va_block, region, preferred_location, old_policy))
|
||||
if (!uvm_va_policy_set_preferred_location(va_block,
|
||||
region,
|
||||
preferred_location,
|
||||
preferred_cpu_nid,
|
||||
old_policy))
|
||||
return NV_ERR_NO_MEMORY;
|
||||
|
||||
// Establish new remote mappings if the old preferred location had
|
||||
@ -1134,6 +1151,7 @@ static NV_STATUS hmm_set_preferred_location_locked(uvm_va_block_t *va_block,
|
||||
|
||||
NV_STATUS uvm_hmm_set_preferred_location(uvm_va_space_t *va_space,
|
||||
uvm_processor_id_t preferred_location,
|
||||
int preferred_cpu_nid,
|
||||
NvU64 base,
|
||||
NvU64 last_address,
|
||||
uvm_tracker_t *out_tracker)
|
||||
@ -1170,6 +1188,7 @@ NV_STATUS uvm_hmm_set_preferred_location(uvm_va_space_t *va_space,
|
||||
status = hmm_set_preferred_location_locked(va_block,
|
||||
va_block_context,
|
||||
preferred_location,
|
||||
preferred_cpu_nid,
|
||||
addr,
|
||||
end,
|
||||
out_tracker);
|
||||
@ -1259,6 +1278,7 @@ NV_STATUS uvm_hmm_set_accessed_by(uvm_va_space_t *va_space,
|
||||
UVM_VA_POLICY_ACCESSED_BY,
|
||||
!set_bit,
|
||||
processor_id,
|
||||
NUMA_NO_NODE,
|
||||
UVM_READ_DUPLICATION_MAX);
|
||||
|
||||
if (status == NV_OK && set_bit) {
|
||||
@ -1968,28 +1988,74 @@ static void fill_dst_pfns(uvm_va_block_t *va_block,
|
||||
}
|
||||
}
|
||||
|
||||
static NV_STATUS alloc_and_copy_to_cpu(uvm_va_block_t *va_block,
|
||||
struct vm_area_struct *vma,
|
||||
const unsigned long *src_pfns,
|
||||
unsigned long *dst_pfns,
|
||||
uvm_va_block_region_t region,
|
||||
uvm_page_mask_t *page_mask,
|
||||
uvm_page_mask_t *same_devmem_page_mask,
|
||||
uvm_processor_id_t processor_id,
|
||||
uvm_service_block_context_t *service_context)
|
||||
static NV_STATUS alloc_page_on_cpu(uvm_va_block_t *va_block,
|
||||
uvm_page_index_t page_index,
|
||||
const unsigned long *src_pfns,
|
||||
unsigned long *dst_pfns,
|
||||
uvm_page_mask_t *same_devmem_page_mask,
|
||||
uvm_va_block_context_t *block_context)
|
||||
{
|
||||
NV_STATUS status;
|
||||
struct page *src_page;
|
||||
struct page *dst_page;
|
||||
|
||||
// This is the page that will be copied to system memory.
|
||||
src_page = migrate_pfn_to_page(src_pfns[page_index]);
|
||||
|
||||
if (src_page) {
|
||||
// mremap may have caused us to lose the gpu_chunk associated with
|
||||
// this va_block/page_index so make sure we have the correct chunk.
|
||||
if (is_device_private_page(src_page))
|
||||
gpu_chunk_add(va_block, page_index, src_page);
|
||||
|
||||
if (uvm_page_mask_test(&va_block->cpu.allocated, page_index)) {
|
||||
lock_block_cpu_page(va_block, page_index, src_page, dst_pfns, same_devmem_page_mask);
|
||||
return NV_OK;
|
||||
}
|
||||
}
|
||||
|
||||
UVM_ASSERT(!uvm_processor_mask_test(&va_block->resident, UVM_ID_CPU) ||
|
||||
!uvm_va_block_cpu_is_page_resident_on(va_block, NUMA_NO_NODE, page_index));
|
||||
|
||||
status = uvm_va_block_populate_page_cpu(va_block, page_index, block_context);
|
||||
if (status != NV_OK)
|
||||
return status;
|
||||
|
||||
// TODO: Bug 3368756: add support for transparent huge pages
|
||||
// Support for large CPU pages means the page_index may need fixing
|
||||
dst_page = migrate_pfn_to_page(block_context->hmm.dst_pfns[page_index]);
|
||||
|
||||
// Note that we don't call get_page(dst_page) since alloc_page_vma()
|
||||
// returns with a page reference count of one and we are passing
|
||||
// ownership to Linux. Also, uvm_va_block_cpu_page_populate() recorded
|
||||
// the page as "mirrored" so that migrate_vma_finalize() and
|
||||
// hmm_va_block_cpu_page_unpopulate() don't double free the page.
|
||||
lock_page(dst_page);
|
||||
dst_pfns[page_index] = migrate_pfn(page_to_pfn(dst_page));
|
||||
|
||||
return NV_OK;
|
||||
}
|
||||
|
||||
// Allocates pages on the CPU to handle migration due to a page fault
|
||||
static NV_STATUS fault_alloc_on_cpu(uvm_va_block_t *va_block,
|
||||
const unsigned long *src_pfns,
|
||||
unsigned long *dst_pfns,
|
||||
uvm_va_block_region_t region,
|
||||
uvm_page_mask_t *page_mask,
|
||||
uvm_page_mask_t *same_devmem_page_mask,
|
||||
uvm_processor_id_t fault_processor_id,
|
||||
uvm_service_block_context_t *service_context)
|
||||
{
|
||||
uvm_page_index_t page_index;
|
||||
NV_STATUS status = NV_OK;
|
||||
|
||||
for_each_va_block_page_in_region_mask(page_index, page_mask, region) {
|
||||
struct page *src_page;
|
||||
struct page *dst_page;
|
||||
gfp_t gfp;
|
||||
UVM_ASSERT(service_context);
|
||||
|
||||
for_each_va_block_page_in_region_mask(page_index, page_mask, region) {
|
||||
if (!(src_pfns[page_index] & MIGRATE_PFN_MIGRATE)) {
|
||||
// Device exclusive PTEs are not selected but we still want to
|
||||
// process the page so record it as such.
|
||||
if (service_context && !UVM_ID_IS_CPU(processor_id) &&
|
||||
if (!UVM_ID_IS_CPU(fault_processor_id) &&
|
||||
service_context->access_type[page_index] == UVM_FAULT_ACCESS_TYPE_ATOMIC_STRONG) {
|
||||
uvm_page_mask_set(same_devmem_page_mask, page_index);
|
||||
continue;
|
||||
@ -2004,74 +2070,20 @@ static NV_STATUS alloc_and_copy_to_cpu(uvm_va_block_t *va_block,
|
||||
goto clr_mask;
|
||||
}
|
||||
|
||||
// This is the page that will be copied to system memory.
|
||||
src_page = migrate_pfn_to_page(src_pfns[page_index]);
|
||||
|
||||
if (src_page) {
|
||||
// mremap may have caused us to loose the gpu_chunk associated with
|
||||
// this va_block/page_index so make sure we have the correct chunk.
|
||||
if (is_device_private_page(src_page))
|
||||
gpu_chunk_add(va_block, page_index, src_page);
|
||||
|
||||
if (uvm_page_mask_test(&va_block->cpu.allocated, page_index)) {
|
||||
lock_block_cpu_page(va_block, page_index, src_page, dst_pfns, same_devmem_page_mask);
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
UVM_ASSERT(!uvm_processor_mask_test(&va_block->resident, UVM_ID_CPU) ||
|
||||
!uvm_va_block_cpu_is_page_resident_on(va_block, NUMA_NO_NODE, page_index));
|
||||
|
||||
// Allocate a user system memory page for the destination.
|
||||
// This is the typical case since Linux will free the source page when
|
||||
// migrating to device private memory.
|
||||
// If there is no source page, it means the page is pte_none() or the
|
||||
// zero page. This case "shouldn't happen" because we asked
|
||||
// migrate_vma_setup() only for device private pages but
|
||||
// migrate_vma_collect_hole() doesn't check the
|
||||
// MIGRATE_VMA_SELECT_SYSTEM flag.
|
||||
gfp = GFP_HIGHUSER_MOVABLE;
|
||||
if (!src_page)
|
||||
gfp |= __GFP_ZERO;
|
||||
|
||||
dst_page = alloc_page_vma(gfp,
|
||||
vma,
|
||||
va_block->start + (page_index << PAGE_SHIFT));
|
||||
if (!dst_page) {
|
||||
// Ignore errors if the page is only for prefetching.
|
||||
if (service_context &&
|
||||
service_context->access_type[page_index] == UVM_FAULT_ACCESS_TYPE_PREFETCH)
|
||||
goto clr_mask;
|
||||
UVM_ERR_PRINT("cannot allocate page %u (addr 0x%llx)\n",
|
||||
page_index, va_block->start + (page_index << PAGE_SHIFT));
|
||||
status = NV_ERR_NO_MEMORY;
|
||||
break;
|
||||
}
|
||||
|
||||
status = hmm_va_block_cpu_page_populate(va_block, page_index, dst_page);
|
||||
status = alloc_page_on_cpu(va_block, page_index, src_pfns, dst_pfns, same_devmem_page_mask, service_context->block_context);
|
||||
if (status != NV_OK) {
|
||||
__free_page(dst_page);
|
||||
// Ignore errors if the page is only for prefetching.
|
||||
if (service_context &&
|
||||
service_context->access_type[page_index] == UVM_FAULT_ACCESS_TYPE_PREFETCH)
|
||||
goto clr_mask;
|
||||
break;
|
||||
}
|
||||
|
||||
// Note that we don't call get_page(dst_page) since alloc_page_vma()
|
||||
// returns with a page reference count of one and we are passing
|
||||
// ownership to Linux. Also, uvm_va_block_cpu_page_populate() recorded
|
||||
// the page as "mirrored" so that migrate_vma_finalize() and
|
||||
// hmm_va_block_cpu_page_unpopulate() don't double free the page.
|
||||
lock_page(dst_page);
|
||||
dst_pfns[page_index] = migrate_pfn(page_to_pfn(dst_page));
|
||||
continue;
|
||||
|
||||
clr_mask:
|
||||
// TODO: Bug 3900774: clean up murky mess of mask clearing.
|
||||
uvm_page_mask_clear(page_mask, page_index);
|
||||
if (service_context)
|
||||
clear_service_context_masks(service_context, UVM_ID_CPU, page_index);
|
||||
clear_service_context_masks(service_context, UVM_ID_CPU, page_index);
|
||||
}
|
||||
|
||||
if (status != NV_OK)
|
||||
@ -2082,6 +2094,40 @@ static NV_STATUS alloc_and_copy_to_cpu(uvm_va_block_t *va_block,
|
||||
return status;
|
||||
}
|
||||
|
||||
// Allocates pages on the CPU for explicit migration calls.
|
||||
static NV_STATUS migrate_alloc_on_cpu(uvm_va_block_t *va_block,
|
||||
const unsigned long *src_pfns,
|
||||
unsigned long *dst_pfns,
|
||||
uvm_va_block_region_t region,
|
||||
uvm_page_mask_t *page_mask,
|
||||
uvm_page_mask_t *same_devmem_page_mask,
|
||||
uvm_va_block_context_t *block_context)
|
||||
{
|
||||
uvm_page_index_t page_index;
|
||||
NV_STATUS status = NV_OK;
|
||||
|
||||
for_each_va_block_page_in_region_mask(page_index, page_mask, region) {
|
||||
if (!(src_pfns[page_index] & MIGRATE_PFN_MIGRATE)) {
|
||||
// We have previously found a page that is CPU resident which can't
|
||||
// be migrated (probably a shared mapping) so make sure we establish
|
||||
// a remote mapping for it.
|
||||
if (uvm_page_mask_test(same_devmem_page_mask, page_index))
|
||||
continue;
|
||||
|
||||
uvm_page_mask_clear(page_mask, page_index);
|
||||
continue;
|
||||
}
|
||||
|
||||
status = alloc_page_on_cpu(va_block, page_index, src_pfns, dst_pfns, same_devmem_page_mask, block_context);
|
||||
}
|
||||
|
||||
if (status != NV_OK)
|
||||
clean_up_non_migrating_pages(va_block, src_pfns, dst_pfns, region, page_mask);
|
||||
else if (uvm_page_mask_empty(page_mask))
|
||||
return NV_WARN_MORE_PROCESSING_REQUIRED;
|
||||
|
||||
return status;
|
||||
}
|
||||
static NV_STATUS uvm_hmm_devmem_fault_alloc_and_copy(uvm_hmm_devmem_fault_context_t *devmem_fault_context)
|
||||
{
|
||||
uvm_processor_id_t processor_id;
|
||||
@ -2107,15 +2153,14 @@ static NV_STATUS uvm_hmm_devmem_fault_alloc_and_copy(uvm_hmm_devmem_fault_contex
|
||||
page_mask = &devmem_fault_context->page_mask;
|
||||
uvm_page_mask_copy(page_mask, &service_context->per_processor_masks[UVM_ID_CPU_VALUE].new_residency);
|
||||
|
||||
status = alloc_and_copy_to_cpu(va_block,
|
||||
service_context->block_context->hmm.vma,
|
||||
src_pfns,
|
||||
dst_pfns,
|
||||
service_context->region,
|
||||
page_mask,
|
||||
same_devmem_page_mask,
|
||||
processor_id,
|
||||
service_context);
|
||||
status = fault_alloc_on_cpu(va_block,
|
||||
src_pfns,
|
||||
dst_pfns,
|
||||
service_context->region,
|
||||
page_mask,
|
||||
same_devmem_page_mask,
|
||||
processor_id,
|
||||
service_context);
|
||||
if (status != NV_OK)
|
||||
return status;
|
||||
|
||||
@ -2640,12 +2685,8 @@ static NV_STATUS dmamap_src_sysmem_pages(uvm_va_block_t *va_block,
|
||||
if (PageSwapCache(src_page)) {
|
||||
// TODO: Bug 4050579: Remove this when swap cached pages can be
|
||||
// migrated.
|
||||
if (service_context) {
|
||||
service_context->block_context->hmm.swap_cached = true;
|
||||
break;
|
||||
}
|
||||
|
||||
goto clr_mask;
|
||||
status = NV_WARN_MISMATCHED_TARGET;
|
||||
break;
|
||||
}
|
||||
|
||||
// If the page is already allocated, it is most likely a mirrored
|
||||
@ -2699,8 +2740,7 @@ static NV_STATUS dmamap_src_sysmem_pages(uvm_va_block_t *va_block,
|
||||
clear_service_context_masks(service_context, dest_id, page_index);
|
||||
}
|
||||
|
||||
if (uvm_page_mask_empty(page_mask) ||
|
||||
(service_context && service_context->block_context->hmm.swap_cached))
|
||||
if (uvm_page_mask_empty(page_mask))
|
||||
status = NV_WARN_MORE_PROCESSING_REQUIRED;
|
||||
|
||||
if (status != NV_OK)
|
||||
@ -2945,15 +2985,13 @@ static NV_STATUS uvm_hmm_migrate_alloc_and_copy(struct vm_area_struct *vma,
|
||||
uvm_assert_mutex_locked(&va_block->lock);
|
||||
|
||||
if (UVM_ID_IS_CPU(dest_id)) {
|
||||
status = alloc_and_copy_to_cpu(va_block,
|
||||
vma,
|
||||
src_pfns,
|
||||
dst_pfns,
|
||||
region,
|
||||
page_mask,
|
||||
&uvm_hmm_migrate_event->same_devmem_page_mask,
|
||||
UVM_ID_INVALID,
|
||||
NULL);
|
||||
status = migrate_alloc_on_cpu(va_block,
|
||||
src_pfns,
|
||||
dst_pfns,
|
||||
region,
|
||||
page_mask,
|
||||
&uvm_hmm_migrate_event->same_devmem_page_mask,
|
||||
va_block_context);
|
||||
}
|
||||
else {
|
||||
status = dmamap_src_sysmem_pages(va_block,
|
||||
@ -3154,7 +3192,7 @@ NV_STATUS uvm_hmm_va_block_migrate_locked(uvm_va_block_t *va_block,
|
||||
|
||||
migrate_vma_finalize(args);
|
||||
|
||||
if (status == NV_WARN_NOTHING_TO_DO)
|
||||
if (status == NV_WARN_NOTHING_TO_DO || status == NV_WARN_MISMATCHED_TARGET)
|
||||
status = NV_OK;
|
||||
|
||||
return status;
|
||||
@ -3288,15 +3326,13 @@ static NV_STATUS hmm_va_block_evict_chunks(uvm_va_block_t *va_block,
|
||||
// TODO: Bug 3660922: Need to handle read duplication at some point.
|
||||
UVM_ASSERT(uvm_page_mask_region_empty(cpu_resident_mask, region));
|
||||
|
||||
status = alloc_and_copy_to_cpu(va_block,
|
||||
NULL,
|
||||
src_pfns,
|
||||
dst_pfns,
|
||||
region,
|
||||
page_mask,
|
||||
NULL,
|
||||
UVM_ID_INVALID,
|
||||
NULL);
|
||||
status = migrate_alloc_on_cpu(va_block,
|
||||
src_pfns,
|
||||
dst_pfns,
|
||||
region,
|
||||
page_mask,
|
||||
NULL,
|
||||
va_block_context);
|
||||
if (status != NV_OK)
|
||||
goto err;
|
||||
|
||||
@ -3392,7 +3428,6 @@ NV_STATUS uvm_hmm_remote_cpu_fault(struct vm_fault *vmf)
|
||||
unsigned long dst_pfn;
|
||||
struct migrate_vma args;
|
||||
struct page *src_page = vmf->page;
|
||||
uvm_tracker_t tracker = UVM_TRACKER_INIT();
|
||||
int ret;
|
||||
|
||||
args.vma = vmf->vma;
|
||||
@ -3421,9 +3456,7 @@ NV_STATUS uvm_hmm_remote_cpu_fault(struct vm_fault *vmf)
|
||||
lock_page(dst_page);
|
||||
dst_pfn = migrate_pfn(page_to_pfn(dst_page));
|
||||
|
||||
status = uvm_hmm_copy_devmem_page(dst_page, src_page, &tracker);
|
||||
if (status == NV_OK)
|
||||
status = uvm_tracker_wait_deinit(&tracker);
|
||||
hmm_copy_devmem_page(dst_page, src_page);
|
||||
}
|
||||
|
||||
migrate_vma_pages(&args);
|
||||
@ -3591,6 +3624,7 @@ NV_STATUS uvm_hmm_va_range_info(uvm_va_space_t *va_space,
|
||||
params->va_range_end = ULONG_MAX;
|
||||
params->read_duplication = UVM_TEST_READ_DUPLICATION_UNSET;
|
||||
memset(&params->preferred_location, 0, sizeof(params->preferred_location));
|
||||
params->preferred_cpu_nid = NUMA_NO_NODE;
|
||||
params->accessed_by_count = 0;
|
||||
params->managed.vma_start = 0;
|
||||
params->managed.vma_end = 0;
|
||||
@ -3633,8 +3667,10 @@ NV_STATUS uvm_hmm_va_range_info(uvm_va_space_t *va_space,
|
||||
|
||||
params->read_duplication = node->policy.read_duplication;
|
||||
|
||||
if (!UVM_ID_IS_INVALID(node->policy.preferred_location))
|
||||
if (!UVM_ID_IS_INVALID(node->policy.preferred_location)) {
|
||||
uvm_va_space_processor_uuid(va_space, &params->preferred_location, node->policy.preferred_location);
|
||||
params->preferred_cpu_nid = node->policy.preferred_nid;
|
||||
}
|
||||
|
||||
for_each_id_in_mask(processor_id, &node->policy.accessed_by)
|
||||
uvm_va_space_processor_uuid(va_space, &params->accessed_by[params->accessed_by_count++], processor_id);
|
||||
@ -3652,22 +3688,16 @@ NV_STATUS uvm_hmm_va_range_info(uvm_va_space_t *va_space,
|
||||
// TODO: Bug 3660968: Remove this hack as soon as HMM migration is implemented
|
||||
// for VMAs other than anonymous private memory.
|
||||
bool uvm_hmm_must_use_sysmem(uvm_va_block_t *va_block,
|
||||
uvm_va_block_context_t *va_block_context)
|
||||
struct vm_area_struct *vma)
|
||||
{
|
||||
struct vm_area_struct *vma = va_block_context->hmm.vma;
|
||||
|
||||
uvm_assert_mutex_locked(&va_block->lock);
|
||||
|
||||
if (!uvm_va_block_is_hmm(va_block))
|
||||
return false;
|
||||
|
||||
UVM_ASSERT(vma);
|
||||
UVM_ASSERT(va_block_context->mm == vma->vm_mm);
|
||||
uvm_assert_mmap_lock_locked(va_block_context->mm);
|
||||
|
||||
// TODO: Bug 4050579: Remove this when swap cached pages can be migrated.
|
||||
if (va_block_context->hmm.swap_cached)
|
||||
return true;
|
||||
UVM_ASSERT(va_block->hmm.va_space->va_space_mm.mm == vma->vm_mm);
|
||||
uvm_assert_mmap_lock_locked(vma->vm_mm);
|
||||
|
||||
// migrate_vma_setup() can't migrate VM_SPECIAL so we have to force GPU
|
||||
// remote mapping.
|
||||
|
@ -114,11 +114,6 @@ typedef struct
|
||||
struct vm_area_struct *vma,
|
||||
uvm_va_block_region_t region);
|
||||
|
||||
// Initialize the HMM portion of the service_context.
|
||||
// This should be called one time before any retry loops calling
|
||||
// uvm_va_block_service_locked().
|
||||
void uvm_hmm_service_context_init(uvm_service_block_context_t *service_context);
|
||||
|
||||
// Begin a migration critical section. When calling into the kernel it is
|
||||
// sometimes necessary to drop the va_block lock. This function returns
|
||||
// NV_OK when no other thread has started a migration critical section.
|
||||
@ -183,6 +178,7 @@ typedef struct
|
||||
// and the va_space lock must be held in write mode.
|
||||
NV_STATUS uvm_hmm_set_preferred_location(uvm_va_space_t *va_space,
|
||||
uvm_processor_id_t preferred_location,
|
||||
int preferred_cpu_nid,
|
||||
NvU64 base,
|
||||
NvU64 last_address,
|
||||
uvm_tracker_t *out_tracker);
|
||||
@ -271,6 +267,18 @@ typedef struct
|
||||
NvU64 addr);
|
||||
|
||||
// This is called to service a GPU fault.
|
||||
// processor_id is the faulting processor.
|
||||
// new_residency is the processor where the data should be migrated to.
|
||||
// Special return values (besides things like NV_ERR_NO_MEMORY):
|
||||
// NV_WARN_MORE_PROCESSING_REQUIRED indicates that one or more pages could
|
||||
// not be migrated and that a retry might succeed after unlocking the
|
||||
// va_block lock, va_space lock, and mmap lock.
|
||||
// NV_WARN_MISMATCHED_TARGET is a special case of GPU fault handling when a
|
||||
// GPU is chosen as the destination and the source is a HMM CPU page that
|
||||
// can't be migrated (i.e., must remain in system memory). In that case,
|
||||
// uvm_va_block_select_residency() should be called with 'hmm_migratable'
|
||||
// set to false so that system memory will be selected. Then this call can
|
||||
// be retried to service the GPU fault by migrating to system memory.
|
||||
// Locking: the va_space->va_space_mm.mm mmap_lock must be locked,
|
||||
// the va_space read lock must be held, and the va_block lock held.
|
||||
NV_STATUS uvm_hmm_va_block_service_locked(uvm_processor_id_t processor_id,
|
||||
@ -282,8 +290,10 @@ typedef struct
|
||||
// This is called to migrate a region within a HMM va_block.
|
||||
// va_block_context must not be NULL and va_block_context->hmm.vma
|
||||
// must be valid.
|
||||
// Locking: the va_space->va_space_mm.mm must be retained, mmap_lock must be
|
||||
// locked, and the va_block lock held.
|
||||
// Special return values (besides things like NV_ERR_NO_MEMORY):
|
||||
// NV_WARN_MORE_PROCESSING_REQUIRED indicates that one or more pages could
|
||||
// not be migrated and that a retry might succeed after unlocking the
|
||||
// va_block lock, va_space lock, and mmap lock.
|
||||
NV_STATUS uvm_hmm_va_block_migrate_locked(uvm_va_block_t *va_block,
|
||||
uvm_va_block_retry_t *va_block_retry,
|
||||
uvm_va_block_context_t *va_block_context,
|
||||
@ -382,7 +392,7 @@ typedef struct
|
||||
// va_block, the va_block_context->mm must be retained and locked for at least
|
||||
// read.
|
||||
bool uvm_hmm_must_use_sysmem(uvm_va_block_t *va_block,
|
||||
uvm_va_block_context_t *va_block_context);
|
||||
struct vm_area_struct *vma);
|
||||
|
||||
#else // UVM_IS_CONFIG_HMM()
|
||||
|
||||
@ -441,10 +451,6 @@ typedef struct
|
||||
return true;
|
||||
}
|
||||
|
||||
static void uvm_hmm_service_context_init(uvm_service_block_context_t *service_context)
|
||||
{
|
||||
}
|
||||
|
||||
static NV_STATUS uvm_hmm_migrate_begin(uvm_va_block_t *va_block)
|
||||
{
|
||||
return NV_OK;
|
||||
@ -485,6 +491,7 @@ typedef struct
|
||||
|
||||
static NV_STATUS uvm_hmm_set_preferred_location(uvm_va_space_t *va_space,
|
||||
uvm_processor_id_t preferred_location,
|
||||
int preferred_cpu_nid,
|
||||
NvU64 base,
|
||||
NvU64 last_address,
|
||||
uvm_tracker_t *out_tracker)
|
||||
@ -648,7 +655,7 @@ typedef struct
|
||||
}
|
||||
|
||||
static bool uvm_hmm_must_use_sysmem(uvm_va_block_t *va_block,
|
||||
uvm_va_block_context_t *va_block_context)
|
||||
struct vm_area_struct *vma)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
@ -55,7 +55,7 @@ void uvm_hal_hopper_arch_init_properties(uvm_parent_gpu_t *parent_gpu)
|
||||
parent_gpu->uvm_mem_va_size = UVM_MEM_VA_SIZE;
|
||||
|
||||
// See uvm_mmu.h for mapping placement
|
||||
parent_gpu->flat_vidmem_va_base = (64 * UVM_SIZE_1PB) + (8 * UVM_SIZE_1TB);
|
||||
parent_gpu->flat_vidmem_va_base = (64 * UVM_SIZE_1PB) + (32 * UVM_SIZE_1TB);
|
||||
|
||||
// Physical CE writes to vidmem are non-coherent with respect to the CPU on
|
||||
// GH180.
|
||||
@ -88,6 +88,8 @@ void uvm_hal_hopper_arch_init_properties(uvm_parent_gpu_t *parent_gpu)
|
||||
|
||||
parent_gpu->access_counters_supported = true;
|
||||
|
||||
parent_gpu->access_counters_can_use_physical_addresses = false;
|
||||
|
||||
parent_gpu->fault_cancel_va_supported = true;
|
||||
|
||||
parent_gpu->scoped_atomics_supported = true;
|
||||
@ -103,5 +105,6 @@ void uvm_hal_hopper_arch_init_properties(uvm_parent_gpu_t *parent_gpu)
|
||||
parent_gpu->map_remap_larger_page_promotion = false;
|
||||
|
||||
parent_gpu->plc_supported = true;
|
||||
}
|
||||
|
||||
parent_gpu->no_ats_range_required = true;
|
||||
}
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*******************************************************************************
|
||||
Copyright (c) 2020-2022 NVIDIA Corporation
|
||||
Copyright (c) 2020-2023 NVIDIA Corporation
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to
|
||||
@ -22,6 +22,7 @@
|
||||
*******************************************************************************/
|
||||
|
||||
#include "uvm_hal.h"
|
||||
#include "uvm_global.h"
|
||||
#include "uvm_push.h"
|
||||
#include "uvm_mem.h"
|
||||
#include "uvm_conf_computing.h"
|
||||
@ -154,7 +155,8 @@ static NvU32 hopper_memset_push_phys_mode(uvm_push_t *push, uvm_gpu_address_t ds
|
||||
|
||||
static bool va_is_flat_vidmem(uvm_gpu_t *gpu, NvU64 va)
|
||||
{
|
||||
return (uvm_mmu_gpu_needs_static_vidmem_mapping(gpu) || uvm_mmu_gpu_needs_dynamic_vidmem_mapping(gpu)) &&
|
||||
return (uvm_mmu_parent_gpu_needs_static_vidmem_mapping(gpu->parent) ||
|
||||
uvm_mmu_parent_gpu_needs_dynamic_vidmem_mapping(gpu->parent)) &&
|
||||
va >= gpu->parent->flat_vidmem_va_base &&
|
||||
va < gpu->parent->flat_vidmem_va_base + UVM_GPU_MAX_PHYS_MEM;
|
||||
}
|
||||
@ -180,17 +182,18 @@ static bool hopper_scrub_enable(uvm_gpu_t *gpu, uvm_gpu_address_t *dst, size_t s
|
||||
return !dst->is_virtual && dst->aperture == UVM_APERTURE_VID;
|
||||
}
|
||||
|
||||
static NvU32 hopper_memset_copy_type(uvm_push_t *push, uvm_gpu_address_t dst)
|
||||
static NvU32 hopper_memset_copy_type(uvm_gpu_address_t dst)
|
||||
{
|
||||
if (uvm_conf_computing_mode_enabled(uvm_push_get_gpu(push)) && dst.is_unprotected)
|
||||
if (g_uvm_global.conf_computing_enabled && dst.is_unprotected)
|
||||
return HWCONST(C8B5, LAUNCH_DMA, COPY_TYPE, NONPROT2NONPROT);
|
||||
return HWCONST(C8B5, LAUNCH_DMA, COPY_TYPE, DEFAULT);
|
||||
}
|
||||
|
||||
NvU32 uvm_hal_hopper_ce_memcopy_copy_type(uvm_push_t *push, uvm_gpu_address_t dst, uvm_gpu_address_t src)
|
||||
NvU32 uvm_hal_hopper_ce_memcopy_copy_type(uvm_gpu_address_t dst, uvm_gpu_address_t src)
|
||||
{
|
||||
if (uvm_conf_computing_mode_enabled(uvm_push_get_gpu(push)) && dst.is_unprotected && src.is_unprotected)
|
||||
if (g_uvm_global.conf_computing_enabled && dst.is_unprotected && src.is_unprotected)
|
||||
return HWCONST(C8B5, LAUNCH_DMA, COPY_TYPE, NONPROT2NONPROT);
|
||||
|
||||
return HWCONST(C8B5, LAUNCH_DMA, COPY_TYPE, DEFAULT);
|
||||
}
|
||||
|
||||
@ -210,7 +213,7 @@ static void hopper_memset_common(uvm_push_t *push,
|
||||
NvU32 launch_dma_remap_enable;
|
||||
NvU32 launch_dma_scrub_enable;
|
||||
NvU32 flush_value = HWCONST(C8B5, LAUNCH_DMA, FLUSH_ENABLE, FALSE);
|
||||
NvU32 copy_type_value = hopper_memset_copy_type(push, dst);
|
||||
NvU32 copy_type_value = hopper_memset_copy_type(dst);
|
||||
bool is_scrub = hopper_scrub_enable(gpu, &dst, num_elements * memset_element_size);
|
||||
|
||||
UVM_ASSERT_MSG(gpu->parent->ce_hal->memset_is_valid(push, dst, num_elements, memset_element_size),
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*******************************************************************************
|
||||
Copyright (c) 2020-2022 NVIDIA Corporation
|
||||
Copyright (c) 2020-2023 NVIDIA Corporation
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to
|
||||
@ -33,6 +33,7 @@
|
||||
|
||||
#include "uvm_types.h"
|
||||
#include "uvm_global.h"
|
||||
#include "uvm_common.h"
|
||||
#include "uvm_hal.h"
|
||||
#include "uvm_hal_types.h"
|
||||
#include "uvm_hopper_fault_buffer.h"
|
||||
@ -42,6 +43,10 @@
|
||||
#define MMU_BIG 0
|
||||
#define MMU_SMALL 1
|
||||
|
||||
// Used in pde_pcf().
|
||||
#define ATS_ALLOWED 0
|
||||
#define ATS_NOT_ALLOWED 1
|
||||
|
||||
uvm_mmu_engine_type_t uvm_hal_hopper_mmu_engine_id_to_type(NvU16 mmu_engine_id)
|
||||
{
|
||||
if (mmu_engine_id >= NV_PFAULT_MMU_ENG_ID_HOST0 && mmu_engine_id <= NV_PFAULT_MMU_ENG_ID_HOST44)
|
||||
@ -260,7 +265,108 @@ static NvU64 poisoned_pte_hopper(void)
|
||||
return WRITE_HWCONST64(pte_bits, _MMU_VER3, PTE, PCF, PRIVILEGE_RO_NO_ATOMIC_UNCACHED_ACD);
|
||||
}
|
||||
|
||||
static NvU64 single_pde_hopper(uvm_mmu_page_table_alloc_t *phys_alloc, NvU32 depth)
|
||||
typedef enum
|
||||
{
|
||||
PDE_TYPE_SINGLE,
|
||||
PDE_TYPE_DUAL_BIG,
|
||||
PDE_TYPE_DUAL_SMALL,
|
||||
PDE_TYPE_COUNT,
|
||||
} pde_type_t;
|
||||
|
||||
static const NvU8 valid_pcf[][2] = { { NV_MMU_VER3_PDE_PCF_VALID_UNCACHED_ATS_ALLOWED,
|
||||
NV_MMU_VER3_PDE_PCF_VALID_UNCACHED_ATS_NOT_ALLOWED },
|
||||
{ NV_MMU_VER3_DUAL_PDE_PCF_BIG_VALID_UNCACHED_ATS_ALLOWED,
|
||||
NV_MMU_VER3_DUAL_PDE_PCF_BIG_VALID_UNCACHED_ATS_NOT_ALLOWED },
|
||||
{ NV_MMU_VER3_DUAL_PDE_PCF_SMALL_VALID_UNCACHED_ATS_ALLOWED,
|
||||
NV_MMU_VER3_DUAL_PDE_PCF_SMALL_VALID_UNCACHED_ATS_NOT_ALLOWED } };
|
||||
|
||||
static const NvU8 invalid_pcf[][2] = { { NV_MMU_VER3_PDE_PCF_INVALID_ATS_ALLOWED,
|
||||
NV_MMU_VER3_PDE_PCF_INVALID_ATS_NOT_ALLOWED },
|
||||
{ NV_MMU_VER3_DUAL_PDE_PCF_BIG_INVALID_ATS_ALLOWED,
|
||||
NV_MMU_VER3_DUAL_PDE_PCF_BIG_INVALID_ATS_NOT_ALLOWED },
|
||||
{ NV_MMU_VER3_DUAL_PDE_PCF_SMALL_INVALID_ATS_ALLOWED,
|
||||
NV_MMU_VER3_DUAL_PDE_PCF_SMALL_INVALID_ATS_NOT_ALLOWED } };
|
||||
|
||||
static const NvU8 va_base[] = { 56, 47, 38, 29, 21 };
|
||||
|
||||
// Reports whether the PDE at child_index of dir starts at a VA that lies
// outside the range returned by uvm_cpu_get_unaddressable_range(): strictly
// below the first returned bound, or at or above the second one.
//
// The start VA is built by summing, for dir and each of its ancestors, the
// entry index at that level shifted by va_base[depth]. The sum is then
// sign-extended from num_va_bits_hopper() bits to 64 bits.
static bool is_ats_range_valid(uvm_page_directory_t *dir, NvU32 child_index)
{
    NvU64 lower_bound;
    NvU64 upper_bound;
    NvU64 start_va = 0;
    NvU32 entry_index = child_index;
    NvU32 sign_shift;
    uvm_page_directory_t *level;

    uvm_cpu_get_unaddressable_range(&lower_bound, &upper_bound);

    UVM_ASSERT(dir->depth < ARRAY_SIZE(va_base));

    // We can use UVM_PAGE_SIZE_AGNOSTIC because page_size is only used in
    // index_bits_hopper() for PTE table, i.e., depth 5+, which does not use a
    // PDE PCF or an ATS_ALLOWED/NOT_ALLOWED setting.
    UVM_ASSERT(child_index < (1ull << index_bits_hopper(dir->depth, UVM_PAGE_SIZE_AGNOSTIC)));

    // Walk from dir up to the root, adding each level's contribution.
    for (level = dir; level; entry_index = level->index_in_parent, level = level->host_parent)
        start_va += (NvU64)entry_index << va_base[level->depth];

    // Sign-extend from the GPU VA width to a full 64-bit address.
    sign_shift = 64 - num_va_bits_hopper();
    start_va = (NvU64)((NvS64)(start_va << sign_shift) >> sign_shift);

    return start_va < lower_bound || start_va >= upper_bound;
}
|
||||
|
||||
// PDE Permission Control Flags
|
||||
static NvU32 pde_pcf(bool valid, pde_type_t pde_type, uvm_page_directory_t *dir, NvU32 child_index)
|
||||
{
|
||||
const NvU8 (*pcf)[2] = valid ? valid_pcf : invalid_pcf;
|
||||
NvU8 depth = dir->depth;
|
||||
|
||||
UVM_ASSERT(pde_type < PDE_TYPE_COUNT);
|
||||
UVM_ASSERT(depth < 5);
|
||||
|
||||
// On non-ATS systems, PDE PCF only sets the valid and volatile/cache bits.
|
||||
if (!g_uvm_global.ats.enabled)
|
||||
return pcf[pde_type][ATS_ALLOWED];
|
||||
|
||||
// We assume all supported ATS platforms use canonical form address.
|
||||
// See comments in uvm_gpu.c:uvm_gpu_can_address() and in
|
||||
// uvm_mmu.c:page_tree_ats_init();
|
||||
UVM_ASSERT(uvm_platform_uses_canonical_form_address());
|
||||
|
||||
// Hopper GPUs on ATS-enabled systems, perform a parallel lookup on both
|
||||
// ATS and GMMU page tables. For managed memory we need to prevent this
|
||||
// parallel lookup since we would not get any GPU fault if the CPU has
|
||||
// a valid mapping. Also, for external ranges that are known to be
|
||||
// mapped entirely on the GMMU page table we can skip the ATS lookup
|
||||
// for performance reasons. Parallel ATS lookup is disabled in PDE1
|
||||
// (depth 3) and, therefore, it applies to the underlying 512MB VA
|
||||
// range.
|
||||
//
|
||||
// UVM sets ATS_NOT_ALLOWED for all Hopper+ mappings on ATS systems.
|
||||
// This is fine because CUDA ensures that all managed and external
|
||||
// allocations are properly compartmentalized in 512MB-aligned VA
|
||||
// regions. For cudaHostRegister CUDA cannot control the VA range, but
|
||||
// we rely on ATS for those allocations so they can't choose the
|
||||
// ATS_NOT_ALLOWED mode.
|
||||
// TODO: Bug 3254055: Relax the NO_ATS setting from 512MB (pde1) range to
|
||||
// PTEs.
|
||||
// HW complies with the leaf PDE's ATS_ALLOWED/ATS_NOT_ALLOWED settings,
|
||||
// enabling us to treat any upper-level PDE as a don't care as long as there
|
||||
// are leaf PDEs for the entire upper-level PDE range. We assume PDE4
|
||||
// entries (depth == 0) are always ATS enabled, and the no_ats_range is in
|
||||
// PDE3 or lower.
|
||||
if (depth == 0 || (!valid && is_ats_range_valid(dir, child_index)))
|
||||
return pcf[pde_type][ATS_ALLOWED];
|
||||
|
||||
return pcf[pde_type][ATS_NOT_ALLOWED];
|
||||
}
|
||||
|
||||
static NvU64 single_pde_hopper(uvm_mmu_page_table_alloc_t *phys_alloc, uvm_page_directory_t *dir, NvU32 child_index)
|
||||
{
|
||||
NvU64 pde_bits = 0;
|
||||
|
||||
@ -280,38 +386,17 @@ static NvU64 single_pde_hopper(uvm_mmu_page_table_alloc_t *phys_alloc, NvU32 dep
|
||||
break;
|
||||
}
|
||||
|
||||
// PCF (permission control flags) 5:3
|
||||
// Hopper GPUs on ATS-enabled systems, perform a parallel lookup on both
|
||||
// ATS and GMMU page tables. For managed memory we need to prevent this
|
||||
// parallel lookup since we would not get any GPU fault if the CPU has
|
||||
// a valid mapping. Also, for external ranges that are known to be
|
||||
// mapped entirely on the GMMU page table we can skip the ATS lookup
|
||||
// for performance reasons. Parallel ATS lookup is disabled in PDE1
|
||||
// (depth 3) and, therefore, it applies to the underlying 512MB VA
|
||||
// range.
|
||||
//
|
||||
// UVM sets ATS_NOT_ALLOWED for all Hopper+ mappings on ATS systems.
|
||||
// This is fine because CUDA ensures that all managed and external
|
||||
// allocations are properly compartmentalized in 512MB-aligned VA
|
||||
// regions. For cudaHostRegister CUDA cannot control the VA range, but
|
||||
// we rely on ATS for those allocations so they can't choose the
|
||||
// ATS_NOT_ALLOWED mode.
|
||||
//
|
||||
// TODO: Bug 3254055: Relax the NO_ATS setting from 512MB (pde1) range
|
||||
// to PTEs.
|
||||
if (depth == 3 && g_uvm_global.ats.enabled)
|
||||
pde_bits |= HWCONST64(_MMU_VER3, PDE, PCF, VALID_UNCACHED_ATS_NOT_ALLOWED);
|
||||
else
|
||||
pde_bits |= HWCONST64(_MMU_VER3, PDE, PCF, VALID_UNCACHED_ATS_ALLOWED);
|
||||
|
||||
// address 51:12
|
||||
pde_bits |= HWVALUE64(_MMU_VER3, PDE, ADDRESS, address);
|
||||
}
|
||||
|
||||
// PCF (permission control flags) 5:3
|
||||
pde_bits |= HWVALUE64(_MMU_VER3, PDE, PCF, pde_pcf(phys_alloc != NULL, PDE_TYPE_SINGLE, dir, child_index));
|
||||
|
||||
return pde_bits;
|
||||
}
|
||||
|
||||
static NvU64 big_half_pde_hopper(uvm_mmu_page_table_alloc_t *phys_alloc)
|
||||
static NvU64 big_half_pde_hopper(uvm_mmu_page_table_alloc_t *phys_alloc, uvm_page_directory_t *dir, NvU32 child_index)
|
||||
{
|
||||
NvU64 pde_bits = 0;
|
||||
|
||||
@ -330,17 +415,20 @@ static NvU64 big_half_pde_hopper(uvm_mmu_page_table_alloc_t *phys_alloc)
|
||||
break;
|
||||
}
|
||||
|
||||
// PCF (permission control flags) 5:3
|
||||
pde_bits |= HWCONST64(_MMU_VER3, DUAL_PDE, PCF_BIG, VALID_UNCACHED_ATS_NOT_ALLOWED);
|
||||
|
||||
// address 51:8
|
||||
pde_bits |= HWVALUE64(_MMU_VER3, DUAL_PDE, ADDRESS_BIG, address);
|
||||
}
|
||||
|
||||
// PCF (permission control flags) 5:3
|
||||
pde_bits |= HWVALUE64(_MMU_VER3,
|
||||
DUAL_PDE,
|
||||
PCF_BIG,
|
||||
pde_pcf(phys_alloc != NULL, PDE_TYPE_DUAL_BIG, dir, child_index));
|
||||
|
||||
return pde_bits;
|
||||
}
|
||||
|
||||
static NvU64 small_half_pde_hopper(uvm_mmu_page_table_alloc_t *phys_alloc)
|
||||
static NvU64 small_half_pde_hopper(uvm_mmu_page_table_alloc_t *phys_alloc, uvm_page_directory_t *dir, NvU32 child_index)
|
||||
{
|
||||
NvU64 pde_bits = 0;
|
||||
|
||||
@ -359,29 +447,40 @@ static NvU64 small_half_pde_hopper(uvm_mmu_page_table_alloc_t *phys_alloc)
|
||||
break;
|
||||
}
|
||||
|
||||
// PCF (permission control flags) 69:67 [5:3]
|
||||
pde_bits |= HWCONST64(_MMU_VER3, DUAL_PDE, PCF_SMALL, VALID_UNCACHED_ATS_NOT_ALLOWED);
|
||||
|
||||
// address 115:76 [51:12]
|
||||
pde_bits |= HWVALUE64(_MMU_VER3, DUAL_PDE, ADDRESS_SMALL, address);
|
||||
}
|
||||
|
||||
// PCF (permission control flags) 69:67 [5:3]
|
||||
pde_bits |= HWVALUE64(_MMU_VER3,
|
||||
DUAL_PDE,
|
||||
PCF_SMALL,
|
||||
pde_pcf(phys_alloc != NULL, PDE_TYPE_DUAL_SMALL, dir, child_index));
|
||||
|
||||
return pde_bits;
|
||||
}
|
||||
|
||||
static void make_pde_hopper(void *entry, uvm_mmu_page_table_alloc_t **phys_allocs, NvU32 depth)
|
||||
static void make_pde_hopper(void *entry,
|
||||
uvm_mmu_page_table_alloc_t **phys_allocs,
|
||||
uvm_page_directory_t *dir,
|
||||
NvU32 child_index)
|
||||
{
|
||||
NvU32 entry_count = entries_per_index_hopper(depth);
|
||||
NvU32 entry_count;
|
||||
NvU64 *entry_bits = (NvU64 *)entry;
|
||||
|
||||
UVM_ASSERT(dir);
|
||||
|
||||
entry_count = entries_per_index_hopper(dir->depth);
|
||||
|
||||
if (entry_count == 1) {
|
||||
*entry_bits = single_pde_hopper(*phys_allocs, depth);
|
||||
*entry_bits = single_pde_hopper(*phys_allocs, dir, child_index);
|
||||
}
|
||||
else if (entry_count == 2) {
|
||||
entry_bits[MMU_BIG] = big_half_pde_hopper(phys_allocs[MMU_BIG]);
|
||||
entry_bits[MMU_SMALL] = small_half_pde_hopper(phys_allocs[MMU_SMALL]);
|
||||
entry_bits[MMU_BIG] = big_half_pde_hopper(phys_allocs[MMU_BIG], dir, child_index);
|
||||
entry_bits[MMU_SMALL] = small_half_pde_hopper(phys_allocs[MMU_SMALL], dir, child_index);
|
||||
|
||||
// This entry applies to the whole dual PDE but is stored in the lower
|
||||
// bits
|
||||
// bits.
|
||||
entry_bits[MMU_BIG] |= HWCONST64(_MMU_VER3, DUAL_PDE, IS_PTE, FALSE);
|
||||
}
|
||||
else {
|
||||
|
@ -633,6 +633,7 @@ typedef struct
|
||||
NvU64 requestedBase NV_ALIGN_BYTES(8); // IN
|
||||
NvU64 length NV_ALIGN_BYTES(8); // IN
|
||||
NvProcessorUuid preferredLocation; // IN
|
||||
NvS32 preferredCpuNumaNode; // IN
|
||||
NV_STATUS rmStatus; // OUT
|
||||
} UVM_SET_PREFERRED_LOCATION_PARAMS;
|
||||
|
||||
@ -766,8 +767,19 @@ typedef struct
|
||||
#define UVM_MIGRATE_FLAGS_ALL (UVM_MIGRATE_FLAG_ASYNC | \
|
||||
UVM_MIGRATE_FLAGS_TEST_ALL)
|
||||
|
||||
// For pageable migrations, cpuNumaNode is used as the destination NUMA node if
|
||||
// destinationUuid is the CPU.
|
||||
// If NV_ERR_INVALID_ARGUMENT is returned it is because cpuMemoryNode is not
|
||||
// valid and the destination processor is the CPU. cpuMemoryNode is considered
|
||||
// invalid if:
|
||||
// * it is less than -1,
|
||||
// * it is equal to or larger than the maximum number of nodes, or
|
||||
// * it corresponds to a registered GPU.
|
||||
// * it is not in the node_possible_map set of nodes,
|
||||
// * it does not have onlined memory
|
||||
//
|
||||
// For pageable migrations:
|
||||
//
|
||||
// In addition to the above, in the case of pageable memory, the
|
||||
// cpuMemoryNode is considered invalid if it's -1.
|
||||
//
|
||||
// If NV_WARN_NOTHING_TO_DO is returned, user-space is responsible for
|
||||
// completing the migration of the VA range described by userSpaceStart and
|
||||
@ -775,6 +787,7 @@ typedef struct
|
||||
//
|
||||
// If NV_ERR_MORE_PROCESSING_REQUIRED is returned, user-space is responsible
|
||||
// for re-trying with a different cpuNumaNode, starting at userSpaceStart.
|
||||
//
|
||||
#define UVM_MIGRATE UVM_IOCTL_BASE(51)
|
||||
typedef struct
|
||||
{
|
||||
@ -784,7 +797,7 @@ typedef struct
|
||||
NvU32 flags; // IN
|
||||
NvU64 semaphoreAddress NV_ALIGN_BYTES(8); // IN
|
||||
NvU32 semaphorePayload; // IN
|
||||
NvU32 cpuNumaNode; // IN
|
||||
NvS32 cpuNumaNode; // IN
|
||||
NvU64 userSpaceStart NV_ALIGN_BYTES(8); // OUT
|
||||
NvU64 userSpaceLength NV_ALIGN_BYTES(8); // OUT
|
||||
NV_STATUS rmStatus; // OUT
|
||||
|
@ -36,7 +36,7 @@
|
||||
typedef struct
|
||||
{
|
||||
size_t alloc_size;
|
||||
uint8_t ptr[0];
|
||||
uint8_t ptr[];
|
||||
} uvm_vmalloc_hdr_t;
|
||||
|
||||
typedef struct
|
||||
|
@ -27,7 +27,7 @@
|
||||
|
||||
const char *uvm_lock_order_to_string(uvm_lock_order_t lock_order)
|
||||
{
|
||||
BUILD_BUG_ON(UVM_LOCK_ORDER_COUNT != 33);
|
||||
BUILD_BUG_ON(UVM_LOCK_ORDER_COUNT != 34);
|
||||
|
||||
switch (lock_order) {
|
||||
UVM_ENUM_STRING_CASE(UVM_LOCK_ORDER_INVALID);
|
||||
@ -62,6 +62,7 @@ const char *uvm_lock_order_to_string(uvm_lock_order_t lock_order)
|
||||
UVM_ENUM_STRING_CASE(UVM_LOCK_ORDER_VA_SPACE_TOOLS);
|
||||
UVM_ENUM_STRING_CASE(UVM_LOCK_ORDER_SEMA_POOL_TRACKER);
|
||||
UVM_ENUM_STRING_CASE(UVM_LOCK_ORDER_SECURE_SEMAPHORE);
|
||||
UVM_ENUM_STRING_CASE(UVM_LOCK_ORDER_CSL_CTX);
|
||||
UVM_ENUM_STRING_CASE(UVM_LOCK_ORDER_LEAF);
|
||||
UVM_ENUM_STRING_DEFAULT();
|
||||
}
|
||||
@ -362,10 +363,7 @@ NV_STATUS uvm_bit_locks_init(uvm_bit_locks_t *bit_locks, size_t count, uvm_lock_
|
||||
if (!bit_locks->bits)
|
||||
return NV_ERR_NO_MEMORY;
|
||||
|
||||
#if UVM_IS_DEBUG()
|
||||
uvm_locking_assert_initialized();
|
||||
bit_locks->lock_order = lock_order;
|
||||
#endif
|
||||
uvm_lock_debug_init(bit_locks, lock_order);
|
||||
|
||||
return NV_OK;
|
||||
}
|
||||
|
@ -448,6 +448,12 @@
|
||||
//
|
||||
// CE semaphore payloads are encrypted, and require to take the CSL lock
|
||||
// (UVM_LOCK_ORDER_LEAF) to decrypt the payload.
|
||||
|
||||
// - CSL Context
|
||||
// Order: UVM_LOCK_ORDER_CSL_CTX
|
||||
// When the Confidential Computing feature is enabled, encrypt/decrypt
|
||||
// operations to communicate with GPU are handled by the CSL context.
|
||||
// This lock protects RM calls that use this context.
|
||||
//
|
||||
// - Leaf locks
|
||||
// Order: UVM_LOCK_ORDER_LEAF
|
||||
@ -492,6 +498,11 @@ typedef enum
|
||||
UVM_LOCK_ORDER_VA_SPACE_TOOLS,
|
||||
UVM_LOCK_ORDER_SEMA_POOL_TRACKER,
|
||||
UVM_LOCK_ORDER_SECURE_SEMAPHORE,
|
||||
|
||||
// TODO: Bug 4184836: [uvm][hcc] Remove UVM_LOCK_ORDER_CSL_CTX
|
||||
// This lock order can be removed after RM no longer relies on RPC event
|
||||
// notifications.
|
||||
UVM_LOCK_ORDER_CSL_CTX,
|
||||
UVM_LOCK_ORDER_LEAF,
|
||||
UVM_LOCK_ORDER_COUNT,
|
||||
} uvm_lock_order_t;
|
||||
@ -648,6 +659,15 @@ bool __uvm_locking_initialized(void);
|
||||
#define uvm_assert_lockable_order(order) UVM_ASSERT(__uvm_check_lockable_order(order, UVM_LOCK_FLAGS_MODE_ANY))
|
||||
#define uvm_assert_unlocked_order(order) UVM_ASSERT(__uvm_check_unlocked_order(order))
|
||||
|
||||
// Records the lock order in a lock's debug state.
//
// When UVM_IS_DEBUG() is true, this calls uvm_locking_assert_initialized() and
// stores 'order' in (lock)->lock_order. Otherwise it only evaluates 'order'
// and discards the result; 'lock' is not evaluated at all.
//
// Both macro arguments are parenthesized in every variant so that an
// expression argument cannot change how the expansion parses.
#if UVM_IS_DEBUG()
#define uvm_lock_debug_init(lock, order) ({                                         \
            uvm_locking_assert_initialized();                                       \
            (lock)->lock_order = (order);                                           \
        })
#else
#define uvm_lock_debug_init(lock, order) ((void) (order))
#endif
|
||||
|
||||
// Helpers for locking mmap_lock (mmap_sem in kernels < 5.8)
|
||||
// and recording its usage
|
||||
#define uvm_assert_mmap_lock_locked_mode(mm, flags) ({ \
|
||||
@ -738,15 +758,12 @@ typedef struct
|
||||
|
||||
#define uvm_assert_rwsem_unlocked(uvm_sem) UVM_ASSERT(!rwsem_is_locked(&(uvm_sem)->sem))
|
||||
|
||||
static void uvm_init_rwsem(uvm_rw_semaphore_t *uvm_sem, uvm_lock_order_t lock_order)
|
||||
{
|
||||
init_rwsem(&uvm_sem->sem);
|
||||
#if UVM_IS_DEBUG()
|
||||
uvm_locking_assert_initialized();
|
||||
uvm_sem->lock_order = lock_order;
|
||||
#endif
|
||||
uvm_assert_rwsem_unlocked(uvm_sem);
|
||||
}
|
||||
// Initializes a uvm_rw_semaphore_t and records its lock order.
//
// 'uvm_sem' is evaluated exactly once and captured in a local pointer. Every
// later step uses that pointer, so an argument with side effects is not
// re-evaluated. 'order' is pasted into the local's name, so it must be a
// single identifier token (for example a uvm_lock_order_t enumerator).
#define uvm_init_rwsem(uvm_sem, order) ({                                           \
            uvm_rw_semaphore_t *uvm_sem_ ## order = (uvm_sem);                      \
            init_rwsem(&uvm_sem_ ## order->sem);                                    \
            uvm_lock_debug_init(uvm_sem_ ## order, order);                          \
            uvm_assert_rwsem_unlocked(uvm_sem_ ## order);                           \
        })
|
||||
|
||||
#define uvm_down_read(uvm_sem) ({ \
|
||||
typeof(uvm_sem) _sem = (uvm_sem); \
|
||||
@ -874,15 +891,12 @@ typedef struct
|
||||
UVM_ASSERT_MSG(!irqs_disabled() && !in_interrupt(), "Mutexes cannot be used with interrupts disabled"); \
|
||||
})
|
||||
|
||||
static void uvm_mutex_init(uvm_mutex_t *mutex, uvm_lock_order_t lock_order)
|
||||
{
|
||||
mutex_init(&mutex->m);
|
||||
#if UVM_IS_DEBUG()
|
||||
uvm_locking_assert_initialized();
|
||||
mutex->lock_order = lock_order;
|
||||
#endif
|
||||
uvm_assert_mutex_unlocked(mutex);
|
||||
}
|
||||
#define uvm_mutex_init(mutex, order) ({ \
|
||||
uvm_mutex_t *mutex_ ## order = (mutex); \
|
||||
mutex_init(&mutex_ ## order->m); \
|
||||
uvm_lock_debug_init(mutex, order); \
|
||||
uvm_assert_mutex_unlocked(mutex); \
|
||||
})
|
||||
|
||||
#define uvm_mutex_lock(mutex) ({ \
|
||||
typeof(mutex) _mutex = (mutex); \
|
||||
@ -892,11 +906,14 @@ static void uvm_mutex_init(uvm_mutex_t *mutex, uvm_lock_order_t lock_order)
|
||||
uvm_assert_mutex_locked(_mutex); \
|
||||
})
|
||||
|
||||
// Lock w/o any tracking. This should be extremely rare and *_no_tracking
|
||||
// helpers will be added only as needed.
|
||||
#define uvm_mutex_lock_no_tracking(mutex) ({ \
|
||||
// Lock while already holding a lock of the same order taken with
|
||||
// uvm_mutex_lock() variant. Note this shouldn't be used if the held lock was
|
||||
// taken with uvm_mutex_lock_nested() because we only support a single level of
|
||||
// nesting. This should be extremely rare and *_nested helpers will only be
|
||||
// added as needed.
|
||||
#define uvm_mutex_lock_nested(mutex) ({ \
|
||||
uvm_assert_mutex_interrupts(); \
|
||||
mutex_lock(&(mutex)->m); \
|
||||
mutex_lock_nested(&(mutex)->m, 1); \
|
||||
})
|
||||
|
||||
#define uvm_mutex_trylock(mutex) ({ \
|
||||
@ -926,9 +943,8 @@ static void uvm_mutex_init(uvm_mutex_t *mutex, uvm_lock_order_t lock_order)
|
||||
uvm_record_unlock_out_of_order(_mutex, UVM_LOCK_FLAGS_MODE_EXCLUSIVE); \
|
||||
})
|
||||
|
||||
// Unlock w/o any tracking. This should be extremely rare and *_no_tracking
|
||||
// helpers will be added only as needed.
|
||||
#define uvm_mutex_unlock_no_tracking(mutex) ({ \
|
||||
// Unlock w/o any tracking.
|
||||
#define uvm_mutex_unlock_nested(mutex) ({ \
|
||||
uvm_assert_mutex_interrupts(); \
|
||||
mutex_unlock(&(mutex)->m); \
|
||||
})
|
||||
@ -941,14 +957,11 @@ typedef struct
|
||||
#endif
|
||||
} uvm_semaphore_t;
|
||||
|
||||
static void uvm_sema_init(uvm_semaphore_t *semaphore, int val, uvm_lock_order_t lock_order)
|
||||
{
|
||||
sema_init(&semaphore->sem, val);
|
||||
#if UVM_IS_DEBUG()
|
||||
uvm_locking_assert_initialized();
|
||||
semaphore->lock_order = lock_order;
|
||||
#endif
|
||||
}
|
||||
#define uvm_sema_init(semaphore, val, order) ({ \
|
||||
uvm_semaphore_t *sem_ ## order = (semaphore); \
|
||||
sema_init(&sem_ ## order->sem, (val)); \
|
||||
uvm_lock_debug_init(semaphore, order); \
|
||||
})
|
||||
|
||||
#define uvm_sem_is_locked(uvm_sem) uvm_check_locked(uvm_sem, UVM_LOCK_FLAGS_MODE_SHARED)
|
||||
|
||||
@ -1012,15 +1025,12 @@ typedef struct
|
||||
|
||||
#define uvm_assert_spinlock_unlocked(spinlock) UVM_ASSERT(!spin_is_locked(&(spinlock)->lock))
|
||||
|
||||
static void uvm_spin_lock_init(uvm_spinlock_t *spinlock, uvm_lock_order_t lock_order)
|
||||
{
|
||||
spin_lock_init(&spinlock->lock);
|
||||
#if UVM_IS_DEBUG()
|
||||
uvm_locking_assert_initialized();
|
||||
spinlock->lock_order = lock_order;
|
||||
#endif
|
||||
uvm_assert_spinlock_unlocked(spinlock);
|
||||
}
|
||||
#define uvm_spin_lock_init(spinlock, order) ({ \
|
||||
uvm_spinlock_t *spinlock_ ## order = (spinlock); \
|
||||
spin_lock_init(&spinlock_ ## order->lock); \
|
||||
uvm_lock_debug_init(spinlock, order); \
|
||||
uvm_assert_spinlock_unlocked(spinlock); \
|
||||
})
|
||||
|
||||
#define uvm_spin_lock(uvm_lock) ({ \
|
||||
typeof(uvm_lock) _lock = (uvm_lock); \
|
||||
@ -1036,15 +1046,12 @@ static void uvm_spin_lock_init(uvm_spinlock_t *spinlock, uvm_lock_order_t lock_o
|
||||
uvm_record_unlock(_lock, UVM_LOCK_FLAGS_MODE_EXCLUSIVE); \
|
||||
})
|
||||
|
||||
static void uvm_spin_lock_irqsave_init(uvm_spinlock_irqsave_t *spinlock, uvm_lock_order_t lock_order)
|
||||
{
|
||||
spin_lock_init(&spinlock->lock);
|
||||
#if UVM_IS_DEBUG()
|
||||
uvm_locking_assert_initialized();
|
||||
spinlock->lock_order = lock_order;
|
||||
#endif
|
||||
uvm_assert_spinlock_unlocked(spinlock);
|
||||
}
|
||||
#define uvm_spin_lock_irqsave_init(spinlock, order) ({ \
|
||||
uvm_spinlock_irqsave_t *spinlock_ ## order = (spinlock); \
|
||||
spin_lock_init(&spinlock_ ## order->lock); \
|
||||
uvm_lock_debug_init(spinlock, order); \
|
||||
uvm_assert_spinlock_unlocked(spinlock); \
|
||||
})
|
||||
|
||||
// Use a temp to not rely on flags being written after acquiring the lock.
|
||||
#define uvm_spin_lock_irqsave(uvm_lock) ({ \
|
||||
@ -1119,16 +1126,12 @@ static void uvm_rwlock_irqsave_dec(uvm_rwlock_irqsave_t *rwlock)
|
||||
#define uvm_assert_rwlock_unlocked(uvm_rwlock)
|
||||
#endif
|
||||
|
||||
static void uvm_rwlock_irqsave_init(uvm_rwlock_irqsave_t *rwlock, uvm_lock_order_t lock_order)
|
||||
{
|
||||
rwlock_init(&rwlock->lock);
|
||||
#if UVM_IS_DEBUG()
|
||||
uvm_locking_assert_initialized();
|
||||
rwlock->lock_order = lock_order;
|
||||
atomic_set(&rwlock->lock_count, 0);
|
||||
#endif
|
||||
uvm_assert_rwlock_unlocked(rwlock);
|
||||
}
|
||||
#define uvm_rwlock_irqsave_init(rwlock, order) ({ \
|
||||
uvm_rwlock_irqsave_t *rwlock_ ## order = rwlock; \
|
||||
rwlock_init(&rwlock_ ## order->lock); \
|
||||
uvm_lock_debug_init(rwlock, order); \
|
||||
uvm_assert_rwlock_unlocked(rwlock); \
|
||||
})
|
||||
|
||||
// We can't store the irq_flags within the lock itself for readers, so they must
|
||||
// pass in their flags.
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*******************************************************************************
|
||||
Copyright (c) 2016-2022 NVIDIA Corporation
|
||||
Copyright (c) 2016-2023 NVIDIA Corporation
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to
|
||||
@ -633,10 +633,17 @@ static NV_STATUS set_ext_gpu_map_location(uvm_ext_gpu_map_t *ext_gpu_map,
|
||||
uvm_gpu_t *mapping_gpu,
|
||||
const UvmGpuMemoryInfo *mem_info)
|
||||
{
|
||||
uvm_gpu_t *owning_gpu;
|
||||
uvm_gpu_t *owning_gpu = NULL;
|
||||
uvm_gpu_t *gpu;
|
||||
|
||||
if (mem_info->egm)
|
||||
UVM_ASSERT(mem_info->sysmem);
|
||||
|
||||
// !mem_info->deviceDescendant && !mem_info->sysmem imply fabric allocation.
|
||||
// !mem_info->deviceDescendant also means that mem_info->uuid is invalid. In
|
||||
// this case the owning GPU is NULL, meaning that UVM is oblivious to the
|
||||
// topology and relies on RM and/or the fabric manager (FM) for memory
|
||||
// lifetime management and GPU ref counting.
|
||||
if (!mem_info->deviceDescendant && !mem_info->sysmem) {
|
||||
ext_gpu_map->owning_gpu = NULL;
|
||||
ext_gpu_map->is_sysmem = false;
|
||||
@ -645,7 +652,17 @@ static NV_STATUS set_ext_gpu_map_location(uvm_ext_gpu_map_t *ext_gpu_map,
|
||||
// This is a local or peer allocation, so the owning GPU must have been
|
||||
// registered.
|
||||
// This also checks for if EGM owning GPU is registered.
|
||||
owning_gpu = uvm_va_space_get_gpu_by_uuid(va_space, &mem_info->uuid);
|
||||
|
||||
// TODO: Bug 4351121: RM will return the GI UUID, but
|
||||
// uvm_va_space_get_gpu_by_uuid() currently matches on physical GPU UUIDs.
|
||||
// Match on GI UUID until the UVM user level API has been updated to use
|
||||
// the GI UUID.
|
||||
for_each_va_space_gpu(gpu, va_space) {
|
||||
if (uvm_uuid_eq(&gpu->uuid, &mem_info->uuid)) {
|
||||
owning_gpu = gpu;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (!owning_gpu)
|
||||
return NV_ERR_INVALID_DEVICE;
|
||||
|
||||
@ -1343,7 +1360,9 @@ static NV_STATUS uvm_free(uvm_va_space_t *va_space, NvU64 base, NvU64 length)
|
||||
{
|
||||
uvm_va_range_t *va_range;
|
||||
NV_STATUS status = NV_OK;
|
||||
uvm_global_processor_mask_t retained_mask;
|
||||
// TODO: Bug 4351121: retained_mask should be pre-allocated, not on the
|
||||
// stack.
|
||||
uvm_processor_mask_t retained_mask;
|
||||
LIST_HEAD(deferred_free_list);
|
||||
|
||||
if (uvm_api_range_invalid_4k(base, length))
|
||||
@ -1379,14 +1398,14 @@ static NV_STATUS uvm_free(uvm_va_space_t *va_space, NvU64 base, NvU64 length)
|
||||
// External ranges may have deferred free work, so the GPUs may have to
|
||||
// be retained. Construct the mask of all the GPUs that need to be
|
||||
// retained.
|
||||
uvm_va_space_global_gpus_in_mask(va_space, &retained_mask, &va_range->external.mapped_gpus);
|
||||
uvm_processor_mask_and(&retained_mask, &va_range->external.mapped_gpus, &va_space->registered_gpus);
|
||||
}
|
||||
|
||||
uvm_va_range_destroy(va_range, &deferred_free_list);
|
||||
|
||||
// If there is deferred work, retain the required GPUs.
|
||||
if (!list_empty(&deferred_free_list))
|
||||
uvm_global_mask_retain(&retained_mask);
|
||||
uvm_global_gpu_retain(&retained_mask);
|
||||
|
||||
out:
|
||||
uvm_va_space_up_write(va_space);
|
||||
@ -1394,7 +1413,7 @@ out:
|
||||
if (!list_empty(&deferred_free_list)) {
|
||||
UVM_ASSERT(status == NV_OK);
|
||||
uvm_deferred_free_object_list(&deferred_free_list);
|
||||
uvm_global_mask_release(&retained_mask);
|
||||
uvm_global_gpu_release(&retained_mask);
|
||||
}
|
||||
|
||||
return status;
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*******************************************************************************
|
||||
Copyright (c) 2016-2021 NVIDIA Corporation
|
||||
Copyright (c) 2016-2023 NVIDIA Corporation
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to
|
||||
@ -60,6 +60,8 @@ void uvm_hal_maxwell_arch_init_properties(uvm_parent_gpu_t *parent_gpu)
|
||||
|
||||
parent_gpu->access_counters_supported = false;
|
||||
|
||||
parent_gpu->access_counters_can_use_physical_addresses = false;
|
||||
|
||||
parent_gpu->fault_cancel_va_supported = false;
|
||||
|
||||
parent_gpu->scoped_atomics_supported = false;
|
||||
@ -71,4 +73,6 @@ void uvm_hal_maxwell_arch_init_properties(uvm_parent_gpu_t *parent_gpu)
|
||||
parent_gpu->smc.supported = false;
|
||||
|
||||
parent_gpu->plc_supported = false;
|
||||
|
||||
parent_gpu->no_ats_range_required = false;
|
||||
}
|
||||
|
@ -26,39 +26,53 @@
|
||||
|
||||
void uvm_hal_maxwell_enable_access_counter_notifications_unsupported(uvm_parent_gpu_t *parent_gpu)
|
||||
{
|
||||
UVM_ASSERT_MSG(false, "enable_access_counter_notifications is not supported on GPU: %s.\n", parent_gpu->name);
|
||||
UVM_ASSERT_MSG(false,
|
||||
"enable_access_counter_notifications is not supported on GPU: %s.\n",
|
||||
uvm_parent_gpu_name(parent_gpu));
|
||||
}
|
||||
|
||||
void uvm_hal_maxwell_disable_access_counter_notifications_unsupported(uvm_parent_gpu_t *parent_gpu)
|
||||
{
|
||||
UVM_ASSERT_MSG(false, "disable_access_counter_notifications is not supported on GPU: %s.\n", parent_gpu->name);
|
||||
UVM_ASSERT_MSG(false,
|
||||
"disable_access_counter_notifications is not supported on GPU: %s.\n",
|
||||
uvm_parent_gpu_name(parent_gpu));
|
||||
}
|
||||
|
||||
void uvm_hal_maxwell_clear_access_counter_notifications_unsupported(uvm_parent_gpu_t *parent_gpu, NvU32 get)
|
||||
{
|
||||
UVM_ASSERT_MSG(false, "clear_access_counter_notifications is not supported on GPU: %s.\n", parent_gpu->name);
|
||||
UVM_ASSERT_MSG(false,
|
||||
"clear_access_counter_notifications is not supported on GPU: %s.\n",
|
||||
uvm_parent_gpu_name(parent_gpu));
|
||||
}
|
||||
|
||||
NvU32 uvm_hal_maxwell_access_counter_buffer_entry_size_unsupported(uvm_parent_gpu_t *parent_gpu)
|
||||
{
|
||||
UVM_ASSERT_MSG(false, "access_counter_buffer_entry_size is not supported on GPU: %s.\n", parent_gpu->name);
|
||||
UVM_ASSERT_MSG(false,
|
||||
"access_counter_buffer_entry_size is not supported on GPU: %s.\n",
|
||||
uvm_parent_gpu_name(parent_gpu));
|
||||
return 0;
|
||||
}
|
||||
|
||||
bool uvm_hal_maxwell_access_counter_buffer_entry_is_valid_unsupported(uvm_parent_gpu_t *parent_gpu, NvU32 index)
|
||||
{
|
||||
UVM_ASSERT_MSG(false, "access_counter_buffer_entry_is_valid is not supported on GPU: %s.\n", parent_gpu->name);
|
||||
UVM_ASSERT_MSG(false,
|
||||
"access_counter_buffer_entry_is_valid is not supported on GPU: %s.\n",
|
||||
uvm_parent_gpu_name(parent_gpu));
|
||||
return false;
|
||||
}
|
||||
|
||||
void uvm_hal_maxwell_access_counter_buffer_entry_clear_valid_unsupported(uvm_parent_gpu_t *parent_gpu, NvU32 index)
|
||||
{
|
||||
UVM_ASSERT_MSG(false, "access_counter_buffer_entry_clear_valid is not supported on GPU: %s.\n", parent_gpu->name);
|
||||
UVM_ASSERT_MSG(false,
|
||||
"access_counter_buffer_entry_clear_valid is not supported on GPU: %s.\n",
|
||||
uvm_parent_gpu_name(parent_gpu));
|
||||
}
|
||||
|
||||
void uvm_hal_maxwell_access_counter_buffer_parse_entry_unsupported(uvm_parent_gpu_t *parent_gpu,
|
||||
NvU32 index,
|
||||
uvm_access_counter_buffer_entry_t *buffer_entry)
|
||||
{
|
||||
UVM_ASSERT_MSG(false, "access_counter_buffer_parse_entry is not supported on GPU: %s.\n", parent_gpu->name);
|
||||
UVM_ASSERT_MSG(false,
|
||||
"access_counter_buffer_parse_entry is not supported on GPU: %s.\n",
|
||||
uvm_parent_gpu_name(parent_gpu));
|
||||
}
|
||||
|
@ -186,7 +186,7 @@ NvU32 uvm_hal_maxwell_ce_plc_mode(void)
|
||||
}
|
||||
|
||||
// Noop, since COPY_TYPE doesn't exist in Maxwell.
|
||||
NvU32 uvm_hal_maxwell_ce_memcopy_copy_type(uvm_push_t *push, uvm_gpu_address_t dst, uvm_gpu_address_t src)
|
||||
NvU32 uvm_hal_maxwell_ce_memcopy_copy_type(uvm_gpu_address_t dst, uvm_gpu_address_t src)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
@ -212,7 +212,7 @@ void uvm_hal_maxwell_ce_memcopy(uvm_push_t *push, uvm_gpu_address_t dst, uvm_gpu
|
||||
|
||||
launch_dma_src_dst_type = gpu->parent->ce_hal->phys_mode(push, dst, src);
|
||||
launch_dma_plc_mode = gpu->parent->ce_hal->plc_mode();
|
||||
copy_type_value = gpu->parent->ce_hal->memcopy_copy_type(push, dst, src);
|
||||
copy_type_value = gpu->parent->ce_hal->memcopy_copy_type(dst, src);
|
||||
|
||||
if (uvm_push_get_and_reset_flag(push, UVM_PUSH_FLAG_CE_NEXT_PIPELINED))
|
||||
pipelined_value = HWCONST(B0B5, LAUNCH_DMA, DATA_TRANSFER_TYPE, PIPELINED);
|
||||
|
@ -26,34 +26,46 @@
|
||||
|
||||
void uvm_hal_maxwell_enable_replayable_faults_unsupported(uvm_parent_gpu_t *parent_gpu)
|
||||
{
|
||||
UVM_ASSERT_MSG(false, "enable_replayable_faults is not supported on GPU: %s.\n", parent_gpu->name);
|
||||
UVM_ASSERT_MSG(false,
|
||||
"enable_replayable_faults is not supported on GPU: %s.\n",
|
||||
uvm_parent_gpu_name(parent_gpu));
|
||||
}
|
||||
|
||||
void uvm_hal_maxwell_disable_replayable_faults_unsupported(uvm_parent_gpu_t *parent_gpu)
|
||||
{
|
||||
UVM_ASSERT_MSG(false, "disable_replayable_faults is not supported on GPU: %s.\n", parent_gpu->name);
|
||||
UVM_ASSERT_MSG(false,
|
||||
"disable_replayable_faults is not supported on GPU: %s.\n",
|
||||
uvm_parent_gpu_name(parent_gpu));
|
||||
}
|
||||
|
||||
void uvm_hal_maxwell_clear_replayable_faults_unsupported(uvm_parent_gpu_t *parent_gpu, NvU32 get)
|
||||
{
|
||||
UVM_ASSERT_MSG(false, "clear_replayable_faults is not supported on GPU: %s.\n", parent_gpu->name);
|
||||
UVM_ASSERT_MSG(false,
|
||||
"clear_replayable_faults is not supported on GPU: %s.\n",
|
||||
uvm_parent_gpu_name(parent_gpu));
|
||||
}
|
||||
|
||||
NvU32 uvm_hal_maxwell_fault_buffer_read_put_unsupported(uvm_parent_gpu_t *parent_gpu)
|
||||
{
|
||||
UVM_ASSERT_MSG(false, "fault_buffer_read_put is not supported on GPU: %s.\n", parent_gpu->name);
|
||||
UVM_ASSERT_MSG(false,
|
||||
"fault_buffer_read_put is not supported on GPU: %s.\n",
|
||||
uvm_parent_gpu_name(parent_gpu));
|
||||
return 0;
|
||||
}
|
||||
|
||||
NvU32 uvm_hal_maxwell_fault_buffer_read_get_unsupported(uvm_parent_gpu_t *parent_gpu)
|
||||
{
|
||||
UVM_ASSERT_MSG(false, "fault_buffer_read_get is not supported on GPU: %s.\n", parent_gpu->name);
|
||||
UVM_ASSERT_MSG(false,
|
||||
"fault_buffer_read_get is not supported on GPU: %s.\n",
|
||||
uvm_parent_gpu_name(parent_gpu));
|
||||
return 0;
|
||||
}
|
||||
|
||||
void uvm_hal_maxwell_fault_buffer_write_get_unsupported(uvm_parent_gpu_t *parent_gpu, NvU32 index)
|
||||
{
|
||||
UVM_ASSERT_MSG(false, "fault_buffer_write_get is not supported on GPU: %s.\n", parent_gpu->name);
|
||||
UVM_ASSERT_MSG(false,
|
||||
"fault_buffer_write_get is not supported on GPU: %s.\n",
|
||||
uvm_parent_gpu_name(parent_gpu));
|
||||
}
|
||||
|
||||
NvU8 uvm_hal_maxwell_fault_buffer_get_ve_id_unsupported(NvU16 mmu_engine_id, uvm_mmu_engine_type_t mmu_engine_type)
|
||||
@ -72,24 +84,32 @@ NV_STATUS uvm_hal_maxwell_fault_buffer_parse_replayable_entry_unsupported(uvm_pa
|
||||
NvU32 index,
|
||||
uvm_fault_buffer_entry_t *buffer_entry)
|
||||
{
|
||||
UVM_ASSERT_MSG(false, "fault_buffer_parse_entry is not supported on GPU: %s.\n", parent_gpu->name);
|
||||
UVM_ASSERT_MSG(false,
|
||||
"fault_buffer_parse_entry is not supported on GPU: %s.\n",
|
||||
uvm_parent_gpu_name(parent_gpu));
|
||||
return NV_ERR_NOT_SUPPORTED;
|
||||
}
|
||||
|
||||
bool uvm_hal_maxwell_fault_buffer_entry_is_valid_unsupported(uvm_parent_gpu_t *parent_gpu, NvU32 index)
|
||||
{
|
||||
UVM_ASSERT_MSG(false, "fault_buffer_entry_is_valid is not supported on GPU: %s.\n", parent_gpu->name);
|
||||
UVM_ASSERT_MSG(false,
|
||||
"fault_buffer_entry_is_valid is not supported on GPU: %s.\n",
|
||||
uvm_parent_gpu_name(parent_gpu));
|
||||
return false;
|
||||
}
|
||||
|
||||
void uvm_hal_maxwell_fault_buffer_entry_clear_valid_unsupported(uvm_parent_gpu_t *parent_gpu, NvU32 index)
|
||||
{
|
||||
UVM_ASSERT_MSG(false, "fault_buffer_entry_clear_valid is not supported on GPU: %s.\n", parent_gpu->name);
|
||||
UVM_ASSERT_MSG(false,
|
||||
"fault_buffer_entry_clear_valid is not supported on GPU: %s.\n",
|
||||
uvm_parent_gpu_name(parent_gpu));
|
||||
}
|
||||
|
||||
NvU32 uvm_hal_maxwell_fault_buffer_entry_size_unsupported(uvm_parent_gpu_t *parent_gpu)
|
||||
{
|
||||
UVM_ASSERT_MSG(false, "fault_buffer_entry_size is not supported on GPU: %s.\n", parent_gpu->name);
|
||||
UVM_ASSERT_MSG(false,
|
||||
"fault_buffer_entry_size is not supported on GPU: %s.\n",
|
||||
uvm_parent_gpu_name(parent_gpu));
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -97,6 +117,8 @@ void uvm_hal_maxwell_fault_buffer_parse_non_replayable_entry_unsupported(uvm_par
|
||||
void *fault_packet,
|
||||
uvm_fault_buffer_entry_t *buffer_entry)
|
||||
{
|
||||
UVM_ASSERT_MSG(false, "fault_buffer_parse_non_replayable_entry is not supported on GPU: %s.\n", parent_gpu->name);
|
||||
UVM_ASSERT_MSG(false,
|
||||
"fault_buffer_parse_non_replayable_entry is not supported on GPU: %s.\n",
|
||||
uvm_parent_gpu_name(parent_gpu));
|
||||
}
|
||||
|
||||
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
x
Reference in New Issue
Block a user