Commit 54d69484da (parent 9d0b0414a5): 570.86.15
@@ -1,7 +1,7 @@
# NVIDIA Linux Open GPU Kernel Module Source

This is the source release of the NVIDIA Linux open GPU kernel modules,
version 565.77.
version 570.86.15.


## How to Build
@@ -17,7 +17,7 @@ as root:

Note that the kernel modules built here must be used with GSP
firmware and user-space NVIDIA GPU driver components from a corresponding
565.77 driver release. This can be achieved by installing
570.86.15 driver release. This can be achieved by installing
the NVIDIA GPU driver from the .run file using the `--no-kernel-modules`
option. E.g.,

@@ -185,7 +185,7 @@ table below).
For details on feature support and limitations, see the NVIDIA GPU driver
end user README here:

https://us.download.nvidia.com/XFree86/Linux-x86_64/565.77/README/kernel_open.html
https://us.download.nvidia.com/XFree86/Linux-x86_64/570.86.15/README/kernel_open.html

For vGPU support, please refer to the README.vgpu packaged in the vGPU Host
Package for more details.
@@ -755,7 +755,6 @@ Subsystem Device ID.
| NVIDIA H20 | 2329 10DE 198B |
| NVIDIA H20 | 2329 10DE 198C |
| NVIDIA H20-3e | 232C 10DE 2063 |
| NVIDIA H20-3e | 232C 10DE 2064 |
| NVIDIA H100 80GB HBM3 | 2330 10DE 16C0 |
| NVIDIA H100 80GB HBM3 | 2330 10DE 16C1 |
| NVIDIA H100 PCIe | 2331 10DE 1626 |
@@ -925,6 +924,7 @@ Subsystem Device ID.
| NVIDIA GeForce RTX 4060 | 2882 |
| NVIDIA GeForce RTX 4060 Laptop GPU | 28A0 |
| NVIDIA GeForce RTX 4050 Laptop GPU | 28A1 |
| NVIDIA GeForce RTX 3050 A Laptop GPU | 28A3 |
| NVIDIA RTX 2000 Ada Generation | 28B0 1028 1870 |
| NVIDIA RTX 2000 Ada Generation | 28B0 103C 1870 |
| NVIDIA RTX 2000E Ada Generation | 28B0 103C 1871 |
@@ -939,3 +939,6 @@ Subsystem Device ID.
| NVIDIA GeForce RTX 4060 Laptop GPU | 28E0 |
| NVIDIA GeForce RTX 4050 Laptop GPU | 28E1 |
| NVIDIA RTX 2000 Ada Generation Embedded GPU | 28F8 |
| NVIDIA B200 | 2901 10DE 1999 |
| NVIDIA B200 | 2901 10DE 199B |
| NVIDIA B200 | 2901 10DE 20DA |

@@ -57,6 +57,20 @@ ifeq ($(NV_UNDEF_BEHAVIOR_SANITIZER),1)
UBSAN_SANITIZE := y
endif

#
# Command to create a symbolic link, explicitly resolving the symlink target
# to an absolute path to abstract away the difference between Linux < 6.13,
# where the CWD is the Linux kernel source tree for Kbuild extmod builds, and
# Linux >= 6.13, where the CWD is the external module source tree.
#
# This is used to create the nv*-kernel.o -> nv*-kernel.o_binary symlinks for
# kernel modules which use precompiled binary object files.
#

quiet_cmd_symlink = SYMLINK $@
cmd_symlink = ln -sf $(abspath $<) $@


$(foreach _module, $(NV_KERNEL_MODULES), \
$(eval include $(src)/$(_module)/$(_module).Kbuild))

@@ -72,7 +86,7 @@ EXTRA_CFLAGS += -I$(src)/common/inc
EXTRA_CFLAGS += -I$(src)
EXTRA_CFLAGS += -Wall $(DEFINES) $(INCLUDES) -Wno-cast-qual -Wno-format-extra-args
EXTRA_CFLAGS += -D__KERNEL__ -DMODULE -DNVRM
EXTRA_CFLAGS += -DNV_VERSION_STRING=\"565.77\"
EXTRA_CFLAGS += -DNV_VERSION_STRING=\"570.86.15\"

ifneq ($(SYSSRCHOST1X),)
EXTRA_CFLAGS += -I$(SYSSRCHOST1X)

@@ -32,7 +32,10 @@
typedef enum
{
NV_FIRMWARE_TYPE_GSP,
NV_FIRMWARE_TYPE_GSP_LOG
NV_FIRMWARE_TYPE_GSP_LOG,
#if defined(NV_VMWARE)
NV_FIRMWARE_TYPE_BINDATA
#endif
} nv_firmware_type_t;

typedef enum
@@ -45,6 +48,7 @@ typedef enum
NV_FIRMWARE_CHIP_FAMILY_AD10X = 5,
NV_FIRMWARE_CHIP_FAMILY_GH100 = 6,
NV_FIRMWARE_CHIP_FAMILY_GB10X = 8,
NV_FIRMWARE_CHIP_FAMILY_GB20X = 9,
NV_FIRMWARE_CHIP_FAMILY_END,
} nv_firmware_chip_family_t;

@@ -54,6 +58,7 @@ static inline const char *nv_firmware_chip_family_to_string(
{
switch (fw_chip_family) {
case NV_FIRMWARE_CHIP_FAMILY_GB10X: return "gb10x";
case NV_FIRMWARE_CHIP_FAMILY_GB20X: return "gb20x";
case NV_FIRMWARE_CHIP_FAMILY_GH100: return "gh100";
case NV_FIRMWARE_CHIP_FAMILY_AD10X: return "ad10x";
case NV_FIRMWARE_CHIP_FAMILY_GA10X: return "ga10x";
@@ -84,6 +89,7 @@ static inline const char *nv_firmware_for_chip_family(
switch (fw_chip_family)
{
case NV_FIRMWARE_CHIP_FAMILY_GB10X: // fall through
case NV_FIRMWARE_CHIP_FAMILY_GB20X: // fall through
case NV_FIRMWARE_CHIP_FAMILY_GH100: // fall through
case NV_FIRMWARE_CHIP_FAMILY_AD10X: // fall through
case NV_FIRMWARE_CHIP_FAMILY_GA10X:
@@ -104,6 +110,7 @@ static inline const char *nv_firmware_for_chip_family(
switch (fw_chip_family)
{
case NV_FIRMWARE_CHIP_FAMILY_GB10X: // fall through
case NV_FIRMWARE_CHIP_FAMILY_GB20X: // fall through
case NV_FIRMWARE_CHIP_FAMILY_GH100: // fall through
case NV_FIRMWARE_CHIP_FAMILY_AD10X: // fall through
case NV_FIRMWARE_CHIP_FAMILY_GA10X:
@@ -119,7 +126,12 @@ static inline const char *nv_firmware_for_chip_family(
return "";
}
}

#if defined(NV_VMWARE)
else if (fw_type == NV_FIRMWARE_TYPE_BINDATA)
{
return NV_FIRMWARE_FOR_NAME("bindata_image");
}
#endif
return "";
}
#endif // defined(NV_FIRMWARE_FOR_NAME)

@@ -1,5 +1,5 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 2020-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-FileCopyrightText: Copyright (c) 2020-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person obtaining a
@@ -128,6 +128,9 @@ typedef struct nv_ioctl_register_fd

#define NV_DMABUF_EXPORT_MAX_HANDLES 128

#define NV_DMABUF_EXPORT_MAPPING_TYPE_DEFAULT 0
#define NV_DMABUF_EXPORT_MAPPING_TYPE_FORCE_PCIE 1

typedef struct nv_ioctl_export_to_dma_buf_fd
{
int fd;
@@ -136,6 +139,7 @@ typedef struct nv_ioctl_export_to_dma_buf_fd
NvU32 numObjects;
NvU32 index;
NvU64 totalSize NV_ALIGN_BYTES(8);
NvU8 mappingType;
NvHandle handles[NV_DMABUF_EXPORT_MAX_HANDLES];
NvU64 offsets[NV_DMABUF_EXPORT_MAX_HANDLES] NV_ALIGN_BYTES(8);
NvU64 sizes[NV_DMABUF_EXPORT_MAX_HANDLES] NV_ALIGN_BYTES(8);

@ -231,12 +231,6 @@ NV_STATUS nvos_forward_error_to_cray(struct pci_dev *, NvU32,
|
||||
const char *, va_list);
|
||||
#endif
|
||||
|
||||
#if defined(NVCPU_PPC64LE) && defined(CONFIG_EEH)
|
||||
#include <asm/eeh.h>
|
||||
#define NV_PCI_ERROR_RECOVERY_ENABLED() eeh_enabled()
|
||||
#define NV_PCI_ERROR_RECOVERY
|
||||
#endif
|
||||
|
||||
#if defined(NV_ASM_SET_MEMORY_H_PRESENT)
|
||||
#include <asm/set_memory.h>
|
||||
#endif
|
||||
@ -609,7 +603,7 @@ static NvBool nv_numa_node_has_memory(int node_id)
|
||||
|
||||
#define NV_ALLOC_PAGES_NODE(ptr, nid, order, gfp_mask) \
|
||||
{ \
|
||||
(ptr) = (unsigned long)page_address(alloc_pages_node(nid, gfp_mask, order)); \
|
||||
(ptr) = (unsigned long) alloc_pages_node(nid, gfp_mask, order); \
|
||||
}
|
||||
|
||||
#define NV_GET_FREE_PAGES(ptr, order, gfp_mask) \
|
||||
@ -881,16 +875,6 @@ typedef void irqreturn_t;
|
||||
#define PCI_CAP_ID_EXP 0x10
|
||||
#endif
|
||||
|
||||
/*
|
||||
* On Linux on PPC64LE enable basic support for Linux PCI error recovery (see
|
||||
* Documentation/PCI/pci-error-recovery.txt). Currently RM only supports error
|
||||
* notification and data collection, not actual recovery of the device.
|
||||
*/
|
||||
#if defined(NVCPU_PPC64LE) && defined(CONFIG_EEH)
|
||||
#include <asm/eeh.h>
|
||||
#define NV_PCI_ERROR_RECOVERY
|
||||
#endif
|
||||
|
||||
/*
|
||||
* If the host OS has page sizes larger than 4KB, we may have a security
|
||||
* problem. Registers are typically grouped in 4KB pages, but if there are
|
||||
@ -1419,8 +1403,6 @@ typedef struct nv_dma_map_s {
|
||||
0 ? NV_OK : NV_ERR_OPERATING_SYSTEM)
|
||||
#endif
|
||||
|
||||
typedef struct nv_ibmnpu_info nv_ibmnpu_info_t;
|
||||
|
||||
typedef struct nv_work_s {
|
||||
struct work_struct task;
|
||||
void *data;
|
||||
@ -1468,7 +1450,6 @@ struct nv_dma_device {
|
||||
} addressable_range;
|
||||
|
||||
struct device *dev;
|
||||
NvBool nvlink;
|
||||
};
|
||||
|
||||
/* Properties of the coherent link */
|
||||
@ -1517,9 +1498,6 @@ typedef struct nv_linux_state_s {
|
||||
struct device *dev;
|
||||
struct pci_dev *pci_dev;
|
||||
|
||||
/* IBM-NPU info associated with this GPU */
|
||||
nv_ibmnpu_info_t *npu;
|
||||
|
||||
/* coherent link information */
|
||||
coherent_link_info_t coherent_link_info;
|
||||
|
||||
@ -1835,7 +1813,7 @@ static inline int nv_is_control_device(struct inode *inode)
|
||||
return (minor((inode)->i_rdev) == NV_MINOR_DEVICE_NUMBER_CONTROL_DEVICE);
|
||||
}
|
||||
|
||||
#if defined(NV_DOM0_KERNEL_PRESENT) || defined(NV_VGPU_KVM_BUILD)
|
||||
#if defined(NV_DOM0_KERNEL_PRESENT) || defined(NV_VGPU_KVM_BUILD) || defined(NV_DEVICE_VM_BUILD)
|
||||
#define NV_VGX_HYPER
|
||||
#if defined(NV_XEN_IOEMU_INJECT_MSI)
|
||||
#include <xen/ioemu.h>
|
||||
@ -1872,59 +1850,6 @@ static inline NvBool nv_alloc_release(nv_linux_file_private_t *nvlfp, nv_alloc_t
|
||||
#define RB_EMPTY_ROOT(root) ((root)->rb_node == NULL)
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Starting on Power9 systems, DMA addresses for NVLink are no longer
|
||||
* the same as used over PCIe.
|
||||
*
|
||||
* Power9 supports a 56-bit Real Address. This address range is compressed
|
||||
* when accessed over NVLink to allow the GPU to access all of memory using
|
||||
* its 47-bit Physical address.
|
||||
*
|
||||
* If there is an NPU device present on the system, it implies that NVLink
|
||||
* sysmem links are present and we need to apply the required address
|
||||
* conversion for NVLink within the driver.
|
||||
*
|
||||
* See Bug 1920398 for further background and details.
|
||||
*
|
||||
* Note, a deviation from the documented compression scheme is that the
|
||||
* upper address bits (i.e. bit 56-63) instead of being set to zero are
|
||||
* preserved during NVLink address compression so the orignal PCIe DMA
|
||||
* address can be reconstructed on expansion. These bits can be safely
|
||||
* ignored on NVLink since they are truncated by the GPU.
|
||||
*
|
||||
* Bug 1968345: As a performance enhancement it is the responsibility of
|
||||
* the caller on PowerPC platforms to check for presence of an NPU device
|
||||
* before the address transformation is applied.
|
||||
*/
|
||||
static inline NvU64 nv_compress_nvlink_addr(NvU64 addr)
|
||||
{
|
||||
NvU64 addr47 = addr;
|
||||
|
||||
#if defined(NVCPU_PPC64LE)
|
||||
addr47 = addr & ((1ULL << 43) - 1);
|
||||
addr47 |= (addr & (0x3ULL << 45)) >> 2;
|
||||
WARN_ON(addr47 & (1ULL << 44));
|
||||
addr47 |= (addr & (0x3ULL << 49)) >> 4;
|
||||
addr47 |= addr & ~((1ULL << 56) - 1);
|
||||
#endif
|
||||
|
||||
return addr47;
|
||||
}
|
||||
|
||||
static inline NvU64 nv_expand_nvlink_addr(NvU64 addr47)
|
||||
{
|
||||
NvU64 addr = addr47;
|
||||
|
||||
#if defined(NVCPU_PPC64LE)
|
||||
addr = addr47 & ((1ULL << 43) - 1);
|
||||
addr |= (addr47 & (3ULL << 43)) << 2;
|
||||
addr |= (addr47 & (3ULL << 45)) << 4;
|
||||
addr |= addr47 & ~((1ULL << 56) - 1);
|
||||
#endif
|
||||
|
||||
return addr;
|
||||
}
|
||||
|
||||
// Default flags for ISRs
|
||||
static inline NvU32 nv_default_irq_flags(nv_state_t *nv)
|
||||
{
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* SPDX-FileCopyrightText: Copyright (c) 2019 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* SPDX-FileCopyrightText: Copyright (c) 2019-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
@ -36,5 +36,6 @@ int nv_pci_count_devices(void);
|
||||
NvU8 nv_find_pci_capability(struct pci_dev *, NvU8);
|
||||
int nvidia_dev_get_pci_info(const NvU8 *, struct pci_dev **, NvU64 *, NvU64 *);
|
||||
nv_linux_state_t * find_pci(NvU32, NvU8, NvU8, NvU8);
|
||||
NvBool nv_pci_is_valid_topology_for_direct_pci(nv_state_t *, struct device *);
|
||||
|
||||
#endif
|
||||
|
@ -368,6 +368,8 @@ typedef struct nv_state_t
|
||||
{
|
||||
NvBool valid;
|
||||
NvU8 uuid[GPU_UUID_LEN];
|
||||
NvBool pci_uuid_read_attempted;
|
||||
NV_STATUS pci_uuid_status;
|
||||
} nv_uuid_cache;
|
||||
void *handle;
|
||||
|
||||
@ -479,6 +481,8 @@ typedef struct nv_state_t
|
||||
/* Bool to check if the GPU has a coherent sysmem link */
|
||||
NvBool coherent;
|
||||
|
||||
/* OS detected GPU has ATS capability */
|
||||
NvBool ats_support;
|
||||
/*
|
||||
* NUMA node ID of the CPU to which the GPU is attached.
|
||||
* Holds NUMA_NO_NODE on platforms that don't support NUMA configuration.
|
||||
@ -570,7 +574,8 @@ typedef NV_STATUS (*nvPmaEvictRangeCallback)(void *, NvU64, NvU64, nvgpuGpuMemor
|
||||
#define NV_FLAG_PASSTHRU 0x0080
|
||||
#define NV_FLAG_SUSPENDED 0x0100
|
||||
#define NV_FLAG_SOC_IGPU 0x0200
|
||||
// Unused 0x0400
|
||||
/* To be set when an FLR needs to be triggered after device shut down. */
|
||||
#define NV_FLAG_TRIGGER_FLR 0x0400
|
||||
#define NV_FLAG_PERSISTENT_SW_STATE 0x0800
|
||||
#define NV_FLAG_IN_RECOVERY 0x1000
|
||||
// Unused 0x2000
|
||||
@ -613,6 +618,7 @@ typedef struct
|
||||
const char *gc6_support;
|
||||
const char *gcoff_support;
|
||||
const char *s0ix_status;
|
||||
const char *db_support;
|
||||
} nv_power_info_t;
|
||||
|
||||
#define NV_PRIMARY_VGA(nv) ((nv)->primary_vga)
|
||||
@ -758,6 +764,7 @@ static inline NvBool IS_IMEM_OFFSET(nv_state_t *nv, NvU64 offset, NvU64 length)
|
||||
#define NV_ALIGN_DOWN(v,g) ((v) & ~((g) - 1))
|
||||
#endif
|
||||
|
||||
|
||||
/*
|
||||
* driver internal interfaces
|
||||
*/
|
||||
@ -813,7 +820,6 @@ NV_STATUS NV_API_CALL nv_dma_map_mmio (nv_dma_device_t *, NvU64, NvU6
|
||||
void NV_API_CALL nv_dma_unmap_mmio (nv_dma_device_t *, NvU64, NvU64);
|
||||
|
||||
void NV_API_CALL nv_dma_cache_invalidate (nv_dma_device_t *, void *);
|
||||
void NV_API_CALL nv_dma_enable_nvlink (nv_dma_device_t *);
|
||||
|
||||
NvS32 NV_API_CALL nv_start_rc_timer (nv_state_t *);
|
||||
NvS32 NV_API_CALL nv_stop_rc_timer (nv_state_t *);
|
||||
@ -840,9 +846,7 @@ NV_STATUS NV_API_CALL nv_acpi_mux_method (nv_state_t *, NvU32 *, NvU32,
|
||||
|
||||
NV_STATUS NV_API_CALL nv_log_error (nv_state_t *, NvU32, const char *, va_list);
|
||||
|
||||
NvU64 NV_API_CALL nv_get_dma_start_address (nv_state_t *);
|
||||
NV_STATUS NV_API_CALL nv_set_primary_vga_status(nv_state_t *);
|
||||
NV_STATUS NV_API_CALL nv_pci_trigger_recovery (nv_state_t *);
|
||||
NvBool NV_API_CALL nv_requires_dma_remap (nv_state_t *);
|
||||
|
||||
NvBool NV_API_CALL nv_is_rm_firmware_active(nv_state_t *);
|
||||
@ -855,19 +859,8 @@ void NV_API_CALL nv_put_file_private(void *);
|
||||
NV_STATUS NV_API_CALL nv_get_device_memory_config(nv_state_t *, NvU64 *, NvU64 *, NvU64 *, NvU32 *, NvS32 *);
|
||||
NV_STATUS NV_API_CALL nv_get_egm_info(nv_state_t *, NvU64 *, NvU64 *, NvS32 *);
|
||||
|
||||
NV_STATUS NV_API_CALL nv_get_ibmnpu_genreg_info(nv_state_t *, NvU64 *, NvU64 *, void**);
|
||||
NV_STATUS NV_API_CALL nv_get_ibmnpu_relaxed_ordering_mode(nv_state_t *nv, NvBool *mode);
|
||||
|
||||
void NV_API_CALL nv_wait_for_ibmnpu_rsync(nv_state_t *nv);
|
||||
|
||||
void NV_API_CALL nv_ibmnpu_cache_flush_range(nv_state_t *nv, NvU64, NvU64);
|
||||
|
||||
void NV_API_CALL nv_p2p_free_platform_data(void *data);
|
||||
|
||||
#if defined(NVCPU_PPC64LE)
|
||||
NV_STATUS NV_API_CALL nv_get_nvlink_line_rate (nv_state_t *, NvU32 *);
|
||||
#endif
|
||||
|
||||
NV_STATUS NV_API_CALL nv_revoke_gpu_mappings (nv_state_t *);
|
||||
void NV_API_CALL nv_acquire_mmap_lock (nv_state_t *);
|
||||
void NV_API_CALL nv_release_mmap_lock (nv_state_t *);
|
||||
@ -998,18 +991,24 @@ NV_STATUS NV_API_CALL rm_p2p_init_mapping (nvidia_stack_t *, NvU64, NvU6
|
||||
NV_STATUS NV_API_CALL rm_p2p_destroy_mapping (nvidia_stack_t *, NvU64);
|
||||
NV_STATUS NV_API_CALL rm_p2p_get_pages (nvidia_stack_t *, NvU64, NvU32, NvU64, NvU64, NvU64 *, NvU32 *, NvU32 *, NvU32 *, NvU8 **, void *);
|
||||
NV_STATUS NV_API_CALL rm_p2p_get_gpu_info (nvidia_stack_t *, NvU64, NvU64, NvU8 **, void **);
|
||||
NV_STATUS NV_API_CALL rm_p2p_get_pages_persistent (nvidia_stack_t *, NvU64, NvU64, void **, NvU64 *, NvU32 *, void *, void *, void **);
|
||||
NV_STATUS NV_API_CALL rm_p2p_get_pages_persistent (nvidia_stack_t *, NvU64, NvU64, void **, NvU64 *, NvU32 *, NvBool, void *, void *, void **);
|
||||
NV_STATUS NV_API_CALL rm_p2p_register_callback (nvidia_stack_t *, NvU64, NvU64, NvU64, void *, void (*)(void *), void *);
|
||||
NV_STATUS NV_API_CALL rm_p2p_put_pages (nvidia_stack_t *, NvU64, NvU32, NvU64, void *);
|
||||
NV_STATUS NV_API_CALL rm_p2p_put_pages_persistent(nvidia_stack_t *, void *, void *, void *);
|
||||
NV_STATUS NV_API_CALL rm_p2p_dma_map_pages (nvidia_stack_t *, nv_dma_device_t *, NvU8 *, NvU64, NvU32, NvU64 *, void **);
|
||||
NV_STATUS NV_API_CALL rm_dma_buf_dup_mem_handle (nvidia_stack_t *, nv_state_t *, NvHandle, NvHandle, NvHandle, NvHandle, void *, NvHandle, NvU64, NvU64, NvHandle *, void **);
|
||||
void NV_API_CALL rm_dma_buf_undup_mem_handle(nvidia_stack_t *, nv_state_t *, NvHandle, NvHandle);
|
||||
NV_STATUS NV_API_CALL rm_dma_buf_map_mem_handle (nvidia_stack_t *, nv_state_t *, NvHandle, NvHandle, MemoryRange, void *, NvBool, MemoryArea *);
|
||||
void NV_API_CALL rm_dma_buf_unmap_mem_handle(nvidia_stack_t *, nv_state_t *, NvHandle, NvHandle, void *, NvBool, MemoryArea);
|
||||
NV_STATUS NV_API_CALL rm_dma_buf_get_client_and_device(nvidia_stack_t *, nv_state_t *, NvHandle, NvHandle, NvHandle *, NvHandle *, NvHandle *, void **, NvBool *);
|
||||
NV_STATUS NV_API_CALL rm_dma_buf_map_mem_handle (nvidia_stack_t *, nv_state_t *,
|
||||
NvHandle, NvHandle, MemoryRange,
|
||||
NvU8, void *, NvBool, MemoryArea *);
|
||||
void NV_API_CALL rm_dma_buf_unmap_mem_handle(nvidia_stack_t *, nv_state_t *,
|
||||
NvHandle, NvHandle, NvU8, void *,
|
||||
NvBool, MemoryArea);
|
||||
NV_STATUS NV_API_CALL rm_dma_buf_get_client_and_device(nvidia_stack_t *,
|
||||
nv_state_t *, NvHandle, NvHandle,
|
||||
NvU8, NvHandle *, NvHandle *,
|
||||
NvHandle *, void **, NvBool *);
|
||||
void NV_API_CALL rm_dma_buf_put_client_and_device(nvidia_stack_t *, nv_state_t *, NvHandle, NvHandle, NvHandle, void *);
|
||||
NV_STATUS NV_API_CALL rm_log_gpu_crash (nv_stack_t *, nv_state_t *);
|
||||
|
||||
void NV_API_CALL rm_kernel_rmapi_op(nvidia_stack_t *sp, void *ops_cmd);
|
||||
NvBool NV_API_CALL rm_get_device_remove_flag(nvidia_stack_t *sp, NvU32 gpu_id);
|
||||
@ -1026,7 +1025,6 @@ NvBool NV_API_CALL rm_is_device_sequestered(nvidia_stack_t *, nv_state_t *);
|
||||
void NV_API_CALL rm_check_for_gpu_surprise_removal(nvidia_stack_t *, nv_state_t *);
|
||||
NV_STATUS NV_API_CALL rm_set_external_kernel_client_count(nvidia_stack_t *, nv_state_t *, NvBool);
|
||||
NV_STATUS NV_API_CALL rm_schedule_gpu_wakeup(nvidia_stack_t *, nv_state_t *);
|
||||
NvBool NV_API_CALL rm_is_iommu_needed_for_sriov(nvidia_stack_t *, nv_state_t *);
|
||||
NvBool NV_API_CALL rm_disable_iomap_wc(void);
|
||||
|
||||
void NV_API_CALL rm_init_dynamic_power_management(nvidia_stack_t *, nv_state_t *, NvBool);
|
||||
@ -1043,12 +1041,14 @@ void NV_API_CALL rm_acpi_nvpcf_notify(nvidia_stack_t *);
|
||||
NvBool NV_API_CALL rm_is_altstack_in_use(void);
|
||||
|
||||
/* vGPU VFIO specific functions */
|
||||
NV_STATUS NV_API_CALL nv_vgpu_create_request(nvidia_stack_t *, nv_state_t *, const NvU8 *, NvU32, NvU16 *, NvU32);
|
||||
NV_STATUS NV_API_CALL nv_vgpu_create_request(nvidia_stack_t *, nv_state_t *, const NvU8 *, NvU32, NvU16 *,
|
||||
NvU32 *, NvU32 *, NvU32);
|
||||
NV_STATUS NV_API_CALL nv_vgpu_delete(nvidia_stack_t *, const NvU8 *, NvU16);
|
||||
NV_STATUS NV_API_CALL nv_vgpu_get_type_ids(nvidia_stack_t *, nv_state_t *, NvU32 *, NvU32 *, NvBool, NvU8, NvBool);
|
||||
NV_STATUS NV_API_CALL nv_vgpu_get_type_info(nvidia_stack_t *, nv_state_t *, NvU32, char *, int, NvU8);
|
||||
NV_STATUS NV_API_CALL nv_vgpu_get_bar_info(nvidia_stack_t *, nv_state_t *, const NvU8 *, NvU64 *,
|
||||
NvU64 *, NvU64 *, NvU32 *, NvBool *, NvU8 *);
|
||||
NV_STATUS NV_API_CALL nv_vgpu_update_sysfs_info(nvidia_stack_t *, nv_state_t *, const NvU8 *, NvU32, NvU32);
|
||||
NV_STATUS NV_API_CALL nv_vgpu_get_hbm_info(nvidia_stack_t *, nv_state_t *, const NvU8 *, NvU64 *, NvU64 *);
|
||||
NV_STATUS NV_API_CALL nv_vgpu_process_vf_info(nvidia_stack_t *, nv_state_t *, NvU8, NvU32, NvU8, NvU8, NvU8, NvBool, void *);
|
||||
NV_STATUS NV_API_CALL nv_gpu_bind_event(nvidia_stack_t *, NvU32, NvBool *);
|
||||
|
@ -592,6 +592,14 @@ void nvUvmInterfaceChannelDestroy(uvmGpuChannelHandle channel);
|
||||
Error codes:
|
||||
NV_ERR_GENERIC
|
||||
NV_ERR_NO_MEMORY
|
||||
NV_ERR_INVALID_STATE
|
||||
NV_ERR_NOT_SUPPORTED
|
||||
NV_ERR_NOT_READY
|
||||
NV_ERR_INVALID_LOCK_STATE
|
||||
NV_ERR_INVALID_STATE
|
||||
NV_ERR_NVLINK_FABRIC_NOT_READY
|
||||
NV_ERR_NVLINK_FABRIC_FAILURE
|
||||
NV_ERR_GPU_MEMORY_ONLINING_FAILURE
|
||||
*/
|
||||
NV_STATUS nvUvmInterfaceQueryCaps(uvmGpuDeviceHandle device,
|
||||
UvmGpuCaps *caps);
|
||||
|
@ -620,19 +620,12 @@ typedef struct UvmGpuClientInfo_tag
|
||||
NvHandle hSmcPartRef;
|
||||
} UvmGpuClientInfo;
|
||||
|
||||
typedef enum
|
||||
{
|
||||
UVM_GPU_CONF_COMPUTE_MODE_NONE,
|
||||
UVM_GPU_CONF_COMPUTE_MODE_APM,
|
||||
UVM_GPU_CONF_COMPUTE_MODE_HCC,
|
||||
UVM_GPU_CONF_COMPUTE_MODE_COUNT
|
||||
} UvmGpuConfComputeMode;
|
||||
|
||||
typedef struct UvmGpuConfComputeCaps_tag
|
||||
{
|
||||
// Out: GPU's confidential compute mode
|
||||
UvmGpuConfComputeMode mode;
|
||||
// Is key rotation enabled for UVM keys
|
||||
// Out: true if Confidential Computing is enabled on the GPU
|
||||
NvBool bConfComputingEnabled;
|
||||
|
||||
// Out: true if key rotation is enabled (for UVM keys) on the GPU
|
||||
NvBool bKeyRotationEnabled;
|
||||
} UvmGpuConfComputeCaps;
|
||||
|
||||
@ -746,6 +739,8 @@ typedef struct UvmGpuInfo_tag
|
||||
// to NVSwitch peers.
|
||||
NvU64 nvswitchEgmMemoryWindowStart;
|
||||
|
||||
// GPU supports ATS capability
|
||||
NvBool atsSupport;
|
||||
} UvmGpuInfo;
|
||||
|
||||
typedef struct UvmGpuFbInfo_tag
|
||||
@ -759,6 +754,7 @@ typedef struct UvmGpuFbInfo_tag
|
||||
NvBool bZeroFb; // Zero FB mode enabled.
|
||||
NvU64 maxVidmemPageSize; // Largest GPU page size to access vidmem.
|
||||
NvBool bStaticBar1Enabled; // Static BAR1 mode is enabled
|
||||
NvU64 staticBar1StartOffset; // The start offset of the static mapping
|
||||
NvU64 staticBar1Size; // The size of the static mapping
|
||||
} UvmGpuFbInfo;
|
||||
|
||||
|
@ -544,6 +544,9 @@ struct NvKmsKapiCreateSurfaceParams {
|
||||
* explicit_layout is NV_TRUE and layout is
|
||||
* NvKmsSurfaceMemoryLayoutBlockLinear */
|
||||
NvU8 log2GobsPerBlockY;
|
||||
|
||||
/* [IN] Whether a surface can be updated directly on the screen */
|
||||
NvBool noDisplayCaching;
|
||||
};
|
||||
|
||||
enum NvKmsKapiAllocationType {
|
||||
@ -1011,6 +1014,17 @@ struct NvKmsKapiFunctionsTable {
|
||||
const void *pLinearAddress
|
||||
);
|
||||
|
||||
/*!
|
||||
* Check if memory object allocated is video memory.
|
||||
*
|
||||
* \param [in] memory Memory allocated using allocateMemory()
|
||||
*
|
||||
* \return NV_TRUE if memory is vidmem, NV_FALSE otherwise.
|
||||
*/
|
||||
NvBool (*isVidmem)(
|
||||
const struct NvKmsKapiMemory *memory
|
||||
);
|
||||
|
||||
/*!
|
||||
* Create a formatted surface from an NvKmsKapiMemory object.
|
||||
*
|
||||
|
@ -33,38 +33,18 @@ extern "C" {
|
||||
|
||||
#include "nvtypes.h"
|
||||
|
||||
#if !defined(NVIDIA_UNDEF_LEGACY_BIT_MACROS)
|
||||
//
|
||||
// Miscellaneous macros useful for bit field manipulations
|
||||
//
|
||||
// STUPID HACK FOR CL 19434692. Will revert when fix CL is delivered bfm -> chips_a.
|
||||
#ifndef BIT
|
||||
#define BIT(b) (1U<<(b))
|
||||
#endif
|
||||
#ifndef BIT32
|
||||
#define BIT32(b) ((NvU32)1U<<(b))
|
||||
#endif
|
||||
#ifndef BIT64
|
||||
#define BIT64(b) ((NvU64)1U<<(b))
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
//
|
||||
// It is recommended to use the following bit macros to avoid macro name
|
||||
// collisions with other src code bases.
|
||||
//
|
||||
// Miscellaneous macros useful for bit field manipulations.
|
||||
#ifndef NVBIT
|
||||
#define NVBIT(b) (1U<<(b))
|
||||
#define NVBIT(b) (1U<<(b))
|
||||
#endif
|
||||
#ifndef NVBIT_TYPE
|
||||
#define NVBIT_TYPE(b, t) (((t)1U)<<(b))
|
||||
#define NVBIT_TYPE(b, t) (((t)1U)<<(b))
|
||||
#endif
|
||||
#ifndef NVBIT32
|
||||
#define NVBIT32(b) NVBIT_TYPE(b, NvU32)
|
||||
#define NVBIT32(b) NVBIT_TYPE(b, NvU32)
|
||||
#endif
|
||||
#ifndef NVBIT64
|
||||
#define NVBIT64(b) NVBIT_TYPE(b, NvU64)
|
||||
#define NVBIT64(b) NVBIT_TYPE(b, NvU64)
|
||||
#endif
|
||||
|
||||
//Concatenate 2 32bit values to a 64bit value
|
||||
@ -72,7 +52,7 @@ extern "C" {
|
||||
|
||||
// Helper macros for 32 bit bitmasks
|
||||
#define NV_BITMASK32_ELEMENT_SIZE (sizeof(NvU32) << 3)
|
||||
#define NV_BITMASK32_IDX(chId) (((chId) & ~(0x1F)) >> 5)
|
||||
#define NV_BITMASK32_IDX(chId) (((chId) & ~(0x1F)) >> 5)
|
||||
#define NV_BITMASK32_OFFSET(chId) ((chId) & (0x1F))
|
||||
#define NV_BITMASK32_SET(pChannelMask, chId) \
|
||||
(pChannelMask)[NV_BITMASK32_IDX(chId)] |= NVBIT(NV_BITMASK32_OFFSET(chId))
|
||||
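The NV_BITMASK32_* helpers above pack one bit per channel into an array of 32-bit words. As a quick sanity check, here is a minimal standalone sketch (plain C; NvU32 is swapped for uint32_t, and the macro bodies are copied from the hunk above so the example builds outside the driver tree):

```c
#include <stdint.h>
#include <stdio.h>

/* Local copies of the macros shown in the hunk above. */
#define NVBIT(b)                  (1U << (b))
#define NV_BITMASK32_IDX(chId)    (((chId) & ~(0x1F)) >> 5)
#define NV_BITMASK32_OFFSET(chId) ((chId) & (0x1F))
#define NV_BITMASK32_SET(pChannelMask, chId) \
    (pChannelMask)[NV_BITMASK32_IDX(chId)] |= NVBIT(NV_BITMASK32_OFFSET(chId))

int main(void)
{
    uint32_t channel_mask[4] = { 0 };

    /* Channel 37 lands in word 1 (37 >> 5), bit 5 (37 & 0x1F). */
    NV_BITMASK32_SET(channel_mask, 37);

    printf("word 1 = 0x%08x\n", channel_mask[1]); /* prints 0x00000020 */
    return 0;
}
```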
@ -990,6 +970,22 @@ static NV_FORCEINLINE void *NV_NVUPTR_TO_PTR(NvUPtr address)
|
||||
// Get the number of elements the specified fixed-size array
|
||||
#define NV_ARRAY_ELEMENTS(x) ((sizeof(x)/sizeof((x)[0])))
|
||||
|
||||
#if !defined(NVIDIA_UNDEF_LEGACY_BIT_MACROS)
|
||||
//
|
||||
// Deprecated macros whose definition can be removed once the code base no longer references them.
|
||||
// Use the NVBIT* macros instead of these macros.
|
||||
//
|
||||
#ifndef BIT
|
||||
#define BIT(b) (1U<<(b))
|
||||
#endif
|
||||
#ifndef BIT32
|
||||
#define BIT32(b) ((NvU32)1U<<(b))
|
||||
#endif
|
||||
#ifndef BIT64
|
||||
#define BIT64(b) ((NvU64)1U<<(b))
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif //__cplusplus
|
||||
|
@ -155,6 +155,10 @@ NV_STATUS_CODE(NV_ERR_KEY_ROTATION_IN_PROGRESS, 0x0000007D, "Operation no
|
||||
NV_STATUS_CODE(NV_ERR_TEST_ONLY_CODE_NOT_ENABLED, 0x0000007E, "Test-only code path not enabled")
|
||||
NV_STATUS_CODE(NV_ERR_SECURE_BOOT_FAILED, 0x0000007F, "GFW secure boot failed")
|
||||
NV_STATUS_CODE(NV_ERR_INSUFFICIENT_ZBC_ENTRY, 0x00000080, "No more ZBC entry for the client")
|
||||
NV_STATUS_CODE(NV_ERR_NVLINK_FABRIC_NOT_READY, 0x00000081, "Nvlink Fabric Status or Fabric Probe is not yet complete, caller needs to retry")
|
||||
NV_STATUS_CODE(NV_ERR_NVLINK_FABRIC_FAILURE, 0x00000082, "Nvlink Fabric Probe failed")
|
||||
NV_STATUS_CODE(NV_ERR_GPU_MEMORY_ONLINING_FAILURE, 0x00000083, "GPU Memory Onlining failed")
|
||||
NV_STATUS_CODE(NV_ERR_REDUCTION_MANAGER_NOT_AVAILABLE, 0x00000084, "Reduction Manager is not available")
|
||||
|
||||
// Warnings:
|
||||
NV_STATUS_CODE(NV_WARN_HOT_SWITCH, 0x00010001, "WARNING Hot switch")
|
||||
|
@ -170,7 +170,7 @@ NvU32 NV_API_CALL os_get_grid_csp_support (void);
|
||||
void NV_API_CALL os_bug_check (NvU32, const char *);
|
||||
NV_STATUS NV_API_CALL os_lock_user_pages (void *, NvU64, void **, NvU32);
|
||||
NV_STATUS NV_API_CALL os_lookup_user_io_memory (void *, NvU64, NvU64 **);
|
||||
NV_STATUS NV_API_CALL os_unlock_user_pages (NvU64, void *);
|
||||
NV_STATUS NV_API_CALL os_unlock_user_pages (NvU64, void *, NvU32);
|
||||
NV_STATUS NV_API_CALL os_match_mmap_offset (void *, NvU64, NvU64 *);
|
||||
NV_STATUS NV_API_CALL os_get_euid (NvU32 *);
|
||||
NV_STATUS NV_API_CALL os_get_smbios_header (NvU64 *pSmbsAddr);
|
||||
@ -178,6 +178,7 @@ NV_STATUS NV_API_CALL os_get_acpi_rsdp_from_uefi (NvU32 *);
|
||||
void NV_API_CALL os_add_record_for_crashLog (void *, NvU32);
|
||||
void NV_API_CALL os_delete_record_for_crashLog (void *);
|
||||
NV_STATUS NV_API_CALL os_call_vgpu_vfio (void *, NvU32);
|
||||
NV_STATUS NV_API_CALL os_device_vm_present (void);
|
||||
NV_STATUS NV_API_CALL os_numa_memblock_size (NvU64 *);
|
||||
NV_STATUS NV_API_CALL os_alloc_pages_node (NvS32, NvU32, NvU32, NvU64 *);
|
||||
NV_STATUS NV_API_CALL os_get_page (NvU64 address);
|
||||
@ -213,6 +214,7 @@ enum os_pci_req_atomics_type {
|
||||
OS_INTF_PCIE_REQ_ATOMICS_128BIT
|
||||
};
|
||||
NV_STATUS NV_API_CALL os_enable_pci_req_atomics (void *, enum os_pci_req_atomics_type);
|
||||
void NV_API_CALL os_pci_trigger_flr(void *handle);
|
||||
NV_STATUS NV_API_CALL os_get_numa_node_memory_usage (NvS32, NvU64 *, NvU64 *);
|
||||
NV_STATUS NV_API_CALL os_numa_add_gpu_memory (void *, NvU64, NvU64, NvU32 *);
|
||||
NV_STATUS NV_API_CALL os_numa_remove_gpu_memory (void *, NvU64, NvU64, NvU32);
|
||||
@ -220,6 +222,7 @@ NV_STATUS NV_API_CALL os_offline_page_at_address(NvU64 address);
|
||||
void* NV_API_CALL os_get_pid_info(void);
|
||||
void NV_API_CALL os_put_pid_info(void *pid_info);
|
||||
NV_STATUS NV_API_CALL os_find_ns_pid(void *pid_info, NvU32 *ns_pid);
|
||||
NvBool NV_API_CALL os_is_init_ns(void);
|
||||
|
||||
extern NvU32 os_page_size;
|
||||
extern NvU64 os_page_mask;
|
||||
|
@ -25,6 +25,7 @@ fi
|
||||
# VGX_KVM_BUILD parameter defined only vGPU builds on KVM hypervisor
|
||||
# GRID_BUILD parameter defined only for GRID builds (GRID Guest driver)
|
||||
# GRID_BUILD_CSP parameter defined only for GRID CSP builds (GRID Guest driver for CSPs)
|
||||
# VGX_DEVICE_VM_BUILD parameter defined only for Device VM VGX build (vGPU Host driver)
|
||||
|
||||
test_xen() {
|
||||
#
|
||||
@ -806,6 +807,16 @@ compile_test() {
|
||||
return
|
||||
;;
|
||||
|
||||
device_vm_build)
|
||||
# Add config parameter if running on Device VM.
|
||||
if [ -n "$VGX_DEVICE_VM_BUILD" ]; then
|
||||
echo "#define NV_DEVICE_VM_BUILD" | append_conftest "generic"
|
||||
else
|
||||
echo "#undef NV_DEVICE_VM_BUILD" | append_conftest "generic"
|
||||
fi
|
||||
return
|
||||
;;
|
||||
|
||||
vfio_register_notifier)
|
||||
#
|
||||
# Check number of arguments required.
|
||||
@ -1273,6 +1284,77 @@ compile_test() {
|
||||
compile_check_conftest "$CODE" "NV_PFN_ADDRESS_SPACE_STRUCT_PRESENT" "" "types"
|
||||
;;
|
||||
|
||||
egm_module_helper_api_present)
|
||||
#
|
||||
# Determine if egm management api are present or not.
|
||||
#
|
||||
CODE="
|
||||
#include <linux/pci.h>
|
||||
#include <linux/nvgrace-egm.h>
|
||||
void conftest_egm_module_helper_api_present() {
|
||||
struct pci_dev *pdev;
|
||||
register_egm_node(pdev);
|
||||
unregister_egm_node(0);
|
||||
}
|
||||
"
|
||||
compile_check_conftest "$CODE" "NV_EGM_MODULE_HELPER_API_PRESENT" "" "types"
|
||||
;;
|
||||
|
||||
egm_bad_pages_handling_support)
|
||||
#
|
||||
# Determine if egm_bad_pages_list is present or not.
|
||||
#
|
||||
CODE="
|
||||
#include <linux/types.h>
|
||||
#include <linux/egm.h>
|
||||
void conftest_egm_bad_pages_handle() {
|
||||
int ioctl = EGM_BAD_PAGES_LIST;
|
||||
struct egm_bad_pages_list list;
|
||||
}
|
||||
"
|
||||
|
||||
compile_check_conftest "$CODE" "NV_EGM_BAD_PAGES_HANDLING_SUPPORT" "" "types"
|
||||
;;
|
||||
|
||||
class_create_has_no_owner_arg)
|
||||
#
|
||||
# Determine if the class_create API with the new signature
|
||||
# is present or not.
|
||||
#
|
||||
# Added by commit 1aaba11da9aa ("driver core: class: remove
|
||||
# module * from class_create()") in v6.4 (2023-03-13)
|
||||
#
|
||||
CODE="
|
||||
#include <linux/device/class.h>
|
||||
void conftest_class_create() {
|
||||
struct class *class;
|
||||
class = class_create(\"test\");
|
||||
}"
|
||||
|
||||
compile_check_conftest "$CODE" "NV_CLASS_CREATE_HAS_NO_OWNER_ARG" "" "types"
|
||||
;;
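For context, the NV_CLASS_CREATE_HAS_NO_OWNER_ARG result produced by this conftest is typically consumed with a small #ifdef at the class_create() call site. The sketch below is illustrative only; the variable name, class name, and wrapper function are assumptions, not taken from this change:

```c
#include <linux/module.h>
#include <linux/device/class.h>
#include <linux/err.h>

static struct class *nv_class; /* hypothetical */

static int nv_register_class(void)
{
#if defined(NV_CLASS_CREATE_HAS_NO_OWNER_ARG)
    nv_class = class_create("nvidia");              /* v6.4+ signature   */
#else
    nv_class = class_create(THIS_MODULE, "nvidia"); /* older signature   */
#endif
    return IS_ERR(nv_class) ? PTR_ERR(nv_class) : 0;
}
```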
|
||||
|
||||
class_devnode_has_const_arg)
|
||||
#
|
||||
# Determine if the class.devnode is present with the new signature.
|
||||
#
|
||||
# Added by commit ff62b8e6588f ("driver core: make struct
|
||||
# class.devnode() take a const *") in v6.2 (2022-11-23)
|
||||
#
|
||||
CODE="
|
||||
#include <linux/device.h>
|
||||
static char *conftest_devnode(const struct device *device, umode_t *mode) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
void conftest_class_devnode() {
|
||||
struct class class;
|
||||
class.devnode = conftest_devnode;
|
||||
}"
|
||||
|
||||
compile_check_conftest "$CODE" "NV_CLASS_DEVNODE_HAS_CONST_ARG" "" "types"
|
||||
;;
|
||||
|
||||
pci_irq_vector_helpers)
|
||||
#
|
||||
# Determine if pci_alloc_irq_vectors(), pci_free_irq_vectors()
|
||||
@ -1770,22 +1852,6 @@ compile_test() {
|
||||
fi
|
||||
;;
|
||||
|
||||
pnv_pci_get_npu_dev)
|
||||
#
|
||||
# Determine if the pnv_pci_get_npu_dev function is present.
|
||||
#
|
||||
# Added by commit 5d2aa710e697 ("powerpc/powernv: Add support
|
||||
# for Nvlink NPUs") in v4.5
|
||||
#
|
||||
CODE="
|
||||
#include <linux/pci.h>
|
||||
void conftest_pnv_pci_get_npu_dev() {
|
||||
pnv_pci_get_npu_dev();
|
||||
}"
|
||||
|
||||
compile_check_conftest "$CODE" "NV_PNV_PCI_GET_NPU_DEV_PRESENT" "" "functions"
|
||||
;;
|
||||
|
||||
kernel_write_has_pointer_pos_arg)
|
||||
#
|
||||
# Determine the pos argument type, which was changed by commit
|
||||
@ -5604,6 +5670,26 @@ compile_test() {
|
||||
compile_check_conftest "$CODE" "NV_ICC_GET_PRESENT" "" "functions"
|
||||
;;
|
||||
|
||||
devm_of_icc_get)
|
||||
#
|
||||
# Determine if devm_of_icc_get() function is present
|
||||
#
|
||||
# Added by commit e145d9a ("interconnect: Add devm_of_icc_get() as
|
||||
# exported API for user interconnect API")
|
||||
#
|
||||
CODE="
|
||||
#if defined(NV_LINUX_INTERCONNECT_H_PRESENT)
|
||||
#include <linux/interconnect.h>
|
||||
#endif
|
||||
void conftest_devm_of_icc_get(void)
|
||||
{
|
||||
devm_of_icc_get();
|
||||
}
|
||||
"
|
||||
|
||||
compile_check_conftest "$CODE" "NV_DEVM_ICC_GET_PRESENT" "" "functions"
|
||||
;;
|
||||
|
||||
icc_set_bw)
|
||||
#
|
||||
# Determine if icc_set_bw() function is present
|
||||
@ -6050,6 +6136,20 @@ compile_test() {
|
||||
compile_check_conftest "$CODE" "NV_PLATFORM_IRQ_COUNT_PRESENT" "" "functions"
|
||||
;;
|
||||
|
||||
pcie_reset_flr)
|
||||
#
|
||||
# Determine if the pcie_reset_flr() function is present
|
||||
#
|
||||
# Added by commit 56f107d ("PCI: Add pcie_reset_flr() with
|
||||
# 'probe' argument") in v5.15.
|
||||
CODE="
|
||||
#include <linux/pci.h>
|
||||
int conftest_pcie_reset_flr(void) {
|
||||
return pcie_reset_flr();
|
||||
}"
|
||||
compile_check_conftest "$CODE" "NV_PCIE_RESET_FLR_PRESENT" "" "functions"
|
||||
;;
|
||||
|
||||
devm_clk_bulk_get_all)
|
||||
#
|
||||
# Determine if devm_clk_bulk_get_all() function is present
|
||||
@ -6571,7 +6671,8 @@ compile_test() {
|
||||
# Determine whether drm_fbdev_ttm_setup is present.
|
||||
#
|
||||
# Added by commit aae4682e5d66 ("drm/fbdev-generic:
|
||||
# Convert to fbdev-ttm") in v6.11.
|
||||
# Convert to fbdev-ttm") in v6.11. Removed by commit
|
||||
# 1000634477d8 ("drm/fbdev-ttm:Convert to client-setup") in v6.13.
|
||||
#
|
||||
CODE="
|
||||
#include <drm/drm_fb_helper.h>
|
||||
@ -6585,6 +6686,25 @@ compile_test() {
|
||||
compile_check_conftest "$CODE" "NV_DRM_FBDEV_TTM_SETUP_PRESENT" "" "functions"
|
||||
;;
|
||||
|
||||
drm_client_setup)
|
||||
#
|
||||
# Determine whether drm_client_setup is present.
|
||||
#
|
||||
# Added by commit d07fdf922592 ("drm/fbdev-ttm:
|
||||
# Convert to client-setup") in v6.13.
|
||||
#
|
||||
CODE="
|
||||
#include <drm/drm_fb_helper.h>
|
||||
#if defined(NV_DRM_DRM_CLIENT_SETUP_H_PRESENT)
|
||||
#include <drm/drm_client_setup.h>
|
||||
#endif
|
||||
void conftest_drm_client_setup(void) {
|
||||
drm_client_setup();
|
||||
}"
|
||||
|
||||
compile_check_conftest "$CODE" "NV_DRM_CLIENT_SETUP_PRESENT" "" "functions"
|
||||
;;
|
||||
|
||||
drm_output_poll_changed)
|
||||
#
|
||||
# Determine whether drm_mode_config_funcs.output_poll_changed
|
||||
@ -6608,6 +6728,38 @@ compile_test() {
|
||||
compile_check_conftest "$CODE" "NV_DRM_OUTPUT_POLL_CHANGED_PRESENT" "" "types"
|
||||
;;
|
||||
|
||||
aperture_remove_conflicting_devices)
|
||||
#
|
||||
# Determine whether aperture_remove_conflicting_devices is present.
|
||||
#
|
||||
# Added by commit 7283f862bd991 ("drm: Implement DRM aperture
|
||||
# helpers under video/") in v6.0
|
||||
CODE="
|
||||
#if defined(NV_LINUX_APERTURE_H_PRESENT)
|
||||
#include <linux/aperture.h>
|
||||
#endif
|
||||
void conftest_aperture_remove_conflicting_devices(void) {
|
||||
aperture_remove_conflicting_devices();
|
||||
}"
|
||||
compile_check_conftest "$CODE" "NV_APERTURE_REMOVE_CONFLICTING_DEVICES_PRESENT" "" "functions"
|
||||
;;
|
||||
|
||||
aperture_remove_conflicting_pci_devices)
|
||||
#
|
||||
# Determine whether aperture_remove_conflicting_pci_devices is present.
|
||||
#
|
||||
# Added by commit 7283f862bd991 ("drm: Implement DRM aperture
|
||||
# helpers under video/") in v6.0
|
||||
CODE="
|
||||
#if defined(NV_LINUX_APERTURE_H_PRESENT)
|
||||
#include <linux/aperture.h>
|
||||
#endif
|
||||
void conftest_aperture_remove_conflicting_pci_devices(void) {
|
||||
aperture_remove_conflicting_pci_devices();
|
||||
}"
|
||||
compile_check_conftest "$CODE" "NV_APERTURE_REMOVE_CONFLICTING_PCI_DEVICES_PRESENT" "" "functions"
|
||||
;;
|
||||
|
||||
drm_aperture_remove_conflicting_pci_framebuffers)
|
||||
#
|
||||
# Determine whether drm_aperture_remove_conflicting_pci_framebuffers is present.
|
||||
@ -6701,17 +6853,17 @@ compile_test() {
|
||||
# This test is not complete and may return false positive.
|
||||
#
|
||||
CODE="
|
||||
#include <crypto/akcipher.h>
|
||||
#include <crypto/algapi.h>
|
||||
#include <crypto/ecc_curve.h>
|
||||
#include <crypto/ecdh.h>
|
||||
#include <crypto/hash.h>
|
||||
#include <crypto/internal/ecc.h>
|
||||
#include <crypto/kpp.h>
|
||||
#include <crypto/public_key.h>
|
||||
#include <crypto/sm3.h>
|
||||
#include <keys/asymmetric-type.h>
|
||||
#include <linux/crypto.h>
|
||||
#include <crypto/akcipher.h>
|
||||
#include <crypto/algapi.h>
|
||||
#include <crypto/ecc_curve.h>
|
||||
#include <crypto/ecdh.h>
|
||||
#include <crypto/hash.h>
|
||||
#include <crypto/internal/ecc.h>
|
||||
#include <crypto/kpp.h>
|
||||
#include <crypto/public_key.h>
|
||||
#include <crypto/sm3.h>
|
||||
#include <keys/asymmetric-type.h>
|
||||
#include <linux/crypto.h>
|
||||
void conftest_crypto(void) {
|
||||
struct shash_desc sd;
|
||||
struct crypto_shash cs;
|
||||
@ -6721,6 +6873,47 @@ compile_test() {
|
||||
compile_check_conftest "$CODE" "NV_CRYPTO_PRESENT" "" "symbols"
|
||||
;;
|
||||
|
||||
crypto_akcipher_verify)
|
||||
#
|
||||
# Determine whether the crypto_akcipher_verify API is still present.
|
||||
# It was removed by commit 6b34562 ('crypto: akcipher - Drop sign/verify operations')
|
||||
# in v6.13-rc1 (2024-10-04).
|
||||
#
|
||||
# This test is dependent on the crypto conftest to determine whether crypto should be
|
||||
# enabled at all. That means that if the kernel is old enough such that crypto_akcipher_verify
|
||||
#
|
||||
# The test merely checks for the presence of the API, as it assumes that if the API
|
||||
# is no longer present, the new API to replace it (crypto_sig_verify) must be present.
|
||||
# If the kernel version is too old to have crypto_akcipher_verify, it will fail the crypto
|
||||
# conftest above and all crypto code will be compiled out.
|
||||
#
|
||||
CODE="
|
||||
#include <crypto/akcipher.h>
|
||||
#include <linux/crypto.h>
|
||||
void conftest_crypto_akcipher_verify(void) {
|
||||
(void)crypto_akcipher_verify;
|
||||
}"
|
||||
|
||||
compile_check_conftest "$CODE" "NV_CRYPTO_AKCIPHER_VERIFY_PRESENT" "" "symbols"
|
||||
;;
|
||||
|
||||
ecc_digits_from_bytes)
|
||||
#
|
||||
# Determine whether ecc_digits_from_bytes is present.
|
||||
# It was added in commit c6ab5c915da4 ('crypto: ecc - Prevent ecc_digits_from_bytes from
|
||||
# reading too many bytes') in v6.10.
|
||||
#
|
||||
# This functionality is needed when crypto_akcipher_verify is not present.
|
||||
#
|
||||
CODE="
|
||||
#include <crypto/internal/ecc.h>
|
||||
void conftest_ecc_digits_from_bytes(void) {
|
||||
(void)ecc_digits_from_bytes;
|
||||
}"
|
||||
|
||||
compile_check_conftest "$CODE" "NV_ECC_DIGITS_FROM_BYTES_PRESENT" "" "symbols"
|
||||
;;
|
||||
|
||||
mempolicy_has_unified_nodes)
|
||||
#
|
||||
# Determine if the 'mempolicy' structure has
|
||||
@ -7142,6 +7335,131 @@ compile_test() {
|
||||
compile_check_conftest "$CODE" "NV_DRM_GEM_OBJECT_FUNCS_PRESENT" "" "types"
|
||||
;;
|
||||
|
||||
sg_dma_page_iter)
|
||||
#
|
||||
# Determine if the struct sg_dma_page_iter is present.
|
||||
# This also serves to know if the argument type of the macro
|
||||
# sg_page_iter_dma_address() changed:
|
||||
# - before: struct sg_page_iter *piter
|
||||
# - after: struct sg_dma_page_iter *dma_iter
|
||||
#
|
||||
# Added by commit d901b2760dc6c ("lib/scatterlist: Provide a DMA
|
||||
# page iterator") v5.0.
|
||||
#
|
||||
CODE="
|
||||
#include <linux/scatterlist.h>
|
||||
struct sg_dma_page_iter conftest_dma_page_iter;"
|
||||
|
||||
compile_check_conftest "$CODE" "NV_SG_DMA_PAGE_ITER_PRESENT" "" "types"
|
||||
;;
|
||||
|
||||
# FIXME: See if we can remove this test
|
||||
for_each_sgtable_dma_page)
|
||||
#
|
||||
# Determine if macro for_each_sgtable_dma_page is present.
|
||||
#
|
||||
# Added by commit 709d6d73c756 ("scatterlist: add generic wrappers
|
||||
# for iterating over sgtable objects") v5.7.
|
||||
#
|
||||
CODE="
|
||||
#include <linux/scatterlist.h>
|
||||
void conftest_for_each_sgtable_dma_page(void) {
|
||||
for_each_sgtable_dma_page();
|
||||
}"
|
||||
|
||||
compile_check_conftest "$CODE" "NV_FOR_EACH_SGTABLE_DMA_PAGE_PRESENT" "" "functions"
|
||||
;;
|
||||
|
||||
drm_aperture_remove_conflicting_framebuffers)
|
||||
#
|
||||
# Determine whether drm_aperture_remove_conflicting_framebuffers is present.
|
||||
#
|
||||
# drm_aperture_remove_conflicting_framebuffers was added in commit 2916059147ea
|
||||
# ("drm/aperture: Add infrastructure for aperture ownership) in
|
||||
# v5.14-rc1 (2021-04-12)
|
||||
#
|
||||
CODE="
|
||||
#if defined(NV_DRM_DRM_APERTURE_H_PRESENT)
|
||||
#include <drm/drm_aperture.h>
|
||||
#endif
|
||||
void conftest_drm_aperture_remove_conflicting_framebuffers(void) {
|
||||
drm_aperture_remove_conflicting_framebuffers();
|
||||
}"
|
||||
|
||||
compile_check_conftest "$CODE" "NV_DRM_APERTURE_REMOVE_CONFLICTING_FRAMEBUFFERS_PRESENT" "" "functions"
|
||||
;;
|
||||
|
||||
drm_aperture_remove_conflicting_framebuffers_has_driver_arg)
|
||||
#
|
||||
# Determine whether drm_aperture_remove_conflicting_framebuffers
|
||||
# takes a struct drm_driver * as its fourth argument.
|
||||
#
|
||||
# Prior to commit 97c9bfe3f6605d41eb8f1206e6e0f62b31ba15d6, the
|
||||
# second argument was a char * pointer to the driver's name.
|
||||
#
|
||||
# To test if drm_aperture_remove_conflicting_framebuffers() has
|
||||
# a req_driver argument, define a function with the expected
|
||||
# signature and then define the corresponding function
|
||||
# implementation with the expected signature. Successful compilation
|
||||
# indicates that this function has the expected signature.
|
||||
#
|
||||
# This change occurred in commit 97c9bfe3f660 ("drm/aperture: Pass
|
||||
# DRM driver structure instead of driver name") in v5.15
|
||||
# (2021-06-29).
|
||||
#
|
||||
CODE="
|
||||
#if defined(NV_DRM_DRM_DRV_H_PRESENT)
|
||||
#include <drm/drm_drv.h>
|
||||
#endif
|
||||
#if defined(NV_DRM_DRM_APERTURE_H_PRESENT)
|
||||
#include <drm/drm_aperture.h>
|
||||
#endif
|
||||
typeof(drm_aperture_remove_conflicting_framebuffers) conftest_drm_aperture_remove_conflicting_framebuffers;
|
||||
int conftest_drm_aperture_remove_conflicting_framebuffers(resource_size_t base, resource_size_t size,
|
||||
bool primary, const struct drm_driver *req_driver)
|
||||
{
|
||||
return 0;
|
||||
}"
|
||||
|
||||
compile_check_conftest "$CODE" "NV_DRM_APERTURE_REMOVE_CONFLICTING_FRAMEBUFFERS_HAS_DRIVER_ARG" "" "types"
|
||||
;;
|
||||
|
||||
drm_aperture_remove_conflicting_framebuffers_has_no_primary_arg)
|
||||
#
|
||||
# Determine whether drm_aperture_remove_conflicting_framebuffers
|
||||
# has its third argument as a bool.
|
||||
#
|
||||
# Prior to commit 62aeaeaa1b267c5149abee6b45967a5df3feed58, the
|
||||
# third argument was a bool for figuring out whether the legacy vga
|
||||
# stuff should be nuked, but it's only for pci devices and not
|
||||
# really needed in this function.
|
||||
#
|
||||
# To test if drm_aperture_remove_conflicting_framebuffers() has
|
||||
# a bool primary argument, define a function with the expected
|
||||
# signature and then define the corresponding function
|
||||
# implementation with the expected signature. Successful compilation
|
||||
# indicates that this function has the expected signature.
|
||||
#
|
||||
# This change occurred in commit 62aeaeaa1b26 ("drm/aperture: Remove
|
||||
# primary argument") in v6.5 (2023-04-16).
|
||||
#
|
||||
CODE="
|
||||
#if defined(NV_DRM_DRM_DRV_H_PRESENT)
|
||||
#include <drm/drm_drv.h>
|
||||
#endif
|
||||
#if defined(NV_DRM_DRM_APERTURE_H_PRESENT)
|
||||
#include <drm/drm_aperture.h>
|
||||
#endif
|
||||
typeof(drm_aperture_remove_conflicting_framebuffers) conftest_drm_aperture_remove_conflicting_framebuffers;
|
||||
int conftest_drm_aperture_remove_conflicting_framebuffers(resource_size_t base, resource_size_t size,
|
||||
const struct drm_driver *req_driver)
|
||||
{
|
||||
return 0;
|
||||
}"
|
||||
|
||||
compile_check_conftest "$CODE" "NV_DRM_APERTURE_REMOVE_CONFLICTING_FRAMEBUFFERS_HAS_NO_PRIMARY_ARG" "" "types"
|
||||
;;
|
||||
|
||||
struct_page_has_zone_device_data)
|
||||
#
|
||||
# Determine if struct page has a 'zone_device_data' field.
|
||||
@ -7174,6 +7492,23 @@ compile_test() {
|
||||
compile_check_conftest "$CODE" "NV_FOLIO_TEST_SWAPCACHE_PRESENT" "" "functions"
|
||||
;;
|
||||
|
||||
module_import_ns_takes_constant)
|
||||
#
|
||||
# Determine if the MODULE_IMPORT_NS macro takes a string literal
|
||||
# or constant.
|
||||
#
|
||||
# Commit cdd30ebb1b9f ("module: Convert symbol namespace to
|
||||
# string literal") changed MODULE_IMPORT_NS to take a string
|
||||
# literal in Linux kernel v6.13.
|
||||
#
|
||||
CODE="
|
||||
#include <linux/module.h>
|
||||
|
||||
MODULE_IMPORT_NS(DMA_BUF);"
|
||||
|
||||
compile_check_conftest "$CODE" "NV_MODULE_IMPORT_NS_TAKES_CONSTANT" "" "generic"
|
||||
;;
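A hedged sketch of how a module would typically consume this conftest result; only the NV_MODULE_IMPORT_NS_TAKES_CONSTANT define and the DMA_BUF namespace come from the test above, the guard usage itself is assumed:

```c
#include <linux/module.h>

#if defined(NV_MODULE_IMPORT_NS_TAKES_CONSTANT)
MODULE_IMPORT_NS(DMA_BUF);      /* pre-v6.13: preprocessor token  */
#else
MODULE_IMPORT_NS("DMA_BUF");    /* v6.13+: string literal         */
#endif
```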
|
||||
|
||||
# When adding a new conftest entry, please use the correct format for
|
||||
# specifying the relevant upstream Linux kernel commit. Please
|
||||
# avoid specifying -rc kernels, and only use SHAs that actually exist
|
||||
|
@ -16,6 +16,7 @@ NV_HEADER_PRESENCE_TESTS = \
|
||||
drm/drm_drv.h \
|
||||
drm/drm_fbdev_generic.h \
|
||||
drm/drm_fbdev_ttm.h \
|
||||
drm/drm_client_setup.h \
|
||||
drm/drm_framebuffer.h \
|
||||
drm/drm_connector.h \
|
||||
drm/drm_probe_helper.h \
|
||||
@ -34,6 +35,8 @@ NV_HEADER_PRESENCE_TESTS = \
|
||||
generated/autoconf.h \
|
||||
generated/compile.h \
|
||||
generated/utsrelease.h \
|
||||
linux/aperture.h \
|
||||
linux/dma-direct.h \
|
||||
linux/efi.h \
|
||||
linux/kconfig.h \
|
||||
linux/platform/tegra/mc_utils.h \
|
||||
@ -102,5 +105,6 @@ NV_HEADER_PRESENCE_TESTS = \
|
||||
asm/cpufeature.h \
|
||||
linux/mpi.h \
|
||||
asm/mshyperv.h \
|
||||
crypto/sig.h \
|
||||
linux/pfn_t.h
|
||||
|
||||
|
@ -62,6 +62,13 @@
|
||||
#undef NV_DRM_FENCE_AVAILABLE
|
||||
#endif
|
||||
|
||||
#if defined(NV_DRM_CLIENT_SETUP_PRESENT) && \
|
||||
(defined(NV_DRM_APERTURE_REMOVE_CONFLICTING_PCI_FRAMEBUFFERS_PRESENT) || \
|
||||
defined(NV_APERTURE_REMOVE_CONFLICTING_PCI_DEVICES_PRESENT))
|
||||
#define NV_DRM_FBDEV_AVAILABLE
|
||||
#define NV_DRM_CLIENT_AVAILABLE
|
||||
#endif
|
||||
|
||||
/*
|
||||
* We can support color management if either drm_helper_crtc_enable_color_mgmt()
|
||||
* or drm_crtc_enable_color_mgmt() exist.
|
||||
|
@ -64,11 +64,22 @@
|
||||
#include <drm/drm_ioctl.h>
|
||||
#endif
|
||||
|
||||
#if defined(NV_DRM_FBDEV_AVAILABLE)
|
||||
#if defined(NV_LINUX_APERTURE_H_PRESENT)
|
||||
#include <linux/aperture.h>
|
||||
#endif
|
||||
|
||||
#if defined(NV_DRM_DRM_APERTURE_H_PRESENT)
|
||||
#include <drm/drm_aperture.h>
|
||||
#endif
|
||||
|
||||
#if defined(NV_DRM_FBDEV_AVAILABLE)
|
||||
#include <drm/drm_fb_helper.h>
|
||||
#endif
|
||||
|
||||
#if defined(NV_DRM_DRM_CLIENT_SETUP_H_PRESENT)
|
||||
#include <drm/drm_client_setup.h>
|
||||
#endif
|
||||
|
||||
#if defined(NV_DRM_DRM_FBDEV_TTM_H_PRESENT)
|
||||
#include <drm/drm_fbdev_ttm.h>
|
||||
#elif defined(NV_DRM_DRM_FBDEV_GENERIC_H_PRESENT)
|
||||
@ -1911,6 +1922,9 @@ static struct drm_driver nv_drm_driver = {
|
||||
#elif defined(NV_DRM_DRIVER_HAS_LEGACY_DEV_LIST)
|
||||
.legacy_dev_list = LIST_HEAD_INIT(nv_drm_driver.legacy_dev_list),
|
||||
#endif
|
||||
#if defined(DRM_FBDEV_TTM_DRIVER_OPS)
|
||||
DRM_FBDEV_TTM_DRIVER_OPS,
|
||||
#endif
|
||||
};
|
||||
|
||||
|
||||
@ -2013,14 +2027,22 @@ void nv_drm_register_drm_device(const nv_gpu_info_t *gpu_info)
|
||||
if (bus_is_pci) {
|
||||
struct pci_dev *pdev = to_pci_dev(device);
|
||||
|
||||
#if defined(NV_DRM_APERTURE_REMOVE_CONFLICTING_PCI_FRAMEBUFFERS_PRESENT)
|
||||
|
||||
#if defined(NV_DRM_APERTURE_REMOVE_CONFLICTING_PCI_FRAMEBUFFERS_HAS_DRIVER_ARG)
|
||||
drm_aperture_remove_conflicting_pci_framebuffers(pdev, &nv_drm_driver);
|
||||
#else
|
||||
drm_aperture_remove_conflicting_pci_framebuffers(pdev, nv_drm_driver.name);
|
||||
#endif
|
||||
|
||||
#elif defined(NV_APERTURE_REMOVE_CONFLICTING_PCI_DEVICES_PRESENT)
|
||||
aperture_remove_conflicting_pci_devices(pdev, nv_drm_driver.name);
|
||||
#endif
|
||||
nvKms->framebufferConsoleDisabled(nv_dev->pDevice);
|
||||
}
|
||||
#if defined(NV_DRM_FBDEV_TTM_AVAILABLE)
|
||||
#if defined(NV_DRM_CLIENT_AVAILABLE)
|
||||
drm_client_setup(dev, NULL);
|
||||
#elif defined(NV_DRM_FBDEV_TTM_AVAILABLE)
|
||||
drm_fbdev_ttm_setup(dev, 32);
|
||||
#elif defined(NV_DRM_FBDEV_GENERIC_AVAILABLE)
|
||||
drm_fbdev_generic_setup(dev, 32);
|
||||
|
@ -161,6 +161,20 @@ static int nv_drm_framebuffer_init(struct drm_device *dev,
|
||||
params.planes[i].memory = nv_gem->pMemory;
|
||||
params.planes[i].offset = fb->offsets[i];
|
||||
params.planes[i].pitch = fb->pitches[i];
|
||||
|
||||
/*
|
||||
* XXX Use drm_framebuffer_funcs.dirty and
|
||||
* drm_fb_helper_funcs.fb_dirty instead
|
||||
*
|
||||
* Currently using noDisplayCaching when registering surfaces with
|
||||
* NVKMS that are using memory allocated through the DRM
|
||||
* Dumb-Buffers API. This prevents Display Idle Frame Rate from
|
||||
* kicking in and preventing CPU updates to the surface memory from
|
||||
* not being reflected on the display. Ideally, DIFR would be
|
||||
* dynamically disabled whenever a user of the memory blits to the
|
||||
* frontbuffer. DRM provides the needed callbacks to achieve this.
|
||||
*/
|
||||
params.noDisplayCaching |= !!nv_gem->is_drm_dumb;
|
||||
}
|
||||
}
|
||||
params.height = fb->height;
|
||||
|
@ -167,7 +167,7 @@ static int __nv_drm_gem_nvkms_map(
|
||||
goto done;
|
||||
}
|
||||
|
||||
if (!nv_dev->hasVideoMemory) {
|
||||
if (!nvKms->isVidmem(pMemory)) {
|
||||
goto done;
|
||||
}
|
||||
|
||||
@ -218,11 +218,13 @@ static void *__nv_drm_gem_nvkms_prime_vmap(
|
||||
|
||||
/*
|
||||
* If this buffer isn't physically mapped, it might be backed by struct
|
||||
* pages. Use vmap in that case.
|
||||
* pages. Use vmap in that case. Do a noncached mapping for system memory
|
||||
* as display is non io-coherent device in case of Tegra.
|
||||
*/
|
||||
if (nv_nvkms_memory->pages_count > 0) {
|
||||
return nv_drm_vmap(nv_nvkms_memory->pages,
|
||||
nv_nvkms_memory->pages_count);
|
||||
nv_nvkms_memory->pages_count,
|
||||
false);
|
||||
}
|
||||
|
||||
return ERR_PTR(-ENOMEM);
|
||||
@ -310,7 +312,7 @@ static int __nv_drm_nvkms_gem_obj_init(
|
||||
pMemory,
|
||||
&pages,
|
||||
&numPages) &&
|
||||
!nv_dev->hasVideoMemory) {
|
||||
!nvKms->isVidmem(pMemory)) {
|
||||
/* GetMemoryPages may fail for vidmem allocations,
|
||||
* but it should not fail for sysmem allocations. */
|
||||
NV_DRM_DEV_LOG_ERR(nv_dev,
|
||||
@ -383,6 +385,8 @@ int nv_drm_dumb_create(
|
||||
goto nvkms_gem_obj_init_failed;
|
||||
}
|
||||
|
||||
nv_nvkms_memory->base.is_drm_dumb = true;
|
||||
|
||||
/* Always map dumb buffer memory up front. Clients are only expected
|
||||
* to use dumb buffers for software rendering, so they're not much use
|
||||
* without a CPU mapping.
|
||||
|
@ -72,7 +72,8 @@ static void *__nv_drm_gem_user_memory_prime_vmap(
|
||||
struct nv_drm_gem_user_memory *nv_user_memory = to_nv_user_memory(nv_gem);
|
||||
|
||||
return nv_drm_vmap(nv_user_memory->pages,
|
||||
nv_user_memory->pages_count);
|
||||
nv_user_memory->pages_count,
|
||||
true);
|
||||
}
|
||||
|
||||
static void __nv_drm_gem_user_memory_prime_vunmap(
|
||||
|
@ -172,8 +172,11 @@ struct drm_gem_object *nv_drm_gem_prime_import(struct drm_device *dev,
|
||||
*/
|
||||
gem_dst = nv_gem_src->ops->prime_dup(dev, nv_gem_src);
|
||||
|
||||
if (gem_dst)
|
||||
return gem_dst;
|
||||
if (gem_dst == NULL) {
|
||||
return ERR_PTR(-ENOTSUPP);
|
||||
}
|
||||
|
||||
return gem_dst;
|
||||
}
|
||||
}
|
||||
#endif /* NV_DMA_BUF_OWNER_PRESENT */
|
||||
|
@ -73,6 +73,8 @@ struct nv_drm_gem_object {
|
||||
|
||||
struct NvKmsKapiMemory *pMemory;
|
||||
|
||||
bool is_drm_dumb;
|
||||
|
||||
#if defined(NV_DRM_FENCE_AVAILABLE) && !defined(NV_DRM_GEM_OBJECT_HAS_RESV)
|
||||
nv_dma_resv_t resv;
|
||||
#endif
|
||||
|
@ -37,7 +37,7 @@ module_param_named(modeset, nv_drm_modeset_module_param, bool, 0400);
|
||||
#if defined(NV_DRM_FBDEV_AVAILABLE)
|
||||
MODULE_PARM_DESC(
|
||||
fbdev,
|
||||
"Create a framebuffer device (1 = enable, 0 = disable (default)) (EXPERIMENTAL)");
|
||||
"Create a framebuffer device (1 = enable (default), 0 = disable)");
|
||||
module_param_named(fbdev, nv_drm_fbdev_module_param, bool, 0400);
|
||||
#endif
|
||||
|
||||
|
@ -42,7 +42,7 @@
|
||||
#endif
|
||||
|
||||
bool nv_drm_modeset_module_param = false;
|
||||
bool nv_drm_fbdev_module_param = false;
|
||||
bool nv_drm_fbdev_module_param = true;
|
||||
|
||||
void *nv_drm_calloc(size_t nmemb, size_t size)
|
||||
{
|
||||
@ -156,9 +156,15 @@ void nv_drm_unlock_user_pages(unsigned long pages_count, struct page **pages)
|
||||
#define VM_USERMAP 0
|
||||
#endif
|
||||
|
||||
void *nv_drm_vmap(struct page **pages, unsigned long pages_count)
|
||||
void *nv_drm_vmap(struct page **pages, unsigned long pages_count, bool cached)
|
||||
{
|
||||
return vmap(pages, pages_count, VM_USERMAP, PAGE_KERNEL);
|
||||
pgprot_t prot = PAGE_KERNEL;
|
||||
|
||||
if (!cached) {
|
||||
prot = pgprot_noncached(PAGE_KERNEL);
|
||||
}
|
||||
|
||||
return vmap(pages, pages_count, VM_USERMAP, prot);
|
||||
}
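To make the new flag concrete, a minimal caller sketch follows; the page array and count are assumed to come from an existing GEM object, and the call site itself is illustrative rather than part of this change:

```c
/* Map sysmem pages uncached so CPU writes are visible to the non
 * IO-coherent display engine on Tegra, then release the mapping. */
void *ptr = nv_drm_vmap(pages, pages_count, false /* cached */);
if (ptr != NULL) {
    /* ... CPU access to the buffer ... */
    nv_drm_vunmap(ptr);
}
```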
|
||||
|
||||
void nv_drm_vunmap(void *address)
|
||||
|
@ -90,7 +90,7 @@ int nv_drm_lock_user_pages(unsigned long address,
|
||||
|
||||
void nv_drm_unlock_user_pages(unsigned long pages_count, struct page **pages);
|
||||
|
||||
void *nv_drm_vmap(struct page **pages, unsigned long pages_count);
|
||||
void *nv_drm_vmap(struct page **pages, unsigned long pages_count, bool cached);
|
||||
|
||||
void nv_drm_vunmap(void *address);
|
||||
|
||||
|
@ -66,8 +66,11 @@ NV_CONFTEST_FUNCTION_COMPILE_TESTS += dma_fence_set_error
NV_CONFTEST_FUNCTION_COMPILE_TESTS += fence_set_error
NV_CONFTEST_FUNCTION_COMPILE_TESTS += sync_file_get_fence
NV_CONFTEST_FUNCTION_COMPILE_TESTS += drm_aperture_remove_conflicting_pci_framebuffers
NV_CONFTEST_FUNCTION_COMPILE_TESTS += aperture_remove_conflicting_devices
NV_CONFTEST_FUNCTION_COMPILE_TESTS += aperture_remove_conflicting_pci_devices
NV_CONFTEST_FUNCTION_COMPILE_TESTS += drm_fbdev_generic_setup
NV_CONFTEST_FUNCTION_COMPILE_TESTS += drm_fbdev_ttm_setup
NV_CONFTEST_FUNCTION_COMPILE_TESTS += drm_client_setup
NV_CONFTEST_FUNCTION_COMPILE_TESTS += drm_connector_attach_hdr_output_metadata_property
NV_CONFTEST_FUNCTION_COMPILE_TESTS += drm_helper_crtc_enable_color_mgmt
NV_CONFTEST_FUNCTION_COMPILE_TESTS += drm_crtc_enable_color_mgmt

@ -89,6 +89,9 @@ module_param_named(opportunistic_display_sync, opportunistic_display_sync, bool,
static enum NvKmsDebugForceColorSpace debug_force_color_space = NVKMS_DEBUG_FORCE_COLOR_SPACE_NONE;
module_param_named(debug_force_color_space, debug_force_color_space, uint, 0400);

static bool enable_overlay_layers = true;
module_param_named(enable_overlay_layers, enable_overlay_layers, bool, 0400);

/* These parameters are used for fault injection tests. Normally the defaults
* should be used. */
MODULE_PARM_DESC(fail_malloc, "Fail the Nth call to nvkms_alloc");

@ -99,19 +102,40 @@ MODULE_PARM_DESC(malloc_verbose, "Report information about malloc calls on modul
static bool malloc_verbose = false;
module_param_named(malloc_verbose, malloc_verbose, bool, 0400);

/* Fail allocating the RM core channel for NVKMS using the i-th method (see
* FailAllocCoreChannelMethod). Failures not using the i-th method are ignored. */
MODULE_PARM_DESC(fail_alloc_core_channel, "Control testing for hardware core channel allocation failure");
static int fail_alloc_core_channel_method = -1;
module_param_named(fail_alloc_core_channel, fail_alloc_core_channel_method, int, 0400);

#if NVKMS_CONFIG_FILE_SUPPORTED
/* This parameter is used to find the dpy override conf file */
#define NVKMS_CONF_FILE_SPECIFIED (nvkms_conf != NULL)

MODULE_PARM_DESC(config_file,
"Path to the nvidia-modeset configuration file "
"(default: disabled)");
"Path to the nvidia-modeset configuration file (default: disabled)");
static char *nvkms_conf = NULL;
module_param_named(config_file, nvkms_conf, charp, 0400);
#endif

static atomic_t nvkms_alloc_called_count;

NvBool nvkms_test_fail_alloc_core_channel(
enum FailAllocCoreChannelMethod method
)
{
if (method != fail_alloc_core_channel_method) {
// don't fail if it's not the currently specified method
return NV_FALSE;
}

printk(KERN_INFO NVKMS_LOG_PREFIX
"Failing core channel allocation using method %d",
fail_alloc_core_channel_method);

return NV_TRUE;
}

NvBool nvkms_output_rounding_fix(void)
{
return output_rounding_fix;
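The fail_alloc_core_channel module parameter together with nvkms_test_fail_alloc_core_channel() gives fault-injection tests a way to force core channel allocation failures. A hedged sketch of how an allocation path might consult the hook; the surrounding function is hypothetical:

/* Hypothetical call site (illustration only). */
static NvBool example_setup_core_channel(void)
{
    if (nvkms_test_fail_alloc_core_channel(
            FAIL_ALLOC_CORE_CHANNEL_RM_SETUP_CORE_CHANNEL)) {
        /* Simulated failure requested via fail_alloc_core_channel=0. */
        return NV_FALSE;
    }

    /* ... the real core channel allocation would happen here ... */
    return NV_TRUE;
}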
@ -150,6 +174,11 @@ enum NvKmsDebugForceColorSpace nvkms_debug_force_color_space(void)
return debug_force_color_space;
}

NvBool nvkms_enable_overlay_layers(void)
{
return enable_overlay_layers;
}

NvBool nvkms_kernel_supports_syncpts(void)
{
/*

@ -1463,6 +1492,8 @@ static size_t nvkms_config_file_open
loff_t pos = 0;
#endif

*buff = NULL;

if (!nvkms_fs_mounted()) {
printk(KERN_ERR NVKMS_LOG_PREFIX "ERROR: Filesystems not mounted\n");
return 0;

@ -1486,6 +1517,11 @@ static size_t nvkms_config_file_open
goto done;
}

// Do not alloc a 0 sized buffer
if (file_size == 0) {
goto done;
}

*buff = nvkms_alloc(file_size, NV_FALSE);
if (*buff == NULL) {
printk(KERN_WARNING NVKMS_LOG_PREFIX "WARNING: Out of memory\n");

@ -104,6 +104,12 @@ typedef struct {
} read_minval;
} NvKmsSyncPtOpParams;

enum FailAllocCoreChannelMethod {
FAIL_ALLOC_CORE_CHANNEL_RM_SETUP_CORE_CHANNEL = 0,
FAIL_ALLOC_CORE_CHANNEL_RESTORE_CONSOLE = 1,
};

NvBool nvkms_test_fail_alloc_core_channel(enum FailAllocCoreChannelMethod method);
NvBool nvkms_output_rounding_fix(void);
NvBool nvkms_disable_hdmi_frl(void);
NvBool nvkms_disable_vrr_memclk_switch(void);

@ -111,6 +117,7 @@ NvBool nvkms_hdmi_deepcolor(void);
NvBool nvkms_vblank_sem_control(void);
NvBool nvkms_opportunistic_display_sync(void);
enum NvKmsDebugForceColorSpace nvkms_debug_force_color_space(void);
NvBool nvkms_enable_overlay_layers(void);

void nvkms_call_rm (void *ops);
void* nvkms_alloc (size_t size,

@ -40,9 +40,6 @@ NV_KERNEL_MODULE_TARGETS += $(NVIDIA_MODESET_KO)
NVIDIA_MODESET_BINARY_OBJECT := $(src)/nvidia-modeset/nv-modeset-kernel.o_binary
NVIDIA_MODESET_BINARY_OBJECT_O := nvidia-modeset/nv-modeset-kernel.o

quiet_cmd_symlink = SYMLINK $@
cmd_symlink = ln -sf $< $@

targets += $(NVIDIA_MODESET_BINARY_OBJECT_O)

$(obj)/$(NVIDIA_MODESET_BINARY_OBJECT_O): $(NVIDIA_MODESET_BINARY_OBJECT) FORCE

@ -189,6 +189,12 @@ int nvidia_p2p_get_pages( uint64_t p2p_token, uint32_t va_space,
struct nvidia_p2p_page_table **page_table,
void (*free_callback)(void *data), void *data);

/*
 * Flags to be used with persistent APIs
 */
#define NVIDIA_P2P_FLAGS_DEFAULT 0
#define NVIDIA_P2P_FLAGS_FORCE_BAR1_MAPPING 1

/*
 * @brief
 *   Pin and make the pages underlying a range of GPU virtual memory

@ -212,7 +218,11 @@ int nvidia_p2p_get_pages( uint64_t p2p_token, uint32_t va_space,
 * @param[out] page_table
 *   A pointer to an array of structures with P2P PTEs.
 * @param[in] flags
 *   Must be set to zero for now.
 *   NVIDIA_P2P_FLAGS_DEFAULT:
 *     Default value to be used if no specific behavior is expected.
 *   NVIDIA_P2P_FLAGS_FORCE_BAR1_MAPPING:
 *     Force BAR1 mappings on certain coherent platforms,
 *     subject to capability and supported topology.
 *
 * @return
 *    0           upon successful completion.
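The added flags document the behavior of the persistent pinning API. A hedged caller-side sketch follows; the entry point is assumed here to be nvidia_p2p_get_pages_persistent() and its exact prototype should be verified against nv-p2p.h before use:

/* Assumed prototype; verify against nv-p2p.h. Illustration only. */
static int example_pin_persistent(uint64_t va, uint64_t size,
                                  struct nvidia_p2p_page_table **page_table)
{
    /* Callers now pass an NVIDIA_P2P_FLAGS_* value instead of a literal 0. */
    return nvidia_p2p_get_pages_persistent(va, size, page_table,
                                           NVIDIA_P2P_FLAGS_DEFAULT);
}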
@ -1,30 +1,25 @@
/*******************************************************************************
    Copyright (c) 2024 NVIDIA Corporation

    Permission is hereby granted, free of charge, to any person obtaining a copy
    of this software and associated documentation files (the "Software"), to
    deal in the Software without restriction, including without limitation the
    rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
    sell copies of the Software, and to permit persons to whom the Software is
    furnished to do so, subject to the following conditions:

        The above copyright notice and this permission notice shall be
        included in all copies or substantial portions of the Software.

    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
    THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
    FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
    DEALINGS IN THE SOFTWARE.

*******************************************************************************/

// AUTO GENERATED -- DO NOT EDIT - this file automatically generated by refhdr2class.pl
// Command: ../../../bin/manuals/refhdr2class.pl clc365.h c365 ACCESS_COUNTER_NOTIFY_BUFFER --search_str=NV_ACCESS_COUNTER --input_file=nv_ref_dev_access_counter.h

/*
 * SPDX-FileCopyrightText: Copyright (c) 2021-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

#ifndef _clc365_h_
#define _clc365_h_
@ -1,30 +1,25 @@
/*******************************************************************************
    Copyright (c) 2024 NVIDIA Corporation

    Permission is hereby granted, free of charge, to any person obtaining a copy
    of this software and associated documentation files (the "Software"), to
    deal in the Software without restriction, including without limitation the
    rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
    sell copies of the Software, and to permit persons to whom the Software is
    furnished to do so, subject to the following conditions:

        The above copyright notice and this permission notice shall be
        included in all copies or substantial portions of the Software.

    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
    THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
    FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
    DEALINGS IN THE SOFTWARE.

*******************************************************************************/

// AUTO GENERATED -- DO NOT EDIT - this file automatically generated by refhdr2class.pl
// Command: ../../../bin/manuals/refhdr2class.pl clc369.h c369 MMU_FAULT_BUFFER --search_str=NV_MMU_FAULT --input_file=nv_ref_dev_mmu_fault.h

/*
 * SPDX-FileCopyrightText: Copyright (c) 2021-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

#ifndef _clc369_h_
#define _clc369_h_
@ -1,26 +1,25 @@
/*******************************************************************************
    Copyright (c) 2012-2015 NVIDIA Corporation

    Permission is hereby granted, free of charge, to any person obtaining a copy
    of this software and associated documentation files (the "Software"), to
    deal in the Software without restriction, including without limitation the
    rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
    sell copies of the Software, and to permit persons to whom the Software is
    furnished to do so, subject to the following conditions:

        The above copyright notice and this permission notice shall be
        included in all copies or substantial portions of the Software.

    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
    THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
    FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
    DEALINGS IN THE SOFTWARE.

*******************************************************************************/

/*
 * SPDX-FileCopyrightText: Copyright (c) 2020-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

#ifndef _clc36f_h_
#define _clc36f_h_

@ -257,7 +256,6 @@ typedef volatile struct Nvc36fControl_struct {
#define NVC36F_CLEAR_FAULTED_TYPE                                  31:31
#define NVC36F_CLEAR_FAULTED_TYPE_PBDMA_FAULTED                    0x00000000
#define NVC36F_CLEAR_FAULTED_TYPE_ENG_FAULTED                      0x00000001
#define NVC36F_QUADRO_VERIFY                                       (0x000000a0)

/* GPFIFO entry format */
@ -1,26 +1,25 @@
/*******************************************************************************
    Copyright (c) 2012-2015 NVIDIA Corporation

    Permission is hereby granted, free of charge, to any person obtaining a copy
    of this software and associated documentation files (the "Software"), to
    deal in the Software without restriction, including without limitation the
    rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
    sell copies of the Software, and to permit persons to whom the Software is
    furnished to do so, subject to the following conditions:

        The above copyright notice and this permission notice shall be
        included in all copies or substantial portions of the Software.

    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
    THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
    FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
    DEALINGS IN THE SOFTWARE.

*******************************************************************************/

/*
 * SPDX-FileCopyrightText: Copyright (c) 2020-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

#ifndef _clc46f_h_
#define _clc46f_h_

@ -259,7 +258,6 @@ typedef volatile struct Nvc46fControl_struct {
#define NVC46F_CLEAR_FAULTED_TYPE                                  31:31
#define NVC46F_CLEAR_FAULTED_TYPE_PBDMA_FAULTED                    0x00000000
#define NVC46F_CLEAR_FAULTED_TYPE_ENG_FAULTED                      0x00000001
#define NVC46F_QUADRO_VERIFY                                       (0x000000a0)

/* GPFIFO entry format */
@ -1,26 +1,25 @@
/*******************************************************************************
    Copyright (c) 2012-2015 NVIDIA Corporation

    Permission is hereby granted, free of charge, to any person obtaining a copy
    of this software and associated documentation files (the "Software"), to
    deal in the Software without restriction, including without limitation the
    rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
    sell copies of the Software, and to permit persons to whom the Software is
    furnished to do so, subject to the following conditions:

        The above copyright notice and this permission notice shall be
        included in all copies or substantial portions of the Software.

    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
    THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
    FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
    DEALINGS IN THE SOFTWARE.

*******************************************************************************/

/*
 * SPDX-FileCopyrightText: Copyright (c) 2020-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

#ifndef _clc56f_h_
#define _clc56f_h_

@ -261,7 +260,6 @@ typedef volatile struct Nvc56fControl_struct {
#define NVC56F_CLEAR_FAULTED_TYPE                                  31:31
#define NVC56F_CLEAR_FAULTED_TYPE_PBDMA_FAULTED                    0x00000000
#define NVC56F_CLEAR_FAULTED_TYPE_ENG_FAULTED                      0x00000001
#define NVC56F_QUADRO_VERIFY                                       (0x000000a0)

/* GPFIFO entry format */
@ -1,19 +1,19 @@
/*******************************************************************************
    Copyright (c) 1993-2004 NVIDIA Corporation
    Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.

    Permission is hereby granted, free of charge, to any person obtaining a copy
    of this software and associated documentation files (the "Software"), to
    deal in the Software without restriction, including without limitation the
    rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
    sell copies of the Software, and to permit persons to whom the Software is
    furnished to do so, subject to the following conditions:
    Permission is hereby granted, free of charge, to any person obtaining a
    copy of this software and associated documentation files (the "Software"),
    to deal in the Software without restriction, including without limitation
    the rights to use, copy, modify, merge, publish, distribute, sublicense,
    and/or sell copies of the Software, and to permit persons to whom the
    Software is furnished to do so, subject to the following conditions:

        The above copyright notice and this permission notice shall be
        included in all copies or substantial portions of the Software.
    The above copyright notice and this permission notice shall be included in
    all copies or substantial portions of the Software.

    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
    FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
    THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
    FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER

@ -21,8 +21,6 @@

*******************************************************************************/


#include "nvtypes.h"

#ifndef _clc5b5_h_

@ -34,64 +32,6 @@ extern "C" {

#define TURING_DMA_COPY_A                                                           (0x0000C5B5)

typedef volatile struct _clc5b5_tag0 {
    NvV32 Reserved00[0x40];
    NvV32 Nop;                                                                  // 0x00000100 - 0x00000103
    NvV32 Reserved01[0xF];
    NvV32 PmTrigger;                                                            // 0x00000140 - 0x00000143
    NvV32 Reserved02[0x3F];
    NvV32 SetSemaphoreA;                                                        // 0x00000240 - 0x00000243
    NvV32 SetSemaphoreB;                                                        // 0x00000244 - 0x00000247
    NvV32 SetSemaphorePayload;                                                  // 0x00000248 - 0x0000024B
    NvV32 Reserved03[0x2];
    NvV32 SetRenderEnableA;                                                     // 0x00000254 - 0x00000257
    NvV32 SetRenderEnableB;                                                     // 0x00000258 - 0x0000025B
    NvV32 SetRenderEnableC;                                                     // 0x0000025C - 0x0000025F
    NvV32 SetSrcPhysMode;                                                       // 0x00000260 - 0x00000263
    NvV32 SetDstPhysMode;                                                       // 0x00000264 - 0x00000267
    NvV32 Reserved04[0x6];
    NvV32 SetGlobalCounterUpper;                                                // 0x00000280 - 0x00000283
    NvV32 SetGlobalCounterLower;                                                // 0x00000284 - 0x00000287
    NvV32 SetPageoutStartPAUpper;                                               // 0x00000288 - 0x0000028B
    NvV32 SetPageoutStartPALower;                                               // 0x0000028C - 0x0000028F
    NvV32 Reserved05[0x1C];
    NvV32 LaunchDma;                                                            // 0x00000300 - 0x00000303
    NvV32 Reserved06[0x3F];
    NvV32 OffsetInUpper;                                                        // 0x00000400 - 0x00000403
    NvV32 OffsetInLower;                                                        // 0x00000404 - 0x00000407
    NvV32 OffsetOutUpper;                                                       // 0x00000408 - 0x0000040B
    NvV32 OffsetOutLower;                                                       // 0x0000040C - 0x0000040F
    NvV32 PitchIn;                                                              // 0x00000410 - 0x00000413
    NvV32 PitchOut;                                                             // 0x00000414 - 0x00000417
    NvV32 LineLengthIn;                                                         // 0x00000418 - 0x0000041B
    NvV32 LineCount;                                                            // 0x0000041C - 0x0000041F
    NvV32 Reserved07[0xB8];
    NvV32 SetRemapConstA;                                                       // 0x00000700 - 0x00000703
    NvV32 SetRemapConstB;                                                       // 0x00000704 - 0x00000707
    NvV32 SetRemapComponents;                                                   // 0x00000708 - 0x0000070B
    NvV32 SetDstBlockSize;                                                      // 0x0000070C - 0x0000070F
    NvV32 SetDstWidth;                                                          // 0x00000710 - 0x00000713
    NvV32 SetDstHeight;                                                         // 0x00000714 - 0x00000717
    NvV32 SetDstDepth;                                                          // 0x00000718 - 0x0000071B
    NvV32 SetDstLayer;                                                          // 0x0000071C - 0x0000071F
    NvV32 SetDstOrigin;                                                         // 0x00000720 - 0x00000723
    NvV32 Reserved08[0x1];
    NvV32 SetSrcBlockSize;                                                      // 0x00000728 - 0x0000072B
    NvV32 SetSrcWidth;                                                          // 0x0000072C - 0x0000072F
    NvV32 SetSrcHeight;                                                         // 0x00000730 - 0x00000733
    NvV32 SetSrcDepth;                                                          // 0x00000734 - 0x00000737
    NvV32 SetSrcLayer;                                                          // 0x00000738 - 0x0000073B
    NvV32 SetSrcOrigin;                                                         // 0x0000073C - 0x0000073F
    NvV32 Reserved09[0x1];
    NvV32 SrcOriginX;                                                           // 0x00000744 - 0x00000747
    NvV32 SrcOriginY;                                                           // 0x00000748 - 0x0000074B
    NvV32 DstOriginX;                                                           // 0x0000074C - 0x0000074F
    NvV32 DstOriginY;                                                           // 0x00000750 - 0x00000753
    NvV32 Reserved10[0x270];
    NvV32 PmTriggerEnd;                                                         // 0x00001114 - 0x00001117
    NvV32 Reserved11[0x3BA];
} turing_dma_copy_aControlPio;

#define NVC5B5_NOP                                                              (0x00000100)
#define NVC5B5_NOP_PARAMETER                                                    31:0
#define NVC5B5_PM_TRIGGER                                                       (0x00000140)

@ -125,14 +65,6 @@ typedef volatile struct _clc5b5_tag0 {
#define NVC5B5_SET_DST_PHYS_MODE_TARGET_COHERENT_SYSMEM                         (0x00000001)
#define NVC5B5_SET_DST_PHYS_MODE_TARGET_NONCOHERENT_SYSMEM                      (0x00000002)
#define NVC5B5_SET_DST_PHYS_MODE_BASIC_KIND                                     5:2
#define NVC5B5_SET_GLOBAL_COUNTER_UPPER                                         (0x00000280)
#define NVC5B5_SET_GLOBAL_COUNTER_UPPER_V                                       31:0
#define NVC5B5_SET_GLOBAL_COUNTER_LOWER                                         (0x00000284)
#define NVC5B5_SET_GLOBAL_COUNTER_LOWER_V                                       31:0
#define NVC5B5_SET_PAGEOUT_START_PAUPPER                                        (0x00000288)
#define NVC5B5_SET_PAGEOUT_START_PAUPPER_V                                      4:0
#define NVC5B5_SET_PAGEOUT_START_PALOWER                                        (0x0000028C)
#define NVC5B5_SET_PAGEOUT_START_PALOWER_V                                      31:0
#define NVC5B5_LAUNCH_DMA                                                       (0x00000300)
#define NVC5B5_LAUNCH_DMA_DATA_TRANSFER_TYPE                                    1:0
#define NVC5B5_LAUNCH_DMA_DATA_TRANSFER_TYPE_NONE                               (0x00000000)

@ -199,8 +131,6 @@ typedef volatile struct _clc5b5_tag0 {
#define NVC5B5_LAUNCH_DMA_VPRMODE                                               23:22
#define NVC5B5_LAUNCH_DMA_VPRMODE_VPR_NONE                                      (0x00000000)
#define NVC5B5_LAUNCH_DMA_VPRMODE_VPR_VID2VID                                   (0x00000001)
#define NVC5B5_LAUNCH_DMA_VPRMODE_VPR_VID2SYS                                   (0x00000002)
#define NVC5B5_LAUNCH_DMA_VPRMODE_VPR_SYS2VID                                   (0x00000003)
#define NVC5B5_LAUNCH_DMA_RESERVED_START_OF_COPY                                24:24
#define NVC5B5_LAUNCH_DMA_DISABLE_PLC                                           26:26
#define NVC5B5_LAUNCH_DMA_DISABLE_PLC_FALSE                                     (0x00000000)

@ -1,19 +1,19 @@
/*******************************************************************************
    Copyright (c) 1993-2004 NVIDIA Corporation
    Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.

    Permission is hereby granted, free of charge, to any person obtaining a copy
    of this software and associated documentation files (the "Software"), to
    deal in the Software without restriction, including without limitation the
    rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
    sell copies of the Software, and to permit persons to whom the Software is
    furnished to do so, subject to the following conditions:
    Permission is hereby granted, free of charge, to any person obtaining a
    copy of this software and associated documentation files (the "Software"),
    to deal in the Software without restriction, including without limitation
    the rights to use, copy, modify, merge, publish, distribute, sublicense,
    and/or sell copies of the Software, and to permit persons to whom the
    Software is furnished to do so, subject to the following conditions:

        The above copyright notice and this permission notice shall be
        included in all copies or substantial portions of the Software.
    The above copyright notice and this permission notice shall be included in
    all copies or substantial portions of the Software.

    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
    FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
    THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
    FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER

@ -21,8 +21,6 @@

*******************************************************************************/


#include "nvtypes.h"

#ifndef _clc6b5_h_

@ -34,64 +32,6 @@ extern "C" {

#define AMPERE_DMA_COPY_A                                                           (0x0000C6B5)

typedef volatile struct _clc6b5_tag0 {
    NvV32 Reserved00[0x40];
    NvV32 Nop;                                                                  // 0x00000100 - 0x00000103
    NvV32 Reserved01[0xF];
    NvV32 PmTrigger;                                                            // 0x00000140 - 0x00000143
    NvV32 Reserved02[0x3F];
    NvV32 SetSemaphoreA;                                                        // 0x00000240 - 0x00000243
    NvV32 SetSemaphoreB;                                                        // 0x00000244 - 0x00000247
    NvV32 SetSemaphorePayload;                                                  // 0x00000248 - 0x0000024B
    NvV32 Reserved03[0x2];
    NvV32 SetRenderEnableA;                                                     // 0x00000254 - 0x00000257
    NvV32 SetRenderEnableB;                                                     // 0x00000258 - 0x0000025B
    NvV32 SetRenderEnableC;                                                     // 0x0000025C - 0x0000025F
    NvV32 SetSrcPhysMode;                                                       // 0x00000260 - 0x00000263
    NvV32 SetDstPhysMode;                                                       // 0x00000264 - 0x00000267
    NvV32 Reserved04[0x6];
    NvV32 SetGlobalCounterUpper;                                                // 0x00000280 - 0x00000283
    NvV32 SetGlobalCounterLower;                                                // 0x00000284 - 0x00000287
    NvV32 SetPageoutStartPAUpper;                                               // 0x00000288 - 0x0000028B
    NvV32 SetPageoutStartPALower;                                               // 0x0000028C - 0x0000028F
    NvV32 Reserved05[0x1C];
    NvV32 LaunchDma;                                                            // 0x00000300 - 0x00000303
    NvV32 Reserved06[0x3F];
    NvV32 OffsetInUpper;                                                        // 0x00000400 - 0x00000403
    NvV32 OffsetInLower;                                                        // 0x00000404 - 0x00000407
    NvV32 OffsetOutUpper;                                                       // 0x00000408 - 0x0000040B
    NvV32 OffsetOutLower;                                                       // 0x0000040C - 0x0000040F
    NvV32 PitchIn;                                                              // 0x00000410 - 0x00000413
    NvV32 PitchOut;                                                             // 0x00000414 - 0x00000417
    NvV32 LineLengthIn;                                                         // 0x00000418 - 0x0000041B
    NvV32 LineCount;                                                            // 0x0000041C - 0x0000041F
    NvV32 Reserved07[0xB8];
    NvV32 SetRemapConstA;                                                       // 0x00000700 - 0x00000703
    NvV32 SetRemapConstB;                                                       // 0x00000704 - 0x00000707
    NvV32 SetRemapComponents;                                                   // 0x00000708 - 0x0000070B
    NvV32 SetDstBlockSize;                                                      // 0x0000070C - 0x0000070F
    NvV32 SetDstWidth;                                                          // 0x00000710 - 0x00000713
    NvV32 SetDstHeight;                                                         // 0x00000714 - 0x00000717
    NvV32 SetDstDepth;                                                          // 0x00000718 - 0x0000071B
    NvV32 SetDstLayer;                                                          // 0x0000071C - 0x0000071F
    NvV32 SetDstOrigin;                                                         // 0x00000720 - 0x00000723
    NvV32 Reserved08[0x1];
    NvV32 SetSrcBlockSize;                                                      // 0x00000728 - 0x0000072B
    NvV32 SetSrcWidth;                                                          // 0x0000072C - 0x0000072F
    NvV32 SetSrcHeight;                                                         // 0x00000730 - 0x00000733
    NvV32 SetSrcDepth;                                                          // 0x00000734 - 0x00000737
    NvV32 SetSrcLayer;                                                          // 0x00000738 - 0x0000073B
    NvV32 SetSrcOrigin;                                                         // 0x0000073C - 0x0000073F
    NvV32 Reserved09[0x1];
    NvV32 SrcOriginX;                                                           // 0x00000744 - 0x00000747
    NvV32 SrcOriginY;                                                           // 0x00000748 - 0x0000074B
    NvV32 DstOriginX;                                                           // 0x0000074C - 0x0000074F
    NvV32 DstOriginY;                                                           // 0x00000750 - 0x00000753
    NvV32 Reserved10[0x270];
    NvV32 PmTriggerEnd;                                                         // 0x00001114 - 0x00001117
    NvV32 Reserved11[0x3BA];
} ampere_dma_copy_aControlPio;

#define NVC6B5_NOP                                                              (0x00000100)
#define NVC6B5_NOP_PARAMETER                                                    31:0
#define NVC6B5_PM_TRIGGER                                                       (0x00000140)

@ -131,14 +71,6 @@ typedef volatile struct _clc6b5_tag0 {
#define NVC6B5_SET_DST_PHYS_MODE_BASIC_KIND                                     5:2
#define NVC6B5_SET_DST_PHYS_MODE_PEER_ID                                        8:6
#define NVC6B5_SET_DST_PHYS_MODE_FLA                                            9:9
#define NVC6B5_SET_GLOBAL_COUNTER_UPPER                                         (0x00000280)
#define NVC6B5_SET_GLOBAL_COUNTER_UPPER_V                                       31:0
#define NVC6B5_SET_GLOBAL_COUNTER_LOWER                                         (0x00000284)
#define NVC6B5_SET_GLOBAL_COUNTER_LOWER_V                                       31:0
#define NVC6B5_SET_PAGEOUT_START_PAUPPER                                        (0x00000288)
#define NVC6B5_SET_PAGEOUT_START_PAUPPER_V                                      4:0
#define NVC6B5_SET_PAGEOUT_START_PALOWER                                        (0x0000028C)
#define NVC6B5_SET_PAGEOUT_START_PALOWER_V                                      31:0
#define NVC6B5_LAUNCH_DMA                                                       (0x00000300)
#define NVC6B5_LAUNCH_DMA_DATA_TRANSFER_TYPE                                    1:0
#define NVC6B5_LAUNCH_DMA_DATA_TRANSFER_TYPE_NONE                               (0x00000000)

@ -199,8 +131,6 @@ typedef volatile struct _clc6b5_tag0 {
#define NVC6B5_LAUNCH_DMA_VPRMODE                                               23:22
#define NVC6B5_LAUNCH_DMA_VPRMODE_VPR_NONE                                      (0x00000000)
#define NVC6B5_LAUNCH_DMA_VPRMODE_VPR_VID2VID                                   (0x00000001)
#define NVC6B5_LAUNCH_DMA_VPRMODE_VPR_VID2SYS                                   (0x00000002)
#define NVC6B5_LAUNCH_DMA_VPRMODE_VPR_SYS2VID                                   (0x00000003)
#define NVC6B5_LAUNCH_DMA_RESERVED_START_OF_COPY                                24:24
#define NVC6B5_LAUNCH_DMA_DISABLE_PLC                                           26:26
#define NVC6B5_LAUNCH_DMA_DISABLE_PLC_FALSE                                     (0x00000000)

@ -1,19 +1,19 @@
/*******************************************************************************
    Copyright (c) 1993-2004 NVIDIA Corporation
    Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.

    Permission is hereby granted, free of charge, to any person obtaining a copy
    of this software and associated documentation files (the "Software"), to
    deal in the Software without restriction, including without limitation the
    rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
    sell copies of the Software, and to permit persons to whom the Software is
    furnished to do so, subject to the following conditions:
    Permission is hereby granted, free of charge, to any person obtaining a
    copy of this software and associated documentation files (the "Software"),
    to deal in the Software without restriction, including without limitation
    the rights to use, copy, modify, merge, publish, distribute, sublicense,
    and/or sell copies of the Software, and to permit persons to whom the
    Software is furnished to do so, subject to the following conditions:

        The above copyright notice and this permission notice shall be
        included in all copies or substantial portions of the Software.
    The above copyright notice and this permission notice shall be included in
    all copies or substantial portions of the Software.

    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
    FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
    THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
    FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER

@ -21,8 +21,6 @@

*******************************************************************************/


#include "nvtypes.h"

#ifndef _clc7b5_h_

@ -34,69 +32,6 @@ extern "C" {

#define AMPERE_DMA_COPY_B                                                           (0x0000C7B5)

typedef volatile struct _clc7b5_tag0 {
    NvV32 Reserved00[0x40];
    NvV32 Nop;                                                                  // 0x00000100 - 0x00000103
    NvV32 Reserved01[0xF];
    NvV32 PmTrigger;                                                            // 0x00000140 - 0x00000143
    NvV32 Reserved02[0x36];
    NvV32 SetMonitoredFenceType;                                                // 0x0000021C - 0x0000021F
    NvV32 SetMonitoredFenceSignalAddrBaseUpper;                                 // 0x00000220 - 0x00000223
    NvV32 SetMonitoredFenceSignalAddrBaseLower;                                 // 0x00000224 - 0x00000227
    NvV32 Reserved03[0x6];
    NvV32 SetSemaphoreA;                                                        // 0x00000240 - 0x00000243
    NvV32 SetSemaphoreB;                                                        // 0x00000244 - 0x00000247
    NvV32 SetSemaphorePayload;                                                  // 0x00000248 - 0x0000024B
    NvV32 SetSemaphorePayloadUpper;                                             // 0x0000024C - 0x0000024F
    NvV32 Reserved04[0x1];
    NvV32 SetRenderEnableA;                                                     // 0x00000254 - 0x00000257
    NvV32 SetRenderEnableB;                                                     // 0x00000258 - 0x0000025B
    NvV32 SetRenderEnableC;                                                     // 0x0000025C - 0x0000025F
    NvV32 SetSrcPhysMode;                                                       // 0x00000260 - 0x00000263
    NvV32 SetDstPhysMode;                                                       // 0x00000264 - 0x00000267
    NvV32 Reserved05[0x6];
    NvV32 SetGlobalCounterUpper;                                                // 0x00000280 - 0x00000283
    NvV32 SetGlobalCounterLower;                                                // 0x00000284 - 0x00000287
    NvV32 SetPageoutStartPAUpper;                                               // 0x00000288 - 0x0000028B
    NvV32 SetPageoutStartPALower;                                               // 0x0000028C - 0x0000028F
    NvV32 Reserved06[0x1C];
    NvV32 LaunchDma;                                                            // 0x00000300 - 0x00000303
    NvV32 Reserved07[0x3F];
    NvV32 OffsetInUpper;                                                        // 0x00000400 - 0x00000403
    NvV32 OffsetInLower;                                                        // 0x00000404 - 0x00000407
    NvV32 OffsetOutUpper;                                                       // 0x00000408 - 0x0000040B
    NvV32 OffsetOutLower;                                                       // 0x0000040C - 0x0000040F
    NvV32 PitchIn;                                                              // 0x00000410 - 0x00000413
    NvV32 PitchOut;                                                             // 0x00000414 - 0x00000417
    NvV32 LineLengthIn;                                                         // 0x00000418 - 0x0000041B
    NvV32 LineCount;                                                            // 0x0000041C - 0x0000041F
    NvV32 Reserved08[0xB8];
    NvV32 SetRemapConstA;                                                       // 0x00000700 - 0x00000703
    NvV32 SetRemapConstB;                                                       // 0x00000704 - 0x00000707
    NvV32 SetRemapComponents;                                                   // 0x00000708 - 0x0000070B
    NvV32 SetDstBlockSize;                                                      // 0x0000070C - 0x0000070F
    NvV32 SetDstWidth;                                                          // 0x00000710 - 0x00000713
    NvV32 SetDstHeight;                                                         // 0x00000714 - 0x00000717
    NvV32 SetDstDepth;                                                          // 0x00000718 - 0x0000071B
    NvV32 SetDstLayer;                                                          // 0x0000071C - 0x0000071F
    NvV32 SetDstOrigin;                                                         // 0x00000720 - 0x00000723
    NvV32 Reserved09[0x1];
    NvV32 SetSrcBlockSize;                                                      // 0x00000728 - 0x0000072B
    NvV32 SetSrcWidth;                                                          // 0x0000072C - 0x0000072F
    NvV32 SetSrcHeight;                                                         // 0x00000730 - 0x00000733
    NvV32 SetSrcDepth;                                                          // 0x00000734 - 0x00000737
    NvV32 SetSrcLayer;                                                          // 0x00000738 - 0x0000073B
    NvV32 SetSrcOrigin;                                                         // 0x0000073C - 0x0000073F
    NvV32 Reserved10[0x1];
    NvV32 SrcOriginX;                                                           // 0x00000744 - 0x00000747
    NvV32 SrcOriginY;                                                           // 0x00000748 - 0x0000074B
    NvV32 DstOriginX;                                                           // 0x0000074C - 0x0000074F
    NvV32 DstOriginY;                                                           // 0x00000750 - 0x00000753
    NvV32 Reserved11[0x270];
    NvV32 PmTriggerEnd;                                                         // 0x00001114 - 0x00001117
    NvV32 Reserved12[0x3BA];
} ampere_dma_copy_bControlPio;

#define NVC7B5_NOP                                                              (0x00000100)
#define NVC7B5_NOP_PARAMETER                                                    31:0
#define NVC7B5_PM_TRIGGER                                                       (0x00000140)

@ -146,14 +81,6 @@ typedef volatile struct _clc7b5_tag0 {
#define NVC7B5_SET_DST_PHYS_MODE_BASIC_KIND                                     5:2
#define NVC7B5_SET_DST_PHYS_MODE_PEER_ID                                        8:6
#define NVC7B5_SET_DST_PHYS_MODE_FLA                                            9:9
#define NVC7B5_SET_GLOBAL_COUNTER_UPPER                                         (0x00000280)
#define NVC7B5_SET_GLOBAL_COUNTER_UPPER_V                                       31:0
#define NVC7B5_SET_GLOBAL_COUNTER_LOWER                                         (0x00000284)
#define NVC7B5_SET_GLOBAL_COUNTER_LOWER_V                                       31:0
#define NVC7B5_SET_PAGEOUT_START_PAUPPER                                        (0x00000288)
#define NVC7B5_SET_PAGEOUT_START_PAUPPER_V                                      4:0
#define NVC7B5_SET_PAGEOUT_START_PALOWER                                        (0x0000028C)
#define NVC7B5_SET_PAGEOUT_START_PALOWER_V                                      31:0
#define NVC7B5_LAUNCH_DMA                                                       (0x00000300)
#define NVC7B5_LAUNCH_DMA_DATA_TRANSFER_TYPE                                    1:0
#define NVC7B5_LAUNCH_DMA_DATA_TRANSFER_TYPE_NONE                               (0x00000000)

@ -223,8 +150,6 @@ typedef volatile struct _clc7b5_tag0 {
#define NVC7B5_LAUNCH_DMA_VPRMODE                                               23:22
#define NVC7B5_LAUNCH_DMA_VPRMODE_VPR_NONE                                      (0x00000000)
#define NVC7B5_LAUNCH_DMA_VPRMODE_VPR_VID2VID                                   (0x00000001)
#define NVC7B5_LAUNCH_DMA_VPRMODE_VPR_VID2SYS                                   (0x00000002)
#define NVC7B5_LAUNCH_DMA_VPRMODE_VPR_SYS2VID                                   (0x00000003)
#define NVC7B5_LAUNCH_DMA_RESERVED_START_OF_COPY                                24:24
#define NVC7B5_LAUNCH_DMA_DISABLE_PLC                                           26:26
#define NVC7B5_LAUNCH_DMA_DISABLE_PLC_FALSE                                     (0x00000000)
74
kernel-open/nvidia-uvm/clca6f.h
Normal file
74
kernel-open/nvidia-uvm/clca6f.h
Normal file
@ -0,0 +1,74 @@
/*
 * SPDX-FileCopyrightText: Copyright (c) 2003-2023 NVIDIA CORPORATION & AFFILIATES
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

#ifndef __gb202_clca6f_h__
#define __gb202_clca6f_h__

typedef volatile struct Nvca6fControl_struct {
    NvU32 Ignored00[0x23];        /*                                  0000-008b*/
    NvU32 GPPut;                  /* GP FIFO put offset               008c-008f*/
    NvU32 Ignored01[0x5c];
} Nvca6fControl, BlackwellBControlGPFifo;

#define BLACKWELL_CHANNEL_GPFIFO_B                 (0x0000CA6F)

#define NVCA6F_SET_OBJECT                          (0x00000000)
#define NVCA6F_SEM_ADDR_LO                         (0x0000005c)
#define NVCA6F_SEM_ADDR_LO_OFFSET                  31:2
#define NVCA6F_SEM_ADDR_HI                         (0x00000060)
#define NVCA6F_SEM_ADDR_HI_OFFSET                  24:0
#define NVCA6F_SEM_PAYLOAD_LO                      (0x00000064)
#define NVCA6F_SEM_PAYLOAD_HI                      (0x00000068)
#define NVCA6F_SEM_EXECUTE                         (0x0000006c)
#define NVCA6F_SEM_EXECUTE_OPERATION               2:0
#define NVCA6F_SEM_EXECUTE_OPERATION_ACQUIRE       0x00000000
#define NVCA6F_SEM_EXECUTE_OPERATION_RELEASE       0x00000001
#define NVCA6F_SEM_EXECUTE_RELEASE_WFI             20:20
#define NVCA6F_SEM_EXECUTE_RELEASE_WFI_DIS         0x00000000
#define NVCA6F_SEM_EXECUTE_PAYLOAD_SIZE            24:24
#define NVCA6F_SEM_EXECUTE_PAYLOAD_SIZE_32BIT      0x00000000

/* GPFIFO entry format */
#define NVCA6F_GP_ENTRY__SIZE                      8
#define NVCA6F_GP_ENTRY0_FETCH                     0:0
#define NVCA6F_GP_ENTRY0_FETCH_UNCONDITIONAL       0x00000000
#define NVCA6F_GP_ENTRY0_FETCH_CONDITIONAL         0x00000001
#define NVCA6F_GP_ENTRY0_GET                       31:2
#define NVCA6F_GP_ENTRY0_OPERAND                   31:0
#define NVCA6F_GP_ENTRY0_PB_EXTENDED_BASE_OPERAND  24:8
#define NVCA6F_GP_ENTRY1_GET_HI                    7:0
#define NVCA6F_GP_ENTRY1_LEVEL                     9:9
#define NVCA6F_GP_ENTRY1_LEVEL_MAIN                0x00000000
#define NVCA6F_GP_ENTRY1_LEVEL_SUBROUTINE          0x00000001
#define NVCA6F_GP_ENTRY1_LENGTH                    30:10
#define NVCA6F_GP_ENTRY1_SYNC                      31:31
#define NVCA6F_GP_ENTRY1_SYNC_PROCEED              0x00000000
#define NVCA6F_GP_ENTRY1_SYNC_WAIT                 0x00000001
#define NVCA6F_GP_ENTRY1_OPCODE                    7:0
#define NVCA6F_GP_ENTRY1_OPCODE_NOP                0x00000000
#define NVCA6F_GP_ENTRY1_OPCODE_ILLEGAL            0x00000001
#define NVCA6F_GP_ENTRY1_OPCODE_GP_CRC             0x00000002
#define NVCA6F_GP_ENTRY1_OPCODE_PB_CRC             0x00000003
#define NVCA6F_GP_ENTRY1_OPCODE_SET_PB_SEGMENT_EXTENDED_BASE 0x00000004

#endif // __gb202_clca6f_h__
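The NVCA6F_GP_ENTRY* macros above use NVIDIA's hi:lo bit-range notation. The sketch below is an illustration only: it packs a GPFIFO entry with a hand-rolled helper rather than the driver's HW/DRF macros, and the assumed field semantics (pushbuffer address split across GET/GET_HI, length in 32-bit words) should be checked against the class documentation.

#include <stdint.h>

/* Place 'value' into the bit range hi:lo of a 32-bit word (illustration). */
static inline uint32_t field(uint32_t hi, uint32_t lo, uint32_t value)
{
    return (value & ((1u << (hi - lo + 1u)) - 1u)) << lo;
}

static void example_build_gp_entry(uint64_t pushbuf_va, uint32_t len_dwords,
                                   uint32_t *entry0, uint32_t *entry1)
{
    *entry0 = field(31, 2, (uint32_t)(pushbuf_va >> 2));   /* GP_ENTRY0_GET */
    *entry1 = field(7, 0, (uint32_t)(pushbuf_va >> 32))    /* GP_ENTRY1_GET_HI */
            | field(30, 10, len_dwords)                    /* GP_ENTRY1_LENGTH */
            | field(31, 31, 0);                            /* GP_ENTRY1_SYNC_PROCEED */
}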
44
kernel-open/nvidia-uvm/clcab5.h
Normal file
44
kernel-open/nvidia-uvm/clcab5.h
Normal file
@ -0,0 +1,44 @@
/*
 * SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

#ifndef _clcab5_h_
#define _clcab5_h_

#define BLACKWELL_DMA_COPY_B                                 (0x0000CAB5)

#define NVCAB5_LAUNCH_DMA                                    (0x00000300)
#define NVCAB5_LAUNCH_DMA_DATA_TRANSFER_TYPE                 1:0
#define NVCAB5_LAUNCH_DMA_DATA_TRANSFER_TYPE_NONE            (0x00000000)
#define NVCAB5_LAUNCH_DMA_DATA_TRANSFER_TYPE_PIPELINED       (0x00000001)
#define NVCAB5_LAUNCH_DMA_DATA_TRANSFER_TYPE_NON_PIPELINED   (0x00000002)
#define NVCAB5_LAUNCH_DMA_DATA_TRANSFER_TYPE_PREFETCH        (0x00000003)

#define NVCAB5_REQ_ATTR                                      (0x00000754)
#define NVCAB5_REQ_ATTR_PREFETCH_L2_CLASS                    1:0
#define NVCAB5_REQ_ATTR_PREFETCH_L2_CLASS_EVICT_FIRST        (0x00000000)
#define NVCAB5_REQ_ATTR_PREFETCH_L2_CLASS_EVICT_NORMAL       (0x00000001)
#define NVCAB5_REQ_ATTR_PREFETCH_L2_CLASS_EVICT_LAST         (0x00000002)
#define NVCAB5_REQ_ATTR_PREFETCH_L2_CLASS_EVICT_DEMOTE       (0x00000003)

#endif /* _clcab5_h_ */
@ -1,25 +1,25 @@
/*******************************************************************************
    Copyright (c) 2021-2022 NVIDIA Corporation

    Permission is hereby granted, free of charge, to any person obtaining a copy
    of this software and associated documentation files (the "Software"), to
    deal in the Software without restriction, including without limitation the
    rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
    sell copies of the Software, and to permit persons to whom the Software is
    furnished to do so, subject to the following conditions:

        The above copyright notice and this permission notice shall be
        included in all copies or substantial portions of the Software.

    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
    THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
    FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
    DEALINGS IN THE SOFTWARE.

*******************************************************************************/
/*
 * SPDX-FileCopyrightText: Copyright (c) 2022-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

#include "nvtypes.h"

@ -32,6 +32,28 @@ extern "C" {

#define HOPPER_SEC2_WORK_LAUNCH_A                                               (0x0000CBA2)

typedef volatile struct _clcba2_tag0 {
    NvV32 Reserved00[0x100];
    NvV32 DecryptCopySrcAddrHi;                                                 // 0x00000400 - 0x00000403
    NvV32 DecryptCopySrcAddrLo;                                                 // 0x00000404 - 0x00000407
    NvV32 DecryptCopyDstAddrHi;                                                 // 0x00000408 - 0x0000040B
    NvV32 DecryptCopyDstAddrLo;                                                 // 0x0000040c - 0x0000040F
    NvU32 DecryptCopySize;                                                      // 0x00000410 - 0x00000413
    NvU32 DecryptCopyAuthTagAddrHi;                                             // 0x00000414 - 0x00000417
    NvU32 DecryptCopyAuthTagAddrLo;                                             // 0x00000418 - 0x0000041B
    NvV32 DigestAddrHi;                                                         // 0x0000041C - 0x0000041F
    NvV32 DigestAddrLo;                                                         // 0x00000420 - 0x00000423
    NvV32 Reserved01[0x7];
    NvV32 SemaphoreA;                                                           // 0x00000440 - 0x00000443
    NvV32 SemaphoreB;                                                           // 0x00000444 - 0x00000447
    NvV32 SemaphoreSetPayloadLower;                                             // 0x00000448 - 0x0000044B
    NvV32 SemaphoreSetPayloadUppper;                                            // 0x0000044C - 0x0000044F
    NvV32 SemaphoreD;                                                           // 0x00000450 - 0x00000453
    NvU32 Reserved02[0x7];
    NvV32 Execute;                                                              // 0x00000470 - 0x00000473
    NvV32 Reserved03[0x23];
} NVCBA2_HOPPER_SEC2_WORK_LAUNCH_AControlPio;

#define NVCBA2_DECRYPT_COPY_SRC_ADDR_HI                                         (0x00000400)
#define NVCBA2_DECRYPT_COPY_SRC_ADDR_HI_DATA                                    24:0
#define NVCBA2_DECRYPT_COPY_SRC_ADDR_LO                                         (0x00000404)

@ -90,6 +112,45 @@ extern "C" {
#define NVCBA2_EXECUTE_TIMESTAMP                                                5:5
#define NVCBA2_EXECUTE_TIMESTAMP_DISABLE                                        (0x00000000)
#define NVCBA2_EXECUTE_TIMESTAMP_ENABLE                                         (0x00000001)
#define NVCBA2_EXECUTE_PHYSICAL_SCRUBBER                                        6:6
#define NVCBA2_EXECUTE_PHYSICAL_SCRUBBER_DISABLE                                (0x00000000)
#define NVCBA2_EXECUTE_PHYSICAL_SCRUBBER_ENABLE                                 (0x00000001)

// Class definitions
#define NVCBA2_DECRYPT_COPY_SIZE_MAX_BYTES                                      (2*1024*1024)
#define NVCBA2_DECRYPT_SCRUB_SIZE_MAX_BYTES                                     (1024*1024*1024)

// Errors
#define NVCBA2_ERROR_NONE                                                       (0x00000000)
#define NVCBA2_ERROR_DECRYPT_COPY_SRC_ADDR_MISALIGNED_POINTER                   (0x00000001)
#define NVCBA2_ERROR_DECRYPT_COPY_DEST_ADDR_MISALIGNED_POINTER                  (0x00000002)
#define NVCBA2_ERROR_DECRYPT_COPY_AUTH_TAG_ADDR_MISALIGNED_POINTER              (0x00000003)
#define NVCBA2_ERROR_DECRYPT_COPY_DMA_NACK                                      (0x00000004)
#define NVCBA2_ERROR_DECRYPT_COPY_AUTH_TAG_MISMATCH                             (0x00000005)
#define NVCBA2_ERROR_METHOD_STREAM_AUTH_TAG_ADDR_MISALIGNED_POINTER             (0x00000006)
#define NVCBA2_ERROR_METHOD_STREAM_AUTH_TAG_ADDR_DMA_NACK                       (0x00000007)
#define NVCBA2_ERROR_METHOD_STREAM_AUTH_TAG_CHECK_FAILURE                       (0x00000008)
#define NVCBA2_ERROR_MISALIGNED_SIZE                                            (0x00000009)
#define NVCBA2_ERROR_MISSING_METHODS                                            (0x0000000A)
#define NVCBA2_ERROR_SEMAPHORE_RELEASE_DMA_NACK                                 (0x0000000B)
#define NVCBA2_ERROR_DECRYPT_SIZE_MAX_EXCEEDED                                  (0x0000000C)
#define NVCBA2_ERROR_OS_APPLICATION                                             (0x0000000D)
#define NVCBA2_ERROR_INVALID_CTXSW_REQUEST                                      (0x0000000E)
#define NVCBA2_ERROR_BUFFER_OVERFLOW                                            (0x0000000F)
#define NVCBA2_ERROR_IV_OVERFLOW                                                (0x00000010)
#define NVCBA2_ERROR_INTERNAL_SETUP_FAILURE                                     (0x00000011)
#define NVCBA2_ERROR_DECRYPT_COPY_INTERNAL_DMA_FAILURE                          (0x00000012)
#define NVCBA2_ERROR_METHOD_STREAM_AUTH_TAG_ADDR_INTERNAL_DMA_FAILURE           (0x00000013)
#define NVCBA2_ERROR_METHOD_STREAM_AUTH_TAG_HMAC_CALC_FAILURE                   (0x00000014)
#define NVCBA2_ERROR_NONCE_OVERFLOW                                             (0x00000015)
#define NVCBA2_ERROR_AES_GCM_DECRYPTION_FAILURE                                 (0x00000016)
#define NVCBA2_ERROR_SEMAPHORE_RELEASE_INTERNAL_DMA_FAILURE                     (0x00000017)
#define NVCBA2_ERROR_KEY_DERIVATION_FAILURE                                     (0x00000018)
#define NVCBA2_ERROR_SCRUBBER_FAILURE                                           (0x00000019)
#define NVCBA2_ERROR_SCRUBBER_INVALD_ADDRESS                                    (0x0000001a)
#define NVCBA2_ERROR_SCRUBBER_INSUFFICIENT_PERMISSIONS                          (0x0000001b)
#define NVCBA2_ERROR_SCRUBBER_MUTEX_ACQUIRE_FAILURE                             (0x0000001c)
#define NVCBA2_ERROR_SCRUB_SIZE_MAX_EXCEEDED                                    (0x0000001d)

#ifdef __cplusplus
};     /* extern "C" */
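A small host-side sanity check grounded in the class limits above; the helper itself is hypothetical, but the hardware reports NVCBA2_ERROR_DECRYPT_SIZE_MAX_EXCEEDED when a larger copy is submitted.

/* Hypothetical validation helper (illustration only). */
static int example_decrypt_copy_size_ok(unsigned long size_bytes)
{
    return size_bytes <= NVCBA2_DECRYPT_COPY_SIZE_MAX_BYTES;
}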
@ -35,6 +35,7 @@
#define NV2080_CTRL_MC_ARCH_INFO_ARCHITECTURE_GH100            (0x00000180)
#define NV2080_CTRL_MC_ARCH_INFO_ARCHITECTURE_AD100            (0x00000190)
#define NV2080_CTRL_MC_ARCH_INFO_ARCHITECTURE_GB100            (0x000001A0)
#define NV2080_CTRL_MC_ARCH_INFO_ARCHITECTURE_GB200            (0x000001B0)

/* valid ARCHITECTURE_GP10x implementation values */
#define NV2080_CTRL_MC_ARCH_INFO_IMPLEMENTATION_GP100          (0x00000000)
@ -1,560 +0,0 @@
/*******************************************************************************
    Copyright (c) 2003-2016 NVIDIA Corporation

    Permission is hereby granted, free of charge, to any person obtaining a copy
    of this software and associated documentation files (the "Software"), to
    deal in the Software without restriction, including without limitation the
    rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
    sell copies of the Software, and to permit persons to whom the Software is
    furnished to do so, subject to the following conditions:

        The above copyright notice and this permission notice shall be
        included in all copies or substantial portions of the Software.

    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
    THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
    FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
    DEALINGS IN THE SOFTWARE.

*******************************************************************************/


#ifndef __gb100_dev_mmu_h__
#define __gb100_dev_mmu_h__
/* This file is autogenerated. Do not edit */
#define NV_MMU_PDE                                                       /* ----G */
#define NV_MMU_PDE_APERTURE_BIG                        (0*32+1):(0*32+0) /* RWXVF */
#define NV_MMU_PDE_APERTURE_BIG_INVALID                       0x00000000 /* RW--V */
#define NV_MMU_PDE_APERTURE_BIG_VIDEO_MEMORY                  0x00000001 /* RW--V */
#define NV_MMU_PDE_APERTURE_BIG_SYSTEM_COHERENT_MEMORY        0x00000002 /* RW--V */
#define NV_MMU_PDE_APERTURE_BIG_SYSTEM_NON_COHERENT_MEMORY    0x00000003 /* RW--V */
#define NV_MMU_PDE_SIZE                                (0*32+3):(0*32+2) /* RWXVF */
#define NV_MMU_PDE_SIZE_FULL                                  0x00000000 /* RW--V */
#define NV_MMU_PDE_SIZE_HALF                                  0x00000001 /* RW--V */
#define NV_MMU_PDE_SIZE_QUARTER                               0x00000002 /* RW--V */
#define NV_MMU_PDE_SIZE_EIGHTH                                0x00000003 /* RW--V */
#define NV_MMU_PDE_ADDRESS_BIG_SYS                    (0*32+31):(0*32+4) /* RWXVF */
#define NV_MMU_PDE_ADDRESS_BIG_VID                  (0*32+31-3):(0*32+4) /* RWXVF */
#define NV_MMU_PDE_ADDRESS_BIG_VID_PEER            (0*32+31):(0*32+32-3) /* RWXVF */
#define NV_MMU_PDE_ADDRESS_BIG_VID_PEER_0                     0x00000000 /* RW--V */
#define NV_MMU_PDE_APERTURE_SMALL                      (1*32+1):(1*32+0) /* RWXVF */
#define NV_MMU_PDE_APERTURE_SMALL_INVALID                     0x00000000 /* RW--V */
#define NV_MMU_PDE_APERTURE_SMALL_VIDEO_MEMORY                0x00000001 /* RW--V */
#define NV_MMU_PDE_APERTURE_SMALL_SYSTEM_COHERENT_MEMORY      0x00000002 /* RW--V */
#define NV_MMU_PDE_APERTURE_SMALL_SYSTEM_NON_COHERENT_MEMORY  0x00000003 /* RW--V */
#define NV_MMU_PDE_VOL_SMALL                           (1*32+2):(1*32+2) /* RWXVF */
#define NV_MMU_PDE_VOL_SMALL_TRUE                             0x00000001 /* RW--V */
#define NV_MMU_PDE_VOL_SMALL_FALSE                            0x00000000 /* RW--V */
#define NV_MMU_PDE_VOL_BIG                             (1*32+3):(1*32+3) /* RWXVF */
#define NV_MMU_PDE_VOL_BIG_TRUE                               0x00000001 /* RW--V */
#define NV_MMU_PDE_VOL_BIG_FALSE                              0x00000000 /* RW--V */
#define NV_MMU_PDE_ADDRESS_SMALL_SYS                  (1*32+31):(1*32+4) /* RWXVF */
#define NV_MMU_PDE_ADDRESS_SMALL_VID                (1*32+31-3):(1*32+4) /* RWXVF */
#define NV_MMU_PDE_ADDRESS_SMALL_VID_PEER          (1*32+31):(1*32+32-3) /* RWXVF */
#define NV_MMU_PDE_ADDRESS_SMALL_VID_PEER_0                   0x00000000 /* RW--V */
#define NV_MMU_PDE_ADDRESS_SHIFT                              0x0000000c /*       */
#define NV_MMU_PDE__SIZE                                               8
#define NV_MMU_PTE                                                       /* ----G */
#define NV_MMU_PTE_VALID                               (0*32+0):(0*32+0) /* RWXVF */
#define NV_MMU_PTE_VALID_TRUE                                        0x1 /* RW--V */
#define NV_MMU_PTE_VALID_FALSE                                       0x0 /* RW--V */
#define NV_MMU_PTE_PRIVILEGE                           (0*32+1):(0*32+1) /* RWXVF */
#define NV_MMU_PTE_PRIVILEGE_TRUE                                    0x1 /* RW--V */
#define NV_MMU_PTE_PRIVILEGE_FALSE                                   0x0 /* RW--V */
#define NV_MMU_PTE_READ_ONLY                           (0*32+2):(0*32+2) /* RWXVF */
#define NV_MMU_PTE_READ_ONLY_TRUE                                    0x1 /* RW--V */
#define NV_MMU_PTE_READ_ONLY_FALSE                                   0x0 /* RW--V */
#define NV_MMU_PTE_ENCRYPTED                           (0*32+3):(0*32+3) /* RWXVF */
#define NV_MMU_PTE_ENCRYPTED_TRUE                             0x00000001 /* R---V */
#define NV_MMU_PTE_ENCRYPTED_FALSE                            0x00000000 /* R---V */
#define NV_MMU_PTE_ADDRESS_SYS                        (0*32+31):(0*32+4) /* RWXVF */
#define NV_MMU_PTE_ADDRESS_VID                      (0*32+31-3):(0*32+4) /* RWXVF */
#define NV_MMU_PTE_ADDRESS_VID_PEER                (0*32+31):(0*32+32-3) /* RWXVF */
#define NV_MMU_PTE_ADDRESS_VID_PEER_0                         0x00000000 /* RW--V */
#define NV_MMU_PTE_ADDRESS_VID_PEER_1                         0x00000001 /* RW--V */
#define NV_MMU_PTE_ADDRESS_VID_PEER_2                         0x00000002 /* RW--V */
#define NV_MMU_PTE_ADDRESS_VID_PEER_3                         0x00000003 /* RW--V */
#define NV_MMU_PTE_ADDRESS_VID_PEER_4                         0x00000004 /* RW--V */
#define NV_MMU_PTE_ADDRESS_VID_PEER_5                         0x00000005 /* RW--V */
#define NV_MMU_PTE_ADDRESS_VID_PEER_6                         0x00000006 /* RW--V */
#define NV_MMU_PTE_ADDRESS_VID_PEER_7                         0x00000007 /* RW--V */
|
||||
#define NV_MMU_PTE_VOL (1*32+0):(1*32+0) /* RWXVF */
|
||||
#define NV_MMU_PTE_VOL_TRUE 0x00000001 /* RW--V */
|
||||
#define NV_MMU_PTE_VOL_FALSE 0x00000000 /* RW--V */
|
||||
#define NV_MMU_PTE_APERTURE (1*32+2):(1*32+1) /* RWXVF */
|
||||
#define NV_MMU_PTE_APERTURE_VIDEO_MEMORY 0x00000000 /* RW--V */
|
||||
#define NV_MMU_PTE_APERTURE_PEER_MEMORY 0x00000001 /* RW--V */
|
||||
#define NV_MMU_PTE_APERTURE_SYSTEM_COHERENT_MEMORY 0x00000002 /* RW--V */
|
||||
#define NV_MMU_PTE_APERTURE_SYSTEM_NON_COHERENT_MEMORY 0x00000003 /* RW--V */
|
||||
#define NV_MMU_PTE_LOCK (1*32+3):(1*32+3) /* RWXVF */
|
||||
#define NV_MMU_PTE_LOCK_TRUE 0x1 /* RW--V */
|
||||
#define NV_MMU_PTE_LOCK_FALSE 0x0 /* RW--V */
|
||||
#define NV_MMU_PTE_ATOMIC_DISABLE (1*32+3):(1*32+3) /* RWXVF */
|
||||
#define NV_MMU_PTE_ATOMIC_DISABLE_TRUE 0x1 /* RW--V */
|
||||
#define NV_MMU_PTE_ATOMIC_DISABLE_FALSE 0x0 /* RW--V */
|
||||
#define NV_MMU_PTE_COMPTAGLINE (1*32+20+11):(1*32+12) /* RWXVF */
|
||||
#define NV_MMU_PTE_READ_DISABLE (1*32+30):(1*32+30) /* RWXVF */
|
||||
#define NV_MMU_PTE_READ_DISABLE_TRUE 0x1 /* RW--V */
|
||||
#define NV_MMU_PTE_READ_DISABLE_FALSE 0x0 /* RW--V */
|
||||
#define NV_MMU_PTE_WRITE_DISABLE (1*32+31):(1*32+31) /* RWXVF */
|
||||
#define NV_MMU_PTE_WRITE_DISABLE_TRUE 0x1 /* RW--V */
|
||||
#define NV_MMU_PTE_WRITE_DISABLE_FALSE 0x0 /* RW--V */
|
||||
#define NV_MMU_PTE_ADDRESS_SHIFT 0x0000000c /* */
|
||||
#define NV_MMU_PTE__SIZE 8
|
||||
#define NV_MMU_PTE_COMPTAGS_NONE 0x0 /* */
|
||||
#define NV_MMU_PTE_COMPTAGS_1 0x1 /* */
|
||||
#define NV_MMU_PTE_COMPTAGS_2 0x2 /* */
|
||||
#define NV_MMU_PTE_KIND (1*32+7):(1*32+4) /* RWXVF */
|
||||
#define NV_MMU_PTE_KIND_INVALID 0x07 /* R---V */
|
||||
#define NV_MMU_PTE_KIND_PITCH 0x00 /* R---V */
|
||||
#define NV_MMU_PTE_KIND_GENERIC_MEMORY 0x6 /* R---V */
|
||||
#define NV_MMU_PTE_KIND_Z16 0x1 /* R---V */
|
||||
#define NV_MMU_PTE_KIND_S8 0x2 /* R---V */
|
||||
#define NV_MMU_PTE_KIND_S8Z24 0x3 /* R---V */
|
||||
#define NV_MMU_PTE_KIND_ZF32_X24S8 0x4 /* R---V */
|
||||
#define NV_MMU_PTE_KIND_Z24S8 0x5 /* R---V */
|
||||
#define NV_MMU_PTE_KIND_GENERIC_MEMORY_COMPRESSIBLE 0x8 /* R---V */
|
||||
#define NV_MMU_PTE_KIND_GENERIC_MEMORY_COMPRESSIBLE_DISABLE_PLC 0x9 /* R---V */
|
||||
#define NV_MMU_PTE_KIND_S8_COMPRESSIBLE_DISABLE_PLC 0xA /* R---V */
|
||||
#define NV_MMU_PTE_KIND_Z16_COMPRESSIBLE_DISABLE_PLC 0xB /* R---V */
|
||||
#define NV_MMU_PTE_KIND_S8Z24_COMPRESSIBLE_DISABLE_PLC 0xC /* R---V */
|
||||
#define NV_MMU_PTE_KIND_ZF32_X24S8_COMPRESSIBLE_DISABLE_PLC 0xD /* R---V */
|
||||
#define NV_MMU_PTE_KIND_Z24S8_COMPRESSIBLE_DISABLE_PLC 0xE /* R---V */
|
||||
#define NV_MMU_PTE_KIND_SMSKED_MESSAGE 0xF /* R---V */
|
||||
#define NV_MMU_VER1_PDE /* ----G */
|
||||
#define NV_MMU_VER1_PDE_APERTURE_BIG (0*32+1):(0*32+0) /* RWXVF */
|
||||
#define NV_MMU_VER1_PDE_APERTURE_BIG_INVALID 0x00000000 /* RW--V */
|
||||
#define NV_MMU_VER1_PDE_APERTURE_BIG_VIDEO_MEMORY 0x00000001 /* RW--V */
|
||||
#define NV_MMU_VER1_PDE_APERTURE_BIG_SYSTEM_COHERENT_MEMORY 0x00000002 /* RW--V */
|
||||
#define NV_MMU_VER1_PDE_APERTURE_BIG_SYSTEM_NON_COHERENT_MEMORY 0x00000003 /* RW--V */
|
||||
#define NV_MMU_VER1_PDE_SIZE (0*32+3):(0*32+2) /* RWXVF */
|
||||
#define NV_MMU_VER1_PDE_SIZE_FULL 0x00000000 /* RW--V */
|
||||
#define NV_MMU_VER1_PDE_SIZE_HALF 0x00000001 /* RW--V */
|
||||
#define NV_MMU_VER1_PDE_SIZE_QUARTER 0x00000002 /* RW--V */
|
||||
#define NV_MMU_VER1_PDE_SIZE_EIGHTH 0x00000003 /* RW--V */
|
||||
#define NV_MMU_VER1_PDE_ADDRESS_BIG_SYS (0*32+31):(0*32+4) /* RWXVF */
|
||||
#define NV_MMU_VER1_PDE_ADDRESS_BIG_VID (0*32+31-3):(0*32+4) /* RWXVF */
|
||||
#define NV_MMU_VER1_PDE_ADDRESS_BIG_VID_PEER (0*32+31):(0*32+32-3) /* RWXVF */
|
||||
#define NV_MMU_VER1_PDE_ADDRESS_BIG_VID_PEER_0 0x00000000 /* RW--V */
|
||||
#define NV_MMU_VER1_PDE_APERTURE_SMALL (1*32+1):(1*32+0) /* RWXVF */
|
||||
#define NV_MMU_VER1_PDE_APERTURE_SMALL_INVALID 0x00000000 /* RW--V */
|
||||
#define NV_MMU_VER1_PDE_APERTURE_SMALL_VIDEO_MEMORY 0x00000001 /* RW--V */
|
||||
#define NV_MMU_VER1_PDE_APERTURE_SMALL_SYSTEM_COHERENT_MEMORY 0x00000002 /* RW--V */
|
||||
#define NV_MMU_VER1_PDE_APERTURE_SMALL_SYSTEM_NON_COHERENT_MEMORY 0x00000003 /* RW--V */
|
||||
#define NV_MMU_VER1_PDE_VOL_SMALL (1*32+2):(1*32+2) /* RWXVF */
|
||||
#define NV_MMU_VER1_PDE_VOL_SMALL_TRUE 0x00000001 /* RW--V */
|
||||
#define NV_MMU_VER1_PDE_VOL_SMALL_FALSE 0x00000000 /* RW--V */
|
||||
#define NV_MMU_VER1_PDE_VOL_BIG (1*32+3):(1*32+3) /* RWXVF */
|
||||
#define NV_MMU_VER1_PDE_VOL_BIG_TRUE 0x00000001 /* RW--V */
|
||||
#define NV_MMU_VER1_PDE_VOL_BIG_FALSE 0x00000000 /* RW--V */
|
||||
#define NV_MMU_VER1_PDE_ADDRESS_SMALL_SYS (1*32+31):(1*32+4) /* RWXVF */
|
||||
#define NV_MMU_VER1_PDE_ADDRESS_SMALL_VID (1*32+31-3):(1*32+4) /* RWXVF */
|
||||
#define NV_MMU_VER1_PDE_ADDRESS_SMALL_VID_PEER (1*32+31):(1*32+32-3) /* RWXVF */
|
||||
#define NV_MMU_VER1_PDE_ADDRESS_SMALL_VID_PEER_0 0x00000000 /* RW--V */
|
||||
#define NV_MMU_VER1_PDE_ADDRESS_SHIFT 0x0000000c /* */
|
||||
#define NV_MMU_VER1_PDE__SIZE 8
|
||||
#define NV_MMU_VER1_PTE /* ----G */
|
||||
#define NV_MMU_VER1_PTE_VALID (0*32+0):(0*32+0) /* RWXVF */
|
||||
#define NV_MMU_VER1_PTE_VALID_TRUE 0x1 /* RW--V */
|
||||
#define NV_MMU_VER1_PTE_VALID_FALSE 0x0 /* RW--V */
|
||||
#define NV_MMU_VER1_PTE_PRIVILEGE (0*32+1):(0*32+1) /* RWXVF */
|
||||
#define NV_MMU_VER1_PTE_PRIVILEGE_TRUE 0x1 /* RW--V */
|
||||
#define NV_MMU_VER1_PTE_PRIVILEGE_FALSE 0x0 /* RW--V */
|
||||
#define NV_MMU_VER1_PTE_READ_ONLY (0*32+2):(0*32+2) /* RWXVF */
|
||||
#define NV_MMU_VER1_PTE_READ_ONLY_TRUE 0x1 /* RW--V */
|
||||
#define NV_MMU_VER1_PTE_READ_ONLY_FALSE 0x0 /* RW--V */
|
||||
#define NV_MMU_VER1_PTE_ENCRYPTED (0*32+3):(0*32+3) /* RWXVF */
|
||||
#define NV_MMU_VER1_PTE_ENCRYPTED_TRUE 0x00000001 /* R---V */
|
||||
#define NV_MMU_VER1_PTE_ENCRYPTED_FALSE 0x00000000 /* R---V */
|
||||
#define NV_MMU_VER1_PTE_ADDRESS_SYS (0*32+31):(0*32+4) /* RWXVF */
|
||||
#define NV_MMU_VER1_PTE_ADDRESS_VID (0*32+31-3):(0*32+4) /* RWXVF */
|
||||
#define NV_MMU_VER1_PTE_ADDRESS_VID_PEER (0*32+31):(0*32+32-3) /* RWXVF */
|
||||
#define NV_MMU_VER1_PTE_ADDRESS_VID_PEER_0 0x00000000 /* RW--V */
|
||||
#define NV_MMU_VER1_PTE_ADDRESS_VID_PEER_1 0x00000001 /* RW--V */
|
||||
#define NV_MMU_VER1_PTE_ADDRESS_VID_PEER_2 0x00000002 /* RW--V */
|
||||
#define NV_MMU_VER1_PTE_ADDRESS_VID_PEER_3 0x00000003 /* RW--V */
|
||||
#define NV_MMU_VER1_PTE_ADDRESS_VID_PEER_4 0x00000004 /* RW--V */
|
||||
#define NV_MMU_VER1_PTE_ADDRESS_VID_PEER_5 0x00000005 /* RW--V */
|
||||
#define NV_MMU_VER1_PTE_ADDRESS_VID_PEER_6 0x00000006 /* RW--V */
|
||||
#define NV_MMU_VER1_PTE_ADDRESS_VID_PEER_7 0x00000007 /* RW--V */
|
||||
#define NV_MMU_VER1_PTE_VOL (1*32+0):(1*32+0) /* RWXVF */
|
||||
#define NV_MMU_VER1_PTE_VOL_TRUE 0x00000001 /* RW--V */
|
||||
#define NV_MMU_VER1_PTE_VOL_FALSE 0x00000000 /* RW--V */
|
||||
#define NV_MMU_VER1_PTE_APERTURE (1*32+2):(1*32+1) /* RWXVF */
|
||||
#define NV_MMU_VER1_PTE_APERTURE_VIDEO_MEMORY 0x00000000 /* RW--V */
|
||||
#define NV_MMU_VER1_PTE_APERTURE_PEER_MEMORY 0x00000001 /* RW--V */
|
||||
#define NV_MMU_VER1_PTE_APERTURE_SYSTEM_COHERENT_MEMORY 0x00000002 /* RW--V */
|
||||
#define NV_MMU_VER1_PTE_APERTURE_SYSTEM_NON_COHERENT_MEMORY 0x00000003 /* RW--V */
|
||||
#define NV_MMU_VER1_PTE_ATOMIC_DISABLE (1*32+3):(1*32+3) /* RWXVF */
|
||||
#define NV_MMU_VER1_PTE_ATOMIC_DISABLE_TRUE 0x1 /* RW--V */
|
||||
#define NV_MMU_VER1_PTE_ATOMIC_DISABLE_FALSE 0x0 /* RW--V */
|
||||
#define NV_MMU_VER1_PTE_COMPTAGLINE (1*32+20+11):(1*32+12) /* RWXVF */
|
||||
#define NV_MMU_VER1_PTE_KIND (1*32+11):(1*32+4) /* RWXVF */
|
||||
#define NV_MMU_VER1_PTE_ADDRESS_SHIFT 0x0000000c /* */
|
||||
#define NV_MMU_VER1_PTE__SIZE 8
|
||||
#define NV_MMU_VER1_PTE_COMPTAGS_NONE 0x0 /* */
|
||||
#define NV_MMU_VER1_PTE_COMPTAGS_1 0x1 /* */
|
||||
#define NV_MMU_VER1_PTE_COMPTAGS_2 0x2 /* */
|
||||
#define NV_MMU_NEW_PDE /* ----G */
|
||||
#define NV_MMU_NEW_PDE_IS_PTE 0:0 /* RWXVF */
|
||||
#define NV_MMU_NEW_PDE_IS_PTE_TRUE 0x1 /* RW--V */
|
||||
#define NV_MMU_NEW_PDE_IS_PTE_FALSE 0x0 /* RW--V */
|
||||
#define NV_MMU_NEW_PDE_IS_PDE 0:0 /* RWXVF */
|
||||
#define NV_MMU_NEW_PDE_IS_PDE_TRUE 0x0 /* RW--V */
|
||||
#define NV_MMU_NEW_PDE_IS_PDE_FALSE 0x1 /* RW--V */
|
||||
#define NV_MMU_NEW_PDE_VALID 0:0 /* RWXVF */
|
||||
#define NV_MMU_NEW_PDE_VALID_TRUE 0x1 /* RW--V */
|
||||
#define NV_MMU_NEW_PDE_VALID_FALSE 0x0 /* RW--V */
|
||||
#define NV_MMU_NEW_PDE_APERTURE 2:1 /* RWXVF */
|
||||
#define NV_MMU_NEW_PDE_APERTURE_INVALID 0x00000000 /* RW--V */
|
||||
#define NV_MMU_NEW_PDE_APERTURE_VIDEO_MEMORY 0x00000001 /* RW--V */
|
||||
#define NV_MMU_NEW_PDE_APERTURE_SYSTEM_COHERENT_MEMORY 0x00000002 /* RW--V */
|
||||
#define NV_MMU_NEW_PDE_APERTURE_SYSTEM_NON_COHERENT_MEMORY 0x00000003 /* RW--V */
|
||||
#define NV_MMU_NEW_PDE_VOL 3:3 /* RWXVF */
|
||||
#define NV_MMU_NEW_PDE_VOL_TRUE 0x00000001 /* RW--V */
|
||||
#define NV_MMU_NEW_PDE_VOL_FALSE 0x00000000 /* RW--V */
|
||||
#define NV_MMU_NEW_PDE_NO_ATS 5:5 /* RWXVF */
|
||||
#define NV_MMU_NEW_PDE_NO_ATS_TRUE 0x1 /* RW--V */
|
||||
#define NV_MMU_NEW_PDE_NO_ATS_FALSE 0x0 /* RW--V */
|
||||
#define NV_MMU_NEW_PDE_ADDRESS_SYS 53:8 /* RWXVF */
|
||||
#define NV_MMU_NEW_PDE_ADDRESS_VID (35-3):8 /* RWXVF */
|
||||
#define NV_MMU_NEW_PDE_ADDRESS_VID_PEER 35:(36-3) /* RWXVF */
|
||||
#define NV_MMU_NEW_PDE_ADDRESS_VID_PEER_0 0x00000000 /* RW--V */
|
||||
#define NV_MMU_NEW_PDE_ADDRESS_SHIFT 0x0000000c /* */
|
||||
#define NV_MMU_NEW_PDE__SIZE 8
|
||||
#define NV_MMU_NEW_DUAL_PDE /* ----G */
|
||||
#define NV_MMU_NEW_DUAL_PDE_IS_PTE 0:0 /* RWXVF */
|
||||
#define NV_MMU_NEW_DUAL_PDE_IS_PTE_TRUE 0x1 /* RW--V */
|
||||
#define NV_MMU_NEW_DUAL_PDE_IS_PTE_FALSE 0x0 /* RW--V */
|
||||
#define NV_MMU_NEW_DUAL_PDE_IS_PDE 0:0 /* RWXVF */
|
||||
#define NV_MMU_NEW_DUAL_PDE_IS_PDE_TRUE 0x0 /* RW--V */
|
||||
#define NV_MMU_NEW_DUAL_PDE_IS_PDE_FALSE 0x1 /* RW--V */
|
||||
#define NV_MMU_NEW_DUAL_PDE_VALID 0:0 /* RWXVF */
|
||||
#define NV_MMU_NEW_DUAL_PDE_VALID_TRUE 0x1 /* RW--V */
|
||||
#define NV_MMU_NEW_DUAL_PDE_VALID_FALSE 0x0 /* RW--V */
|
||||
#define NV_MMU_NEW_DUAL_PDE_APERTURE_BIG 2:1 /* RWXVF */
|
||||
#define NV_MMU_NEW_DUAL_PDE_APERTURE_BIG_INVALID 0x00000000 /* RW--V */
|
||||
#define NV_MMU_NEW_DUAL_PDE_APERTURE_BIG_VIDEO_MEMORY 0x00000001 /* RW--V */
|
||||
#define NV_MMU_NEW_DUAL_PDE_APERTURE_BIG_SYSTEM_COHERENT_MEMORY 0x00000002 /* RW--V */
|
||||
#define NV_MMU_NEW_DUAL_PDE_APERTURE_BIG_SYSTEM_NON_COHERENT_MEMORY 0x00000003 /* RW--V */
|
||||
#define NV_MMU_NEW_DUAL_PDE_VOL_BIG 3:3 /* RWXVF */
|
||||
#define NV_MMU_NEW_DUAL_PDE_VOL_BIG_TRUE 0x00000001 /* RW--V */
|
||||
#define NV_MMU_NEW_DUAL_PDE_VOL_BIG_FALSE 0x00000000 /* RW--V */
|
||||
#define NV_MMU_NEW_DUAL_PDE_NO_ATS 5:5 /* RWXVF */
|
||||
#define NV_MMU_NEW_DUAL_PDE_NO_ATS_TRUE 0x1 /* RW--V */
|
||||
#define NV_MMU_NEW_DUAL_PDE_NO_ATS_FALSE 0x0 /* RW--V */
|
||||
#define NV_MMU_NEW_DUAL_PDE_ADDRESS_BIG_SYS 53:(8-4) /* RWXVF */
|
||||
#define NV_MMU_NEW_DUAL_PDE_ADDRESS_BIG_VID (35-3):(8-4) /* RWXVF */
|
||||
#define NV_MMU_NEW_DUAL_PDE_ADDRESS_BIG_VID_PEER 35:(36-3) /* RWXVF */
|
||||
#define NV_MMU_NEW_DUAL_PDE_ADDRESS_BIG_VID_PEER_0 0x00000000 /* RW--V */
|
||||
#define NV_MMU_NEW_DUAL_PDE_APERTURE_SMALL 66:65 /* RWXVF */
|
||||
#define NV_MMU_NEW_DUAL_PDE_APERTURE_SMALL_INVALID 0x00000000 /* RW--V */
|
||||
#define NV_MMU_NEW_DUAL_PDE_APERTURE_SMALL_VIDEO_MEMORY 0x00000001 /* RW--V */
|
||||
#define NV_MMU_NEW_DUAL_PDE_APERTURE_SMALL_SYSTEM_COHERENT_MEMORY 0x00000002 /* RW--V */
|
||||
#define NV_MMU_NEW_DUAL_PDE_APERTURE_SMALL_SYSTEM_NON_COHERENT_MEMORY 0x00000003 /* RW--V */
|
||||
#define NV_MMU_NEW_DUAL_PDE_VOL_SMALL 67:67 /* RWXVF */
|
||||
#define NV_MMU_NEW_DUAL_PDE_VOL_SMALL_TRUE 0x00000001 /* RW--V */
|
||||
#define NV_MMU_NEW_DUAL_PDE_VOL_SMALL_FALSE 0x00000000 /* RW--V */
|
||||
#define NV_MMU_NEW_DUAL_PDE_ADDRESS_SMALL_SYS 117:72 /* RWXVF */
|
||||
#define NV_MMU_NEW_DUAL_PDE_ADDRESS_SMALL_VID (99-3):72 /* RWXVF */
|
||||
#define NV_MMU_NEW_DUAL_PDE_ADDRESS_SMALL_VID_PEER 99:(100-3) /* RWXVF */
|
||||
#define NV_MMU_NEW_DUAL_PDE_ADDRESS_SMALL_VID_PEER_0 0x00000000 /* RW--V */
|
||||
#define NV_MMU_NEW_DUAL_PDE_ADDRESS_SHIFT 0x0000000c /* */
|
||||
#define NV_MMU_NEW_DUAL_PDE_ADDRESS_BIG_SHIFT 8 /* */
|
||||
#define NV_MMU_NEW_DUAL_PDE__SIZE 16
|
||||
#define NV_MMU_NEW_PTE /* ----G */
|
||||
#define NV_MMU_NEW_PTE_VALID 0:0 /* RWXVF */
|
||||
#define NV_MMU_NEW_PTE_VALID_TRUE 0x1 /* RW--V */
|
||||
#define NV_MMU_NEW_PTE_VALID_FALSE 0x0 /* RW--V */
|
||||
#define NV_MMU_NEW_PTE_APERTURE 2:1 /* RWXVF */
|
||||
#define NV_MMU_NEW_PTE_APERTURE_VIDEO_MEMORY 0x00000000 /* RW--V */
|
||||
#define NV_MMU_NEW_PTE_APERTURE_PEER_MEMORY 0x00000001 /* RW--V */
|
||||
#define NV_MMU_NEW_PTE_APERTURE_SYSTEM_COHERENT_MEMORY 0x00000002 /* RW--V */
|
||||
#define NV_MMU_NEW_PTE_APERTURE_SYSTEM_NON_COHERENT_MEMORY 0x00000003 /* RW--V */
|
||||
#define NV_MMU_NEW_PTE_VOL 3:3 /* RWXVF */
|
||||
#define NV_MMU_NEW_PTE_VOL_TRUE 0x00000001 /* RW--V */
|
||||
#define NV_MMU_NEW_PTE_VOL_FALSE 0x00000000 /* RW--V */
|
||||
#define NV_MMU_NEW_PTE_ENCRYPTED 4:4 /* RWXVF */
|
||||
#define NV_MMU_NEW_PTE_ENCRYPTED_TRUE 0x00000001 /* R---V */
|
||||
#define NV_MMU_NEW_PTE_ENCRYPTED_FALSE 0x00000000 /* R---V */
|
||||
#define NV_MMU_NEW_PTE_PRIVILEGE 5:5 /* RWXVF */
|
||||
#define NV_MMU_NEW_PTE_PRIVILEGE_TRUE 0x1 /* RW--V */
|
||||
#define NV_MMU_NEW_PTE_PRIVILEGE_FALSE 0x0 /* RW--V */
|
||||
#define NV_MMU_NEW_PTE_READ_ONLY 6:6 /* RWXVF */
|
||||
#define NV_MMU_NEW_PTE_READ_ONLY_TRUE 0x1 /* RW--V */
|
||||
#define NV_MMU_NEW_PTE_READ_ONLY_FALSE 0x0 /* RW--V */
|
||||
#define NV_MMU_NEW_PTE_ATOMIC_DISABLE 7:7 /* RWXVF */
|
||||
#define NV_MMU_NEW_PTE_ATOMIC_DISABLE_TRUE 0x1 /* RW--V */
|
||||
#define NV_MMU_NEW_PTE_ATOMIC_DISABLE_FALSE 0x0 /* RW--V */
|
||||
#define NV_MMU_NEW_PTE_ADDRESS_SYS 53:8 /* RWXVF */
|
||||
#define NV_MMU_NEW_PTE_ADDRESS_VID (35-3):8 /* RWXVF */
|
||||
#define NV_MMU_NEW_PTE_ADDRESS_VID_PEER 35:(36-3) /* RWXVF */
|
||||
#define NV_MMU_NEW_PTE_ADDRESS_VID_PEER_0 0x00000000 /* RW--V */
|
||||
#define NV_MMU_NEW_PTE_ADDRESS_VID_PEER_1 0x00000001 /* RW--V */
|
||||
#define NV_MMU_NEW_PTE_ADDRESS_VID_PEER_2 0x00000002 /* RW--V */
|
||||
#define NV_MMU_NEW_PTE_ADDRESS_VID_PEER_3 0x00000003 /* RW--V */
|
||||
#define NV_MMU_NEW_PTE_ADDRESS_VID_PEER_4 0x00000004 /* RW--V */
|
||||
#define NV_MMU_NEW_PTE_ADDRESS_VID_PEER_5 0x00000005 /* RW--V */
|
||||
#define NV_MMU_NEW_PTE_ADDRESS_VID_PEER_6 0x00000006 /* RW--V */
|
||||
#define NV_MMU_NEW_PTE_ADDRESS_VID_PEER_7 0x00000007 /* RW--V */
|
||||
#define NV_MMU_NEW_PTE_COMPTAGLINE (20+35):36 /* RWXVF */
|
||||
#define NV_MMU_NEW_PTE_KIND 63:56 /* RWXVF */
|
||||
#define NV_MMU_NEW_PTE_ADDRESS_SHIFT 0x0000000c /* */
|
||||
#define NV_MMU_NEW_PTE__SIZE 8
|
||||
#define NV_MMU_VER2_PDE /* ----G */
|
||||
#define NV_MMU_VER2_PDE_IS_PTE 0:0 /* RWXVF */
|
||||
#define NV_MMU_VER2_PDE_IS_PTE_TRUE 0x1 /* RW--V */
|
||||
#define NV_MMU_VER2_PDE_IS_PTE_FALSE 0x0 /* RW--V */
|
||||
#define NV_MMU_VER2_PDE_IS_PDE 0:0 /* RWXVF */
|
||||
#define NV_MMU_VER2_PDE_IS_PDE_TRUE 0x0 /* RW--V */
|
||||
#define NV_MMU_VER2_PDE_IS_PDE_FALSE 0x1 /* RW--V */
|
||||
#define NV_MMU_VER2_PDE_VALID 0:0 /* RWXVF */
|
||||
#define NV_MMU_VER2_PDE_VALID_TRUE 0x1 /* RW--V */
|
||||
#define NV_MMU_VER2_PDE_VALID_FALSE 0x0 /* RW--V */
|
||||
#define NV_MMU_VER2_PDE_APERTURE 2:1 /* RWXVF */
|
||||
#define NV_MMU_VER2_PDE_APERTURE_INVALID 0x00000000 /* RW--V */
|
||||
#define NV_MMU_VER2_PDE_APERTURE_VIDEO_MEMORY 0x00000001 /* RW--V */
|
||||
#define NV_MMU_VER2_PDE_APERTURE_SYSTEM_COHERENT_MEMORY 0x00000002 /* RW--V */
|
||||
#define NV_MMU_VER2_PDE_APERTURE_SYSTEM_NON_COHERENT_MEMORY 0x00000003 /* RW--V */
|
||||
#define NV_MMU_VER2_PDE_VOL 3:3 /* RWXVF */
|
||||
#define NV_MMU_VER2_PDE_VOL_TRUE 0x00000001 /* RW--V */
|
||||
#define NV_MMU_VER2_PDE_VOL_FALSE 0x00000000 /* RW--V */
|
||||
#define NV_MMU_VER2_PDE_NO_ATS 5:5 /* RWXVF */
|
||||
#define NV_MMU_VER2_PDE_NO_ATS_TRUE 0x1 /* RW--V */
|
||||
#define NV_MMU_VER2_PDE_NO_ATS_FALSE 0x0 /* RW--V */
|
||||
#define NV_MMU_VER2_PDE_ADDRESS_SYS 53:8 /* RWXVF */
|
||||
#define NV_MMU_VER2_PDE_ADDRESS_VID (35-3):8 /* RWXVF */
|
||||
#define NV_MMU_VER2_PDE_ADDRESS_VID_PEER 35:(36-3) /* RWXVF */
|
||||
#define NV_MMU_VER2_PDE_ADDRESS_VID_PEER_0 0x00000000 /* RW--V */
|
||||
#define NV_MMU_VER2_PDE_ADDRESS_SHIFT 0x0000000c /* */
|
||||
#define NV_MMU_VER2_PDE__SIZE 8
|
||||
#define NV_MMU_VER2_DUAL_PDE /* ----G */
|
||||
#define NV_MMU_VER2_DUAL_PDE_IS_PTE 0:0 /* RWXVF */
|
||||
#define NV_MMU_VER2_DUAL_PDE_IS_PTE_TRUE 0x1 /* RW--V */
|
||||
#define NV_MMU_VER2_DUAL_PDE_IS_PTE_FALSE 0x0 /* RW--V */
|
||||
#define NV_MMU_VER2_DUAL_PDE_IS_PDE 0:0 /* RWXVF */
|
||||
#define NV_MMU_VER2_DUAL_PDE_IS_PDE_TRUE 0x0 /* RW--V */
|
||||
#define NV_MMU_VER2_DUAL_PDE_IS_PDE_FALSE 0x1 /* RW--V */
|
||||
#define NV_MMU_VER2_DUAL_PDE_VALID 0:0 /* RWXVF */
|
||||
#define NV_MMU_VER2_DUAL_PDE_VALID_TRUE 0x1 /* RW--V */
|
||||
#define NV_MMU_VER2_DUAL_PDE_VALID_FALSE 0x0 /* RW--V */
|
||||
#define NV_MMU_VER2_DUAL_PDE_APERTURE_BIG 2:1 /* RWXVF */
|
||||
#define NV_MMU_VER2_DUAL_PDE_APERTURE_BIG_INVALID 0x00000000 /* RW--V */
|
||||
#define NV_MMU_VER2_DUAL_PDE_APERTURE_BIG_VIDEO_MEMORY 0x00000001 /* RW--V */
|
||||
#define NV_MMU_VER2_DUAL_PDE_APERTURE_BIG_SYSTEM_COHERENT_MEMORY 0x00000002 /* RW--V */
|
||||
#define NV_MMU_VER2_DUAL_PDE_APERTURE_BIG_SYSTEM_NON_COHERENT_MEMORY 0x00000003 /* RW--V */
|
||||
#define NV_MMU_VER2_DUAL_PDE_VOL_BIG 3:3 /* RWXVF */
|
||||
#define NV_MMU_VER2_DUAL_PDE_VOL_BIG_TRUE 0x00000001 /* RW--V */
|
||||
#define NV_MMU_VER2_DUAL_PDE_VOL_BIG_FALSE 0x00000000 /* RW--V */
|
||||
#define NV_MMU_VER2_DUAL_PDE_NO_ATS 5:5 /* RWXVF */
|
||||
#define NV_MMU_VER2_DUAL_PDE_NO_ATS_TRUE 0x1 /* RW--V */
|
||||
#define NV_MMU_VER2_DUAL_PDE_NO_ATS_FALSE 0x0 /* RW--V */
|
||||
#define NV_MMU_VER2_DUAL_PDE_ADDRESS_BIG_SYS 53:(8-4) /* RWXVF */
|
||||
#define NV_MMU_VER2_DUAL_PDE_ADDRESS_BIG_VID (35-3):(8-4) /* RWXVF */
|
||||
#define NV_MMU_VER2_DUAL_PDE_ADDRESS_BIG_VID_PEER 35:(36-3) /* RWXVF */
|
||||
#define NV_MMU_VER2_DUAL_PDE_ADDRESS_BIG_VID_PEER_0 0x00000000 /* RW--V */
|
||||
#define NV_MMU_VER2_DUAL_PDE_APERTURE_SMALL 66:65 /* RWXVF */
|
||||
#define NV_MMU_VER2_DUAL_PDE_APERTURE_SMALL_INVALID 0x00000000 /* RW--V */
|
||||
#define NV_MMU_VER2_DUAL_PDE_APERTURE_SMALL_VIDEO_MEMORY 0x00000001 /* RW--V */
|
||||
#define NV_MMU_VER2_DUAL_PDE_APERTURE_SMALL_SYSTEM_COHERENT_MEMORY 0x00000002 /* RW--V */
|
||||
#define NV_MMU_VER2_DUAL_PDE_APERTURE_SMALL_SYSTEM_NON_COHERENT_MEMORY 0x00000003 /* RW--V */
|
||||
#define NV_MMU_VER2_DUAL_PDE_VOL_SMALL 67:67 /* RWXVF */
|
||||
#define NV_MMU_VER2_DUAL_PDE_VOL_SMALL_TRUE 0x00000001 /* RW--V */
|
||||
#define NV_MMU_VER2_DUAL_PDE_VOL_SMALL_FALSE 0x00000000 /* RW--V */
|
||||
#define NV_MMU_VER2_DUAL_PDE_ADDRESS_SMALL_SYS 117:72 /* RWXVF */
|
||||
#define NV_MMU_VER2_DUAL_PDE_ADDRESS_SMALL_VID (99-3):72 /* RWXVF */
|
||||
#define NV_MMU_VER2_DUAL_PDE_ADDRESS_SMALL_VID_PEER 99:(100-3) /* RWXVF */
|
||||
#define NV_MMU_VER2_DUAL_PDE_ADDRESS_SMALL_VID_PEER_0 0x00000000 /* RW--V */
|
||||
#define NV_MMU_VER2_DUAL_PDE_ADDRESS_SHIFT 0x0000000c /* */
|
||||
#define NV_MMU_VER2_DUAL_PDE_ADDRESS_BIG_SHIFT 8 /* */
|
||||
#define NV_MMU_VER2_DUAL_PDE__SIZE 16
|
||||
#define NV_MMU_VER2_PTE /* ----G */
|
||||
#define NV_MMU_VER2_PTE_VALID 0:0 /* RWXVF */
|
||||
#define NV_MMU_VER2_PTE_VALID_TRUE 0x1 /* RW--V */
|
||||
#define NV_MMU_VER2_PTE_VALID_FALSE 0x0 /* RW--V */
|
||||
#define NV_MMU_VER2_PTE_APERTURE 2:1 /* RWXVF */
|
||||
#define NV_MMU_VER2_PTE_APERTURE_VIDEO_MEMORY 0x00000000 /* RW--V */
|
||||
#define NV_MMU_VER2_PTE_APERTURE_PEER_MEMORY 0x00000001 /* RW--V */
|
||||
#define NV_MMU_VER2_PTE_APERTURE_SYSTEM_COHERENT_MEMORY 0x00000002 /* RW--V */
|
||||
#define NV_MMU_VER2_PTE_APERTURE_SYSTEM_NON_COHERENT_MEMORY 0x00000003 /* RW--V */
|
||||
#define NV_MMU_VER2_PTE_VOL 3:3 /* RWXVF */
|
||||
#define NV_MMU_VER2_PTE_VOL_TRUE 0x00000001 /* RW--V */
|
||||
#define NV_MMU_VER2_PTE_VOL_FALSE 0x00000000 /* RW--V */
|
||||
#define NV_MMU_VER2_PTE_ENCRYPTED 4:4 /* RWXVF */
|
||||
#define NV_MMU_VER2_PTE_ENCRYPTED_TRUE 0x00000001 /* R---V */
|
||||
#define NV_MMU_VER2_PTE_ENCRYPTED_FALSE 0x00000000 /* R---V */
|
||||
#define NV_MMU_VER2_PTE_PRIVILEGE 5:5 /* RWXVF */
|
||||
#define NV_MMU_VER2_PTE_PRIVILEGE_TRUE 0x1 /* RW--V */
|
||||
#define NV_MMU_VER2_PTE_PRIVILEGE_FALSE 0x0 /* RW--V */
|
||||
#define NV_MMU_VER2_PTE_READ_ONLY 6:6 /* RWXVF */
|
||||
#define NV_MMU_VER2_PTE_READ_ONLY_TRUE 0x1 /* RW--V */
|
||||
#define NV_MMU_VER2_PTE_READ_ONLY_FALSE 0x0 /* RW--V */
|
||||
#define NV_MMU_VER2_PTE_ATOMIC_DISABLE 7:7 /* RWXVF */
|
||||
#define NV_MMU_VER2_PTE_ATOMIC_DISABLE_TRUE 0x1 /* RW--V */
|
||||
#define NV_MMU_VER2_PTE_ATOMIC_DISABLE_FALSE 0x0 /* RW--V */
|
||||
#define NV_MMU_VER2_PTE_ADDRESS_SYS 53:8 /* RWXVF */
|
||||
#define NV_MMU_VER2_PTE_ADDRESS_VID (35-3):8 /* RWXVF */
|
||||
#define NV_MMU_VER2_PTE_ADDRESS_VID_PEER 35:(36-3) /* RWXVF */
|
||||
#define NV_MMU_VER2_PTE_ADDRESS_VID_PEER_0 0x00000000 /* RW--V */
|
||||
#define NV_MMU_VER2_PTE_ADDRESS_VID_PEER_1 0x00000001 /* RW--V */
|
||||
#define NV_MMU_VER2_PTE_ADDRESS_VID_PEER_2 0x00000002 /* RW--V */
|
||||
#define NV_MMU_VER2_PTE_ADDRESS_VID_PEER_3 0x00000003 /* RW--V */
|
||||
#define NV_MMU_VER2_PTE_ADDRESS_VID_PEER_4 0x00000004 /* RW--V */
|
||||
#define NV_MMU_VER2_PTE_ADDRESS_VID_PEER_5 0x00000005 /* RW--V */
|
||||
#define NV_MMU_VER2_PTE_ADDRESS_VID_PEER_6 0x00000006 /* RW--V */
|
||||
#define NV_MMU_VER2_PTE_ADDRESS_VID_PEER_7 0x00000007 /* RW--V */
|
||||
#define NV_MMU_VER2_PTE_COMPTAGLINE (20+35):36 /* RWXVF */
|
||||
#define NV_MMU_VER2_PTE_KIND 63:56 /* RWXVF */
|
||||
#define NV_MMU_VER2_PTE_ADDRESS_SHIFT 0x0000000c /* */
|
||||
#define NV_MMU_VER2_PTE__SIZE 8
|
||||
#define NV_MMU_VER3_PDE /* ----G */
|
||||
#define NV_MMU_VER3_PDE_IS_PTE 0:0 /* RWXVF */
|
||||
#define NV_MMU_VER3_PDE_IS_PTE_TRUE 0x1 /* RW--V */
|
||||
#define NV_MMU_VER3_PDE_IS_PTE_FALSE 0x0 /* RW--V */
|
||||
#define NV_MMU_VER3_PDE_VALID 0:0 /* RWXVF */
|
||||
#define NV_MMU_VER3_PDE_VALID_TRUE 0x1 /* RW--V */
|
||||
#define NV_MMU_VER3_PDE_VALID_FALSE 0x0 /* RW--V */
|
||||
#define NV_MMU_VER3_PDE_APERTURE 2:1 /* RWXVF */
|
||||
#define NV_MMU_VER3_PDE_APERTURE_INVALID 0x00000000 /* RW--V */
|
||||
#define NV_MMU_VER3_PDE_APERTURE_VIDEO_MEMORY 0x00000001 /* RW--V */
|
||||
#define NV_MMU_VER3_PDE_APERTURE_SYSTEM_COHERENT_MEMORY 0x00000002 /* RW--V */
|
||||
#define NV_MMU_VER3_PDE_APERTURE_SYSTEM_NON_COHERENT_MEMORY 0x00000003 /* RW--V */
|
||||
#define NV_MMU_VER3_PDE_PCF 5:3 /* RWXVF */
|
||||
#define NV_MMU_VER3_PDE_PCF_VALID_CACHED_ATS_ALLOWED__OR__INVALID_ATS_ALLOWED 0x00000000 /* RW--V */
|
||||
#define NV_MMU_VER3_PDE_PCF_VALID_CACHED_ATS_ALLOWED 0x00000000 /* RW--V */
|
||||
#define NV_MMU_VER3_PDE_PCF_INVALID_ATS_ALLOWED 0x00000000 /* RW--V */
|
||||
#define NV_MMU_VER3_PDE_PCF_VALID_UNCACHED_ATS_ALLOWED__OR__SPARSE_ATS_ALLOWED 0x00000001 /* RW--V */
|
||||
#define NV_MMU_VER3_PDE_PCF_VALID_UNCACHED_ATS_ALLOWED 0x00000001 /* RW--V */
|
||||
#define NV_MMU_VER3_PDE_PCF_SPARSE_ATS_ALLOWED 0x00000001 /* RW--V */
|
||||
#define NV_MMU_VER3_PDE_PCF_VALID_CACHED_ATS_NOT_ALLOWED__OR__INVALID_ATS_NOT_ALLOWED 0x00000002 /* RW--V */
|
||||
#define NV_MMU_VER3_PDE_PCF_VALID_CACHED_ATS_NOT_ALLOWED 0x00000002 /* RW--V */
|
||||
#define NV_MMU_VER3_PDE_PCF_INVALID_ATS_NOT_ALLOWED 0x00000002 /* RW--V */
|
||||
#define NV_MMU_VER3_PDE_PCF_VALID_UNCACHED_ATS_NOT_ALLOWED__OR__SPARSE_ATS_NOT_ALLOWED 0x00000003 /* RW--V */
|
||||
#define NV_MMU_VER3_PDE_PCF_VALID_UNCACHED_ATS_NOT_ALLOWED 0x00000003 /* RW--V */
|
||||
#define NV_MMU_VER3_PDE_PCF_SPARSE_ATS_NOT_ALLOWED 0x00000003 /* RW--V */
|
||||
#define NV_MMU_VER3_PDE_ADDRESS 51:12 /* RWXVF */
|
||||
#define NV_MMU_VER3_PDE_ADDRESS_SHIFT 0x0000000c /* */
|
||||
#define NV_MMU_VER3_PDE__SIZE 8
|
||||
#define NV_MMU_VER3_DUAL_PDE /* ----G */
|
||||
#define NV_MMU_VER3_DUAL_PDE_IS_PTE 0:0 /* RWXVF */
|
||||
#define NV_MMU_VER3_DUAL_PDE_IS_PTE_TRUE 0x1 /* RW--V */
|
||||
#define NV_MMU_VER3_DUAL_PDE_IS_PTE_FALSE 0x0 /* RW--V */
|
||||
#define NV_MMU_VER3_DUAL_PDE_VALID 0:0 /* RWXVF */
|
||||
#define NV_MMU_VER3_DUAL_PDE_VALID_TRUE 0x1 /* RW--V */
|
||||
#define NV_MMU_VER3_DUAL_PDE_VALID_FALSE 0x0 /* RW--V */
|
||||
#define NV_MMU_VER3_DUAL_PDE_APERTURE_BIG 2:1 /* RWXVF */
|
||||
#define NV_MMU_VER3_DUAL_PDE_APERTURE_BIG_INVALID 0x00000000 /* RW--V */
|
||||
#define NV_MMU_VER3_DUAL_PDE_APERTURE_BIG_VIDEO_MEMORY 0x00000001 /* RW--V */
|
||||
#define NV_MMU_VER3_DUAL_PDE_APERTURE_BIG_SYSTEM_COHERENT_MEMORY 0x00000002 /* RW--V */
|
||||
#define NV_MMU_VER3_DUAL_PDE_APERTURE_BIG_SYSTEM_NON_COHERENT_MEMORY 0x00000003 /* RW--V */
|
||||
#define NV_MMU_VER3_DUAL_PDE_PCF_BIG 5:3 /* RWXVF */
|
||||
#define NV_MMU_VER3_DUAL_PDE_PCF_BIG_VALID_CACHED_ATS_ALLOWED__OR__INVALID_ATS_ALLOWED 0x00000000 /* RW--V */
|
||||
#define NV_MMU_VER3_DUAL_PDE_PCF_BIG_VALID_CACHED_ATS_ALLOWED 0x00000000 /* RW--V */
|
||||
#define NV_MMU_VER3_DUAL_PDE_PCF_BIG_INVALID_ATS_ALLOWED 0x00000000 /* RW--V */
|
||||
#define NV_MMU_VER3_DUAL_PDE_PCF_BIG_VALID_UNCACHED_ATS_ALLOWED__OR__SPARSE_ATS_ALLOWED 0x00000001 /* RW--V */
|
||||
#define NV_MMU_VER3_DUAL_PDE_PCF_BIG_VALID_UNCACHED_ATS_ALLOWED 0x00000001 /* RW--V */
|
||||
#define NV_MMU_VER3_DUAL_PDE_PCF_BIG_SPARSE_ATS_ALLOWED 0x00000001 /* RW--V */
|
||||
#define NV_MMU_VER3_DUAL_PDE_PCF_BIG_VALID_CACHED_ATS_NOT_ALLOWED__OR__INVALID_ATS_NOT_ALLOWED 0x00000002 /* RW--V */
|
||||
#define NV_MMU_VER3_DUAL_PDE_PCF_BIG_VALID_CACHED_ATS_NOT_ALLOWED 0x00000002 /* RW--V */
|
||||
#define NV_MMU_VER3_DUAL_PDE_PCF_BIG_INVALID_ATS_NOT_ALLOWED 0x00000002 /* RW--V */
|
||||
#define NV_MMU_VER3_DUAL_PDE_PCF_BIG_VALID_UNCACHED_ATS_NOT_ALLOWED__OR__SPARSE_ATS_NOT_ALLOWED 0x00000003 /* RW--V */
|
||||
#define NV_MMU_VER3_DUAL_PDE_PCF_BIG_VALID_UNCACHED_ATS_NOT_ALLOWED 0x00000003 /* RW--V */
|
||||
#define NV_MMU_VER3_DUAL_PDE_PCF_BIG_SPARSE_ATS_NOT_ALLOWED 0x00000003 /* RW--V */
|
||||
#define NV_MMU_VER3_DUAL_PDE_ADDRESS_BIG 51:8 /* RWXVF */
|
||||
#define NV_MMU_VER3_DUAL_PDE_APERTURE_SMALL 66:65 /* RWXVF */
|
||||
#define NV_MMU_VER3_DUAL_PDE_APERTURE_SMALL_INVALID 0x00000000 /* RW--V */
|
||||
#define NV_MMU_VER3_DUAL_PDE_APERTURE_SMALL_VIDEO_MEMORY 0x00000001 /* RW--V */
|
||||
#define NV_MMU_VER3_DUAL_PDE_APERTURE_SMALL_SYSTEM_COHERENT_MEMORY 0x00000002 /* RW--V */
|
||||
#define NV_MMU_VER3_DUAL_PDE_APERTURE_SMALL_SYSTEM_NON_COHERENT_MEMORY 0x00000003 /* RW--V */
|
||||
#define NV_MMU_VER3_DUAL_PDE_PCF_SMALL 69:67 /* RWXVF */
|
||||
#define NV_MMU_VER3_DUAL_PDE_PCF_SMALL_VALID_CACHED_ATS_ALLOWED__OR__INVALID_ATS_ALLOWED 0x00000000 /* RW--V */
|
||||
#define NV_MMU_VER3_DUAL_PDE_PCF_SMALL_VALID_CACHED_ATS_ALLOWED 0x00000000 /* RW--V */
|
||||
#define NV_MMU_VER3_DUAL_PDE_PCF_SMALL_INVALID_ATS_ALLOWED 0x00000000 /* RW--V */
|
||||
#define NV_MMU_VER3_DUAL_PDE_PCF_SMALL_VALID_UNCACHED_ATS_ALLOWED__OR__SPARSE_ATS_ALLOWED 0x00000001 /* RW--V */
|
||||
#define NV_MMU_VER3_DUAL_PDE_PCF_SMALL_VALID_UNCACHED_ATS_ALLOWED 0x00000001 /* RW--V */
|
||||
#define NV_MMU_VER3_DUAL_PDE_PCF_SMALL_SPARSE_ATS_ALLOWED 0x00000001 /* RW--V */
|
||||
#define NV_MMU_VER3_DUAL_PDE_PCF_SMALL_VALID_CACHED_ATS_NOT_ALLOWED__OR__INVALID_ATS_NOT_ALLOWED 0x00000002 /* RW--V */
|
||||
#define NV_MMU_VER3_DUAL_PDE_PCF_SMALL_VALID_CACHED_ATS_NOT_ALLOWED 0x00000002 /* RW--V */
|
||||
#define NV_MMU_VER3_DUAL_PDE_PCF_SMALL_INVALID_ATS_NOT_ALLOWED 0x00000002 /* RW--V */
|
||||
#define NV_MMU_VER3_DUAL_PDE_PCF_SMALL_VALID_UNCACHED_ATS_NOT_ALLOWED__OR__SPARSE_ATS_NOT_ALLOWED 0x00000003 /* RW--V */
|
||||
#define NV_MMU_VER3_DUAL_PDE_PCF_SMALL_VALID_UNCACHED_ATS_NOT_ALLOWED 0x00000003 /* RW--V */
|
||||
#define NV_MMU_VER3_DUAL_PDE_PCF_SMALL_SPARSE_ATS_NOT_ALLOWED 0x00000003 /* RW--V */
|
||||
#define NV_MMU_VER3_DUAL_PDE_ADDRESS_SMALL 115:76 /* RWXVF */
|
||||
#define NV_MMU_VER3_DUAL_PDE_ADDRESS_SHIFT 0x0000000c /* */
|
||||
#define NV_MMU_VER3_DUAL_PDE_ADDRESS_BIG_SHIFT 8 /* */
|
||||
#define NV_MMU_VER3_DUAL_PDE__SIZE 16
|
||||
#define NV_MMU_VER3_PTE /* ----G */
|
||||
#define NV_MMU_VER3_PTE_VALID 0:0 /* RWXVF */
|
||||
#define NV_MMU_VER3_PTE_VALID_TRUE 0x1 /* RW--V */
|
||||
#define NV_MMU_VER3_PTE_VALID_FALSE 0x0 /* RW--V */
|
||||
#define NV_MMU_VER3_PTE_APERTURE 2:1 /* RWXVF */
|
||||
#define NV_MMU_VER3_PTE_APERTURE_VIDEO_MEMORY 0x00000000 /* RW--V */
|
||||
#define NV_MMU_VER3_PTE_APERTURE_PEER_MEMORY 0x00000001 /* RW--V */
|
||||
#define NV_MMU_VER3_PTE_APERTURE_SYSTEM_COHERENT_MEMORY 0x00000002 /* RW--V */
|
||||
#define NV_MMU_VER3_PTE_APERTURE_SYSTEM_NON_COHERENT_MEMORY 0x00000003 /* RW--V */
|
||||
#define NV_MMU_VER3_PTE_PCF 7:3 /* RWXVF */
|
||||
#define NV_MMU_VER3_PTE_PCF_INVALID 0x00000000 /* RW--V */
|
||||
#define NV_MMU_VER3_PTE_PCF_SPARSE 0x00000001 /* RW--V */
|
||||
#define NV_MMU_VER3_PTE_PCF_MAPPING_NOWHERE 0x00000002 /* RW--V */
|
||||
#define NV_MMU_VER3_PTE_PCF_NO_VALID_4KB_PAGE 0x00000003 /* RW--V */
|
||||
#define NV_MMU_VER3_PTE_PCF_REGULAR_RW_ATOMIC_CACHED_ACE 0x00000000 /* RW--V */
|
||||
#define NV_MMU_VER3_PTE_PCF_REGULAR_RW_ATOMIC_UNCACHED_ACE 0x00000001 /* RW--V */
|
||||
#define NV_MMU_VER3_PTE_PCF_PRIVILEGE_RW_ATOMIC_CACHED_ACE 0x00000002 /* RW--V */
|
||||
#define NV_MMU_VER3_PTE_PCF_PRIVILEGE_RW_ATOMIC_UNCACHED_ACE 0x00000003 /* RW--V */
|
||||
#define NV_MMU_VER3_PTE_PCF_REGULAR_RO_ATOMIC_CACHED_ACE 0x00000004 /* RW--V */
|
||||
#define NV_MMU_VER3_PTE_PCF_REGULAR_RO_ATOMIC_UNCACHED_ACE 0x00000005 /* RW--V */
|
||||
#define NV_MMU_VER3_PTE_PCF_PRIVILEGE_RO_ATOMIC_CACHED_ACE 0x00000006 /* RW--V */
|
||||
#define NV_MMU_VER3_PTE_PCF_PRIVILEGE_RO_ATOMIC_UNCACHED_ACE 0x00000007 /* RW--V */
|
||||
#define NV_MMU_VER3_PTE_PCF_REGULAR_RW_NO_ATOMIC_CACHED_ACE 0x00000008 /* RW--V */
|
||||
#define NV_MMU_VER3_PTE_PCF_REGULAR_RW_NO_ATOMIC_UNCACHED_ACE 0x00000009 /* RW--V */
|
||||
#define NV_MMU_VER3_PTE_PCF_PRIVILEGE_RW_NO_ATOMIC_CACHED_ACE 0x0000000A /* RW--V */
|
||||
#define NV_MMU_VER3_PTE_PCF_PRIVILEGE_RW_NO_ATOMIC_UNCACHED_ACE 0x0000000B /* RW--V */
|
||||
#define NV_MMU_VER3_PTE_PCF_REGULAR_RO_NO_ATOMIC_CACHED_ACE 0x0000000C /* RW--V */
|
||||
#define NV_MMU_VER3_PTE_PCF_REGULAR_RO_NO_ATOMIC_UNCACHED_ACE 0x0000000D /* RW--V */
|
||||
#define NV_MMU_VER3_PTE_PCF_PRIVILEGE_RO_NO_ATOMIC_CACHED_ACE 0x0000000E /* RW--V */
|
||||
#define NV_MMU_VER3_PTE_PCF_PRIVILEGE_RO_NO_ATOMIC_UNCACHED_ACE 0x0000000F /* RW--V */
|
||||
#define NV_MMU_VER3_PTE_PCF_REGULAR_RW_ATOMIC_CACHED_ACD 0x00000010 /* RW--V */
|
||||
#define NV_MMU_VER3_PTE_PCF_REGULAR_RW_ATOMIC_UNCACHED_ACD 0x00000011 /* RW--V */
|
||||
#define NV_MMU_VER3_PTE_PCF_PRIVILEGE_RW_ATOMIC_CACHED_ACD 0x00000012 /* RW--V */
|
||||
#define NV_MMU_VER3_PTE_PCF_PRIVILEGE_RW_ATOMIC_UNCACHED_ACD 0x00000013 /* RW--V */
|
||||
#define NV_MMU_VER3_PTE_PCF_REGULAR_RO_ATOMIC_CACHED_ACD 0x00000014 /* RW--V */
|
||||
#define NV_MMU_VER3_PTE_PCF_REGULAR_RO_ATOMIC_UNCACHED_ACD 0x00000015 /* RW--V */
|
||||
#define NV_MMU_VER3_PTE_PCF_PRIVILEGE_RO_ATOMIC_CACHED_ACD 0x00000016 /* RW--V */
|
||||
#define NV_MMU_VER3_PTE_PCF_PRIVILEGE_RO_ATOMIC_UNCACHED_ACD 0x00000017 /* RW--V */
|
||||
#define NV_MMU_VER3_PTE_PCF_REGULAR_RW_NO_ATOMIC_CACHED_ACD 0x00000018 /* RW--V */
|
||||
#define NV_MMU_VER3_PTE_PCF_REGULAR_RW_NO_ATOMIC_UNCACHED_ACD 0x00000019 /* RW--V */
|
||||
#define NV_MMU_VER3_PTE_PCF_PRIVILEGE_RW_NO_ATOMIC_CACHED_ACD 0x0000001A /* RW--V */
|
||||
#define NV_MMU_VER3_PTE_PCF_PRIVILEGE_RW_NO_ATOMIC_UNCACHED_ACD 0x0000001B /* RW--V */
|
||||
#define NV_MMU_VER3_PTE_PCF_REGULAR_RO_NO_ATOMIC_CACHED_ACD 0x0000001C /* RW--V */
|
||||
#define NV_MMU_VER3_PTE_PCF_REGULAR_RO_NO_ATOMIC_UNCACHED_ACD 0x0000001D /* RW--V */
|
||||
#define NV_MMU_VER3_PTE_PCF_PRIVILEGE_RO_NO_ATOMIC_CACHED_ACD 0x0000001E /* RW--V */
|
||||
#define NV_MMU_VER3_PTE_PCF_PRIVILEGE_RO_NO_ATOMIC_UNCACHED_ACD 0x0000001F /* RW--V */
|
||||
#define NV_MMU_VER3_PTE_KIND 11:8 /* RWXVF */
|
||||
#define NV_MMU_VER3_PTE_ADDRESS 51:12 /* RWXVF */
|
||||
#define NV_MMU_VER3_PTE_ADDRESS_SYS 51:12 /* RWXVF */
|
||||
#define NV_MMU_VER3_PTE_ADDRESS_PEER 51:12 /* RWXVF */
|
||||
#define NV_MMU_VER3_PTE_ADDRESS_VID 39:12 /* RWXVF */
|
||||
#define NV_MMU_VER3_PTE_PEER_ID 63:(64-3) /* RWXVF */
|
||||
#define NV_MMU_VER3_PTE_PEER_ID_0 0x00000000 /* RW--V */
|
||||
#define NV_MMU_VER3_PTE_PEER_ID_1 0x00000001 /* RW--V */
|
||||
#define NV_MMU_VER3_PTE_PEER_ID_2 0x00000002 /* RW--V */
|
||||
#define NV_MMU_VER3_PTE_PEER_ID_3 0x00000003 /* RW--V */
|
||||
#define NV_MMU_VER3_PTE_PEER_ID_4 0x00000004 /* RW--V */
|
||||
#define NV_MMU_VER3_PTE_PEER_ID_5 0x00000005 /* RW--V */
|
||||
#define NV_MMU_VER3_PTE_PEER_ID_6 0x00000006 /* RW--V */
|
||||
#define NV_MMU_VER3_PTE_PEER_ID_7 0x00000007 /* RW--V */
|
||||
#define NV_MMU_VER3_PTE_ADDRESS_SHIFT 0x0000000c /* */
|
||||
#define NV_MMU_VER3_PTE__SIZE 8
|
||||
#define NV_MMU_CLIENT /* ----G */
|
||||
#define NV_MMU_CLIENT_KIND 2:0 /* RWXVF */
|
||||
#define NV_MMU_CLIENT_KIND_Z16 0x1 /* R---V */
|
||||
#define NV_MMU_CLIENT_KIND_S8 0x2 /* R---V */
|
||||
#define NV_MMU_CLIENT_KIND_S8Z24 0x3 /* R---V */
|
||||
#define NV_MMU_CLIENT_KIND_ZF32_X24S8 0x4 /* R---V */
|
||||
#define NV_MMU_CLIENT_KIND_Z24S8 0x5 /* R---V */
|
||||
#define NV_MMU_CLIENT_KIND_GENERIC_MEMORY 0x6 /* R---V */
|
||||
#define NV_MMU_CLIENT_KIND_INVALID 0x7 /* R---V */
|
||||
#endif // __gb100_dev_mmu_h__
|
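The VER3 PTE layout above packs VALID into bit 0, APERTURE into bits 2:1, PCF into bits 7:3, KIND into bits 11:8 and the 4 KB-aligned physical address into bits 51:12 of an 8-byte entry (NV_MMU_VER3_PTE_ADDRESS_SHIFT is 12). The sketch below is a hand-written illustration of that packing only; the function name and parameters are made up, and real driver code uses its field macros rather than open-coded shifts:

#include <stdint.h>

/* Illustration of the VER3 PTE field layout listed above (assumed helper,
 * not driver code). */
static uint64_t ver3_pte_pack(uint64_t phys_addr, unsigned aperture,
                              unsigned pcf, unsigned kind)
{
    uint64_t pte = 0;

    pte |= 1ULL << 0;                                        /* VALID, bit 0        */
    pte |= ((uint64_t)aperture & 0x3) << 1;                  /* APERTURE, bits 2:1  */
    pte |= ((uint64_t)pcf & 0x1f) << 3;                      /* PCF, bits 7:3       */
    pte |= ((uint64_t)kind & 0xf) << 8;                      /* KIND, bits 11:8     */
    pte |= ((phys_addr >> 12) & ((1ULL << 40) - 1)) << 12;   /* ADDRESS, bits 51:12 */

    return pte;
}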
@ -1,14 +1,6 @@
NVIDIA_UVM_SOURCES ?=
NVIDIA_UVM_SOURCES_CXX ?=

NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_conf_computing.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_sec2_test.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_maxwell_sec2.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_hopper_sec2.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_blackwell.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_blackwell_fault_buffer.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_blackwell_mmu.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_blackwell_host.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_common.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_linux.c
NVIDIA_UVM_SOURCES += nvidia-uvm/nvstatus.c
@ -53,6 +45,7 @@ NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_tracker.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_maxwell.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_maxwell_host.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_maxwell_ce.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_maxwell_sec2.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_maxwell_mmu.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_maxwell_fault_buffer.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_maxwell_access_counter_buffer.c
@ -81,8 +74,13 @@ NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_hopper.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_hopper_fault_buffer.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_hopper_ce.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_hopper_host.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_hopper_sec2.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_hopper_mmu.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_ada.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_blackwell.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_blackwell_fault_buffer.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_blackwell_mmu.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_blackwell_host.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_policy.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_perf_utils.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_kvmalloc.c
@ -101,6 +99,7 @@ NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_ats.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_ats_ibm.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_ats_faults.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_ats_sva.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_conf_computing.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_test.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_test_rng.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_range_tree_test.c
@ -128,3 +127,4 @@ NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_va_block_test.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_range_group_tree_test.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_thread_context_test.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_rb_tree_test.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_sec2_test.c
@ -61,6 +61,7 @@ NV_CONFTEST_FUNCTION_COMPILE_TESTS += iommu_sva_bind_device_has_drvdata_arg
NV_CONFTEST_FUNCTION_COMPILE_TESTS += vm_fault_to_errno
NV_CONFTEST_FUNCTION_COMPILE_TESTS += find_next_bit_wrap
NV_CONFTEST_FUNCTION_COMPILE_TESTS += iommu_is_dma_domain
NV_CONFTEST_FUNCTION_COMPILE_TESTS += for_each_sgtable_dma_page
NV_CONFTEST_FUNCTION_COMPILE_TESTS += folio_test_swapcache

NV_CONFTEST_TYPE_COMPILE_TESTS += vm_ops_fault_removed_vma_arg
@ -75,6 +76,7 @@ NV_CONFTEST_TYPE_COMPILE_TESTS += mempolicy_has_home_node
NV_CONFTEST_TYPE_COMPILE_TESTS += mpol_preferred_many_present
NV_CONFTEST_TYPE_COMPILE_TESTS += mmu_interval_notifier
NV_CONFTEST_TYPE_COMPILE_TESTS += fault_flag_remote_present
NV_CONFTEST_TYPE_COMPILE_TESTS += sg_dma_page_iter
NV_CONFTEST_TYPE_COMPILE_TESTS += struct_page_has_zone_device_data

NV_CONFTEST_SYMBOL_COMPILE_TESTS += is_export_symbol_present_int_active_memcg
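For context on the compile-test names added above: each entry is probed against the target kernel at build time, and when the construct is available the generated conftest output defines a feature macro (assumed here to follow the NV_<TEST>_PRESENT naming used elsewhere in these modules) that the sources guard on. A minimal sketch of that pattern, using the vm_fault_to_errno test from this list:

#include <linux/mm.h>

/* Sketch only: guarded use of a kernel helper probed by conftest. The macro
 * name below is an assumption about how the conftest result is surfaced. */
static int nv_fault_ret_to_errno(vm_fault_t ret)
{
#if defined(NV_VM_FAULT_TO_ERRNO_PRESENT)
    return vm_fault_to_errno(ret, 0);
#else
    return (ret & VM_FAULT_ERROR) ? -EFAULT : 0;  /* fallback for old kernels */
#endif
}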
@ -680,6 +680,9 @@ static void uvm_vm_open_semaphore_pool(struct vm_area_struct *vma)
// Semaphore pool vmas do not have vma wrappers, but some functions will
// assume vm_private_data is a wrapper.
vma->vm_private_data = NULL;
#if defined(VM_WIPEONFORK)
nv_vm_flags_set(vma, VM_WIPEONFORK);
#endif

if (is_fork) {
// If we forked, leave the parent vma alone.
@ -772,6 +775,9 @@ static void uvm_vm_open_device_p2p(struct vm_area_struct *vma)
// Device P2P vmas do not have vma wrappers, but some functions will
// assume vm_private_data is a wrapper.
vma->vm_private_data = NULL;
#if defined(VM_WIPEONFORK)
nv_vm_flags_set(vma, VM_WIPEONFORK);
#endif

if (is_fork) {
// If we forked, leave the parent vma alone.
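Both hunks above tag the newly opened vmas with VM_WIPEONFORK so a forked child never inherits live semaphore-pool or device P2P mappings. The user-space sketch below only illustrates the wipe-on-fork semantics (via madvise(MADV_WIPEONFORK)); it is not part of the driver change:

#define _GNU_SOURCE
#include <stdio.h>
#include <string.h>
#include <sys/mman.h>
#include <sys/wait.h>
#include <unistd.h>

/* A mapping marked wipe-on-fork reads back zero-filled in the child after
 * fork(). Error checking is omitted to keep the sketch short. */
int main(void)
{
    char *p = mmap(NULL, 4096, PROT_READ | PROT_WRITE,
                   MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);

    strcpy(p, "parent data");
    madvise(p, 4096, MADV_WIPEONFORK);

    if (fork() == 0) {
        printf("child sees: \"%s\"\n", p);   /* prints an empty string */
        _exit(0);
    }
    wait(NULL);
    printf("parent sees: \"%s\"\n", p);      /* still "parent data" */
    return 0;
}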
@ -379,6 +379,17 @@ NV_STATUS UvmIsPageableMemoryAccessSupportedOnGpu(const NvProcessorUuid *gpuUuid
// OS state required to register the GPU is malformed, or the partition
// identified by the user handles or its configuration changed.
//
// NV_ERR_NVLINK_FABRIC_NOT_READY:
// (On NvSwitch-connected system) Indicates that the fabric has not been
// configured yet. Caller must retry GPU registration.
//
// NV_ERR_NVLINK_FABRIC_FAILURE:
// (On NvSwitch-connected systems) Indicates that the NvLink fabric
// failed to be configured.
//
// NV_ERR_GPU_MEMORY_ONLINING_FAULURE:
// (On coherent systems) The GPU's memory onlining failed.
//
// NV_ERR_GENERIC:
// Unexpected error. We try hard to avoid returning this error code,
// because it is not very informative.
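The NV_ERR_NVLINK_FABRIC_NOT_READY description above says the caller must retry GPU registration. A minimal sketch of such a retry loop follows; the register_gpu_once callback, the attempt count, the delay, and the assumption that uvm.h supplies NV_STATUS and the NV_ERR_* codes are all illustrative, not part of the UVM API:

#include <unistd.h>
#include "uvm.h"  /* assumed to provide NV_STATUS, NV_OK and the NV_ERR_* codes */

/* Sketch: retry registration while the NvSwitch fabric is still being
 * configured. register_gpu_once() stands in for whatever call the caller
 * uses to register the GPU with UVM. */
static NV_STATUS register_gpu_with_retry(NV_STATUS (*register_gpu_once)(void))
{
    NV_STATUS status;
    int attempts = 10;                  /* arbitrary bound for the sketch */

    do {
        status = register_gpu_once();
        if (status != NV_ERR_NVLINK_FABRIC_NOT_READY)
            break;
        sleep(1);                       /* fabric manager still configuring */
    } while (--attempts > 0);

    return status;
}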
@ -1317,9 +1328,8 @@ NV_STATUS UvmCleanUpZombieResources(void);
//
// NV_ERR_INVALID_ARGUMENT:
// perGpuAttribs is NULL but gpuAttribsCount is non-zero or vice-versa,
// or caching is requested on more than one GPU.
// The Confidential Computing feature is enabled and the perGpuAttribs
// list is empty.
// or caching is requested on more than one GPU, or (in Confidential
// Computing only) the perGpuAttribs list is empty.
//
// NV_ERR_NOT_SUPPORTED:
// The current process is not the one which called UvmInitialize, and
@ -1469,7 +1479,9 @@ NV_STATUS UvmAllocDeviceP2P(NvProcessorUuid gpuUuid,
// If read duplication is enabled on any pages in the VA range, then those pages
// are read duplicated at the destination processor, leaving the source copy, if
// present, intact with only its mapping changed to read-only if it wasn't
// already mapped that way.
// already mapped that way. The exception to this behavior is migrating pages
// between different NUMA nodes, in which case the pages are migrated to the
// destination node and a read-only mapping is created to the migrated pages.
//
// Pages in the VA range are migrated even if their preferred location is set to
// a processor other than the destination processor.
@ -2212,7 +2224,9 @@ NV_STATUS UvmMapDynamicParallelismRegion(void *base,
//
// If UvmMigrate, UvmMigrateAsync or UvmMigrateRangeGroup is called on any pages
// in this VA range, then those pages will also be read duplicated on the
// destination processor for the migration.
// destination processor for the migration unless the migration is between CPU
// NUMA nodes, in which case the pages are migrated to the destination NUMA
// node and a read-only mapping to the migrated pages is created.
//
// Enabling read duplication on a VA range requires the CPU and all GPUs with
// registered VA spaces to be fault-capable. Otherwise, the migration and
@ -3945,9 +3959,7 @@ NV_STATUS UvmToolsDisableCounters(UvmToolsCountersHandle counters,
// In-process scenario when targetVa address + size overlaps with buffer + size.
//
// This is essentially a UVM version of RM ctrl call
// NV83DE_CTRL_CMD_DEBUG_READ_MEMORY. For implementation constraints (and more
// information), please refer to the documentation:
// //sw/docs/resman/components/compute/UVM/subsystems/UVM_8_Tools_API_Design.docx
// NV83DE_CTRL_CMD_DEBUG_READ_MEMORY.
//
// Arguments:
// session: (INPUT)
@ -4001,9 +4013,7 @@ NV_STATUS UvmToolsReadProcessMemory(UvmToolsSessionHandle session,
// buffer + size.
//
// This is essentially a UVM version of RM ctrl call
// NV83DE_CTRL_CMD_DEBUG_READ_MEMORY. For implementation constraints (and more
// information), please refer to the documentation:
// //sw/docs/resman/components/compute/UVM/subsystems/UVM_8_Tools_API_Design.docx
// NV83DE_CTRL_CMD_DEBUG_READ_MEMORY.
//
// Arguments:
// session: (INPUT)
@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2021-2023 NVIDIA Corporation
Copyright (c) 2021-2024 NVIDIA Corporation

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@ -51,6 +51,9 @@ void uvm_hal_ada_arch_init_properties(uvm_parent_gpu_t *parent_gpu)
parent_gpu->rm_va_base = 0;
parent_gpu->rm_va_size = 128 * UVM_SIZE_1TB;

parent_gpu->peer_va_base = parent_gpu->rm_va_base + parent_gpu->rm_va_size;
parent_gpu->peer_va_size = NV_MAX_DEVICES * UVM_PEER_IDENTITY_VA_SIZE;

parent_gpu->uvm_mem_va_base = 384 * UVM_SIZE_1TB;
parent_gpu->uvm_mem_va_size = UVM_MEM_VA_SIZE;

@ -98,4 +101,6 @@ void uvm_hal_ada_arch_init_properties(uvm_parent_gpu_t *parent_gpu)
parent_gpu->plc_supported = true;

parent_gpu->no_ats_range_required = false;

parent_gpu->conf_computing.per_channel_key_rotation = false;
}
@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2018-2023 NVIDIA Corporation
Copyright (c) 2018-2024 NVIDIA Corporation

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@ -51,6 +51,9 @@ void uvm_hal_ampere_arch_init_properties(uvm_parent_gpu_t *parent_gpu)
parent_gpu->rm_va_base = 0;
parent_gpu->rm_va_size = 128 * UVM_SIZE_1TB;

parent_gpu->peer_va_base = parent_gpu->rm_va_base + parent_gpu->rm_va_size;
parent_gpu->peer_va_size = NV_MAX_DEVICES * UVM_PEER_IDENTITY_VA_SIZE;

parent_gpu->uvm_mem_va_base = 384 * UVM_SIZE_1TB;
parent_gpu->uvm_mem_va_size = UVM_MEM_VA_SIZE;

@ -107,4 +110,6 @@ void uvm_hal_ampere_arch_init_properties(uvm_parent_gpu_t *parent_gpu)
parent_gpu->plc_supported = true;

parent_gpu->no_ats_range_required = false;

parent_gpu->conf_computing.per_channel_key_rotation = false;
}
@ -29,6 +29,8 @@

bool uvm_hal_ampere_ce_method_is_valid_c6b5(uvm_push_t *push, NvU32 method_address, NvU32 method_data)
{
UVM_ASSERT(push->channel);

if (!uvm_channel_is_proxy(push->channel))
return true;

@ -116,6 +118,16 @@ bool uvm_hal_ampere_ce_memcopy_is_valid_c6b5(uvm_push_t *push, uvm_gpu_address_t
{
NvU64 push_begin_gpu_va;
uvm_gpu_t *gpu = uvm_push_get_gpu(push);
const bool peer_copy = uvm_gpu_address_is_peer(gpu, dst) || uvm_gpu_address_is_peer(gpu, src);

UVM_ASSERT(push->channel);

if (peer_copy && !uvm_channel_is_p2p(push->channel)) {
UVM_ERR_PRINT("Peer copy from address (0x%llx) to address (0x%llx) should use designated p2p channels!",
src.address,
dst.address);
return false;
}

if (!uvm_parent_gpu_is_virt_mode_sriov_heavy(gpu->parent))
return true;
@ -182,6 +194,8 @@ void uvm_hal_ampere_ce_memcopy_patch_src_c6b5(uvm_push_t *push, uvm_gpu_address_
{
uvm_pushbuffer_t *pushbuffer;

UVM_ASSERT(push->channel);

if (!uvm_channel_is_proxy(push->channel))
return;

@ -36,6 +36,8 @@ bool uvm_hal_ampere_host_method_is_valid(uvm_push_t *push, NvU32 method_address,
if (!uvm_parent_gpu_is_virt_mode_sriov_heavy(gpu->parent))
return true;

UVM_ASSERT(push->channel);

if (uvm_channel_is_privileged(push->channel)) {
switch (method_address) {
case NVC56F_SET_OBJECT:
@ -84,6 +86,8 @@ bool uvm_hal_ampere_host_method_is_valid(uvm_push_t *push, NvU32 method_address,

bool uvm_hal_ampere_host_sw_method_is_valid(uvm_push_t *push, NvU32 method_address, NvU32 method_data)
{
UVM_ASSERT(push->channel);

if (!uvm_channel_is_proxy(push->channel))
return true;

@ -55,8 +55,9 @@ static NV_STATUS service_ats_requests(uvm_gpu_va_space_t *gpu_va_space,
NvU64 user_space_start;
NvU64 user_space_length;
bool write = (access_type >= UVM_FAULT_ACCESS_TYPE_WRITE);
bool fault_service_type = (service_type == UVM_ATS_SERVICE_TYPE_FAULTS);
uvm_populate_permissions_t populate_permissions = fault_service_type ?
bool is_fault_service_type = (service_type == UVM_ATS_SERVICE_TYPE_FAULTS);
bool is_prefetch_faults = (is_fault_service_type && (access_type == UVM_FAULT_ACCESS_TYPE_PREFETCH));
uvm_populate_permissions_t populate_permissions = is_fault_service_type ?
(write ? UVM_POPULATE_PERMISSIONS_WRITE : UVM_POPULATE_PERMISSIONS_ANY) :
UVM_POPULATE_PERMISSIONS_INHERIT;

@ -97,12 +98,20 @@ static NV_STATUS service_ats_requests(uvm_gpu_va_space_t *gpu_va_space,
.start = start,
.length = length,
.populate_permissions = populate_permissions,
.touch = fault_service_type,
.skip_mapped = fault_service_type,
.populate_on_cpu_alloc_failures = fault_service_type,
.populate_on_migrate_vma_failures = fault_service_type,
.touch = is_fault_service_type,
.skip_mapped = is_fault_service_type,
.populate_on_cpu_alloc_failures = is_fault_service_type,
.populate_on_migrate_vma_failures = is_fault_service_type,
.user_space_start = &user_space_start,
.user_space_length = &user_space_length,

// Potential STO NVLINK errors cannot be resolved in fault or access
// counter handlers. If there are GPUs to check for STO, it's either
// a) a false positive, and the migration went through ok, or
// b) a true positive, and the destination is all zeros, and the
// application will be terminated soon.
.gpus_to_check_for_nvlink_errors = NULL,
.fail_on_unresolved_sto_errors = !is_fault_service_type || is_prefetch_faults,
};

UVM_ASSERT(uvm_ats_can_service_faults(gpu_va_space, mm));
@ -113,7 +122,7 @@ static NV_STATUS service_ats_requests(uvm_gpu_va_space_t *gpu_va_space,
// set skip_mapped to true. For pages already mapped, this will only handle
// PTE upgrades if needed.
status = uvm_migrate_pageable(&uvm_migrate_args);
if (fault_service_type && (status == NV_WARN_NOTHING_TO_DO))
if (is_fault_service_type && (status == NV_WARN_NOTHING_TO_DO))
status = NV_OK;

UVM_ASSERT(status != NV_ERR_MORE_PROCESSING_REQUIRED);
@ -146,7 +155,10 @@ static void ats_batch_select_residency(uvm_gpu_va_space_t *gpu_va_space,
uvm_ats_fault_context_t *ats_context)
{
uvm_gpu_t *gpu = gpu_va_space->gpu;
int residency = uvm_gpu_numa_node(gpu);
int residency;

UVM_ASSERT(gpu->mem_info.numa.enabled);
residency = uvm_gpu_numa_node(gpu);

#if defined(NV_MEMPOLICY_HAS_UNIFIED_NODES)
struct mempolicy *vma_policy = vma_policy(vma);
@ -463,6 +475,65 @@ static NV_STATUS ats_compute_prefetch(uvm_gpu_va_space_t *gpu_va_space,
return status;
}

static NV_STATUS uvm_ats_service_faults_region(uvm_gpu_va_space_t *gpu_va_space,
struct vm_area_struct *vma,
NvU64 base,
uvm_va_block_region_t region,
uvm_fault_access_type_t access_type,
uvm_ats_fault_context_t *ats_context,
uvm_page_mask_t *faults_serviced_mask)
{
NvU64 start = base + (region.first * PAGE_SIZE);
size_t length = uvm_va_block_region_size(region);
NV_STATUS status;

UVM_ASSERT(start >= vma->vm_start);
UVM_ASSERT((start + length) <= vma->vm_end);

status = service_ats_requests(gpu_va_space,
vma,
start,
length,
access_type,
UVM_ATS_SERVICE_TYPE_FAULTS,
ats_context);
if (status != NV_OK)
return status;

uvm_page_mask_region_fill(faults_serviced_mask, region);

// WAR for older kernel versions missing an SMMU invalidate on RO -> RW
// transition. The SMMU and GPU could have the stale RO copy cached in their
// TLBs, which could have caused this write fault. This operation
// invalidates the SMMU TLBs but not the GPU TLBs. That will happen below as
// necessary.
if (access_type == UVM_FAULT_ACCESS_TYPE_WRITE)
uvm_ats_smmu_invalidate_tlbs(gpu_va_space, start, length);

// The Linux kernel does not invalidate TLB entries on an invalid to valid
// PTE transition. The GPU might have the invalid PTE cached in its TLB.
// The GPU will re-fetch an entry on access if the PTE is invalid and the
// page size is not 4K, but if the page size is 4K no re-fetch will happen
// and the GPU will fault despite the CPU PTE being valid. We don't know
// whether these faults happened due to stale entries after a transition,
// so use the hammer of always invalidating the GPU's TLB on each fault.
//
// The second case is similar and handles missing ATS invalidations on RO ->
// RW transitions for all page sizes. See the uvm_ats_smmu_invalidate_tlbs()
// call above.
if (PAGE_SIZE == UVM_PAGE_SIZE_4K || (UVM_ATS_SMMU_WAR_REQUIRED() && access_type == UVM_FAULT_ACCESS_TYPE_WRITE)) {
flush_tlb_va_region(gpu_va_space, start, length, ats_context->client_type);
}
else {
// ARM requires TLB invalidations on RO -> RW, but not all architectures
// do. If we implement ATS support on other architectures, we might need
// to issue GPU invalidates.
UVM_ASSERT(NVCPU_IS_AARCH64);
}

return NV_OK;
}

NV_STATUS uvm_ats_service_faults(uvm_gpu_va_space_t *gpu_va_space,
                                 struct vm_area_struct *vma,
                                 NvU64 base,
@@ -471,12 +542,11 @@ NV_STATUS uvm_ats_service_faults(uvm_gpu_va_space_t *gpu_va_space,
    NV_STATUS status = NV_OK;
    uvm_va_block_region_t subregion;
    uvm_va_block_region_t region = uvm_va_block_region(0, PAGES_PER_UVM_VA_BLOCK);
    uvm_page_mask_t *prefetch_only_fault_mask = &ats_context->faults.prefetch_only_fault_mask;
    uvm_page_mask_t *read_fault_mask = &ats_context->faults.read_fault_mask;
    uvm_page_mask_t *write_fault_mask = &ats_context->faults.write_fault_mask;
    uvm_page_mask_t *faults_serviced_mask = &ats_context->faults.faults_serviced_mask;
    uvm_page_mask_t *reads_serviced_mask = &ats_context->faults.reads_serviced_mask;
    uvm_fault_client_type_t client_type = ats_context->client_type;
    uvm_ats_service_type_t service_type = UVM_ATS_SERVICE_TYPE_FAULTS;

    UVM_ASSERT(vma);
    UVM_ASSERT(IS_ALIGNED(base, UVM_VA_BLOCK_SIZE));
@@ -492,7 +562,7 @@ NV_STATUS uvm_ats_service_faults(uvm_gpu_va_space_t *gpu_va_space,
    uvm_page_mask_zero(reads_serviced_mask);

    if (!(vma->vm_flags & VM_READ))
        return status;
        return NV_OK;

    if (!(vma->vm_flags & VM_WRITE)) {
        // If VMA doesn't have write permissions, all write faults are fatal.
@@ -508,72 +578,65 @@ NV_STATUS uvm_ats_service_faults(uvm_gpu_va_space_t *gpu_va_space,

        // There are no pending faults beyond write faults to RO region.
        if (uvm_page_mask_empty(read_fault_mask))
            return status;
            return NV_OK;
    }

    ats_batch_select_residency(gpu_va_space, vma, ats_context);

    ats_compute_prefetch(gpu_va_space, vma, base, service_type, ats_context);
    ats_compute_prefetch(gpu_va_space, vma, base, UVM_ATS_SERVICE_TYPE_FAULTS, ats_context);

    for_each_va_block_subregion_in_mask(subregion, write_fault_mask, region) {
        NvU64 start = base + (subregion.first * PAGE_SIZE);
        size_t length = uvm_va_block_region_num_pages(subregion) * PAGE_SIZE;
        uvm_fault_access_type_t access_type = (vma->vm_flags & VM_WRITE) ?
                                                  UVM_FAULT_ACCESS_TYPE_WRITE :
                                                  UVM_FAULT_ACCESS_TYPE_READ;

        UVM_ASSERT(start >= vma->vm_start);
        UVM_ASSERT((start + length) <= vma->vm_end);

        status = service_ats_requests(gpu_va_space, vma, start, length, access_type, service_type, ats_context);
        if (status != NV_OK)
            return status;
        uvm_fault_access_type_t access_type;
        uvm_page_mask_t *serviced_mask;

        if (vma->vm_flags & VM_WRITE) {
            uvm_page_mask_region_fill(faults_serviced_mask, subregion);
            uvm_ats_smmu_invalidate_tlbs(gpu_va_space, start, length);

            // The Linux kernel never invalidates TLB entries on mapping
            // permission upgrade. This is a problem if the GPU has cached
            // entries with the old permission. The GPU will re-fetch the entry
            // if the PTE is invalid and page size is not 4K (this is the case
            // on P9). However, if a page gets upgraded from R/O to R/W and GPU
            // has the PTEs cached with R/O permissions we will enter an
            // infinite loop because we just forward the fault to the Linux
            // kernel and it will see that the permissions in the page table are
            // correct. Therefore, we flush TLB entries on ATS write faults.
            flush_tlb_va_region(gpu_va_space, start, length, client_type);
            access_type = UVM_FAULT_ACCESS_TYPE_WRITE;
            serviced_mask = faults_serviced_mask;
        }
        else {
            uvm_page_mask_region_fill(reads_serviced_mask, subregion);
            // write_fault_mask contains just the addresses with both read and
            // fatal write faults, so we need to service the read component.
            access_type = UVM_FAULT_ACCESS_TYPE_READ;
            serviced_mask = reads_serviced_mask;
        }

        status = uvm_ats_service_faults_region(gpu_va_space,
                                               vma,
                                               base,
                                               subregion,
                                               access_type,
                                               ats_context,
                                               serviced_mask);
        if (status != NV_OK)
            return status;
    }

    // Remove write faults from read_fault_mask
    // Remove write faults from read_fault_mask to avoid double-service
    uvm_page_mask_andnot(read_fault_mask, read_fault_mask, write_fault_mask);

    for_each_va_block_subregion_in_mask(subregion, read_fault_mask, region) {
        NvU64 start = base + (subregion.first * PAGE_SIZE);
        size_t length = uvm_va_block_region_num_pages(subregion) * PAGE_SIZE;
        uvm_fault_access_type_t access_type = UVM_FAULT_ACCESS_TYPE_READ;

        UVM_ASSERT(start >= vma->vm_start);
        UVM_ASSERT((start + length) <= vma->vm_end);

        status = service_ats_requests(gpu_va_space, vma, start, length, access_type, service_type, ats_context);
        status = uvm_ats_service_faults_region(gpu_va_space,
                                               vma,
                                               base,
                                               subregion,
                                               UVM_FAULT_ACCESS_TYPE_READ,
                                               ats_context,
                                               faults_serviced_mask);
        if (status != NV_OK)
            return status;
    }

        uvm_page_mask_region_fill(faults_serviced_mask, subregion);

        // Similarly to permission upgrade scenario, discussed above, GPU
        // will not re-fetch the entry if the PTE is invalid and page size
        // is 4K. To avoid infinite faulting loop, invalidate TLB for every
        // new translation written explicitly like in the case of permission
        // upgrade.
        if (PAGE_SIZE == UVM_PAGE_SIZE_4K)
            flush_tlb_va_region(gpu_va_space, start, length, client_type);

    // Handle HW prefetch only faults
    for_each_va_block_subregion_in_mask(subregion, prefetch_only_fault_mask, region) {
        status = uvm_ats_service_faults_region(gpu_va_space,
                                               vma,
                                               base,
                                               subregion,
                                               UVM_FAULT_ACCESS_TYPE_PREFETCH,
                                               ats_context,
                                               faults_serviced_mask);
        if (status != NV_OK)
            return status;
    }

    return status;
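
// Illustrative sketch (editor's addition, not part of the driver sources): the
// mask bookkeeping above, summarized for a VMA that is readable but not
// writable. All identifiers come from the code above; the flow is only a
// simplified restatement.
//
//     if (!(vma->vm_flags & VM_WRITE)) {
//         // Write faults to this VMA are fatal. Their pages are serviced with
//         // read permission only, so they are reported via reads_serviced_mask
//         // rather than faults_serviced_mask.
//     }
//
//     // A page that had both a read and a write fault is serviced once, in the
//     // write-fault loop, so it is pruned from the read-fault pass:
//     uvm_page_mask_andnot(read_fault_mask, read_fault_mask, write_fault_mask);
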
@@ -679,7 +742,10 @@ NV_STATUS uvm_ats_service_access_counters(uvm_gpu_va_space_t *gpu_va_space,
        UVM_ASSERT((start + length) <= vma->vm_end);

        status = service_ats_requests(gpu_va_space, vma, start, length, access_type, service_type, ats_context);
        if (status == NV_OK)

        // clear access counters if pages were migrated or migration needs to
        // be retried
        if (status == NV_OK || status == NV_ERR_BUSY_RETRY)
            uvm_page_mask_region_fill(migrated_mask, subregion);
        else if (status != NV_WARN_NOTHING_TO_DO)
            return status;
@@ -29,12 +29,13 @@

// Service ATS faults in the range (base, base + UVM_VA_BLOCK_SIZE) with service
// type for individual pages in the range requested by page masks set in
// ats_context->fault.read_fault_mask/write_fault_mask. base must be aligned to
// UVM_VA_BLOCK_SIZE. The caller is responsible for ensuring that faulting
// addresses fall completely within the VMA. The caller is also responsible for
// ensuring that the faulting addresses don't overlap a GMMU region. (See
// uvm_ats_check_in_gmmu_region). The caller is also responsible for handling
// any errors returned by this function (fault cancellations etc.).
// ats_context->fault.read_fault_mask/write_fault_mask/prefetch_only_mask.
// base must be aligned to UVM_VA_BLOCK_SIZE. The caller is responsible for
// ensuring that faulting addresses fall completely within the VMA. The caller
// is also responsible for ensuring that the faulting addresses don't overlap
// a GMMU region. (See uvm_ats_check_in_gmmu_region). The caller is also
// responsible for handling any errors returned by this function (fault
// cancellations etc.).
//
// Returns the fault service status in ats_context->fault.faults_serviced_mask.
// In addition, ats_context->fault.reads_serviced_mask returns whether read
@@ -1,5 +1,5 @@
/*******************************************************************************
    Copyright (c) 2022-2023 NVIDIA Corporation
    Copyright (c) 2022-2024 NVIDIA Corporation

    Permission is hereby granted, free of charge, to any person obtaining a copy
    of this software and associated documentation files (the "Software"), to
@@ -51,14 +51,16 @@ void uvm_hal_blackwell_arch_init_properties(uvm_parent_gpu_t *parent_gpu)
    parent_gpu->rm_va_base = 0;
    parent_gpu->rm_va_size = 64 * UVM_SIZE_1PB;

    parent_gpu->peer_va_base = parent_gpu->rm_va_base + parent_gpu->rm_va_size;
    parent_gpu->peer_va_size = NV_MAX_DEVICES * UVM_PEER_IDENTITY_VA_SIZE;

    parent_gpu->uvm_mem_va_base = parent_gpu->rm_va_size + 384 * UVM_SIZE_1TB;
    parent_gpu->uvm_mem_va_size = UVM_MEM_VA_SIZE;

    // See uvm_mmu.h for mapping placement
    parent_gpu->flat_vidmem_va_base = (64 * UVM_SIZE_1PB) + (32 * UVM_SIZE_1TB);

    // TODO: Bug 3953852: Set this to true pending Blackwell changes
    parent_gpu->ce_phys_vidmem_write_supported = !uvm_parent_gpu_is_coherent(parent_gpu);
    parent_gpu->ce_phys_vidmem_write_supported = true;

    parent_gpu->peer_copy_mode = g_uvm_global.peer_copy_mode;

@@ -102,4 +104,6 @@ void uvm_hal_blackwell_arch_init_properties(uvm_parent_gpu_t *parent_gpu)
    parent_gpu->plc_supported = true;

    parent_gpu->no_ats_range_required = true;

    parent_gpu->conf_computing.per_channel_key_rotation = true;
}

@@ -37,7 +37,6 @@
#include "uvm_hal_types.h"
#include "uvm_blackwell_fault_buffer.h"
#include "hwref/blackwell/gb100/dev_fault.h"
#include "hwref/blackwell/gb100/dev_mmu.h"

static uvm_mmu_mode_hal_t blackwell_mmu_mode_hal;

@@ -1,5 +1,5 @@
/*******************************************************************************
    Copyright (c) 2015-2023 NVIDIA Corporation
    Copyright (c) 2015-2024 NVIDIA Corporation

    Permission is hereby granted, free of charge, to any person obtaining a copy
    of this software and associated documentation files (the "Software"), to
@@ -38,6 +38,7 @@
#include "clb06f.h"
#include "uvm_conf_computing.h"


// WLC push is decrypted by SEC2 or CE (in WLC schedule).
// In sysmem it's followed by auth tag.
#define WLC_PUSHBUFFER_ALIGNMENT max3(UVM_CONF_COMPUTING_AUTH_TAG_ALIGNMENT, \
@@ -97,6 +98,31 @@ typedef enum
    UVM_CHANNEL_UPDATE_MODE_FORCE_ALL
} uvm_channel_update_mode_t;

typedef enum
{
    // Reserve an entry that is expected to use p2p operations.
    UVM_CHANNEL_RESERVE_WITH_P2P,

    // Reserve an entry that is not expected to use p2p operations.
    UVM_CHANNEL_RESERVE_NO_P2P,
} uvm_channel_reserve_type_t;

bool uvm_channel_pool_is_p2p(uvm_channel_pool_t *pool)
{
    uvm_channel_manager_t *manager = pool->manager;
    uvm_gpu_id_t id;

    if (manager->pool_to_use.default_for_type[UVM_CHANNEL_TYPE_GPU_TO_GPU] == pool)
        return true;

    for_each_gpu_id_in_mask(id, &manager->gpu->peer_info.peer_gpu_mask) {
        if (manager->pool_to_use.gpu_to_gpu[uvm_id_gpu_index(id)] == pool)
            return true;
    }

    return false;
}

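// Illustrative sketch (editor's addition, not part of the driver sources): how
// the reservation type introduced above is chosen and enforced. It only uses
// identifiers that appear elsewhere in this change and is a simplified
// restatement, not additional driver logic.
//
//     uvm_channel_reserve_type_t reserve_type =
//         (channel_type == UVM_CHANNEL_TYPE_GPU_TO_GPU) ? UVM_CHANNEL_RESERVE_WITH_P2P :
//                                                         UVM_CHANNEL_RESERVE_NO_P2P;
//
//     // A WITH_P2P reservation is refused while channel->suspended_p2p is set,
//     // so NVLink STO recovery can quiesce p2p traffic without blocking other
//     // (non-p2p) work on the same channels.
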
bool uvm_channel_pool_uses_mutex(uvm_channel_pool_t *pool)
{
    // Work submission to proxy channels in SR-IOV heavy entails calling RM API
@@ -119,10 +145,12 @@ bool uvm_channel_pool_uses_mutex(uvm_channel_pool_t *pool)

static void channel_pool_lock_init(uvm_channel_pool_t *pool)
{
    uvm_lock_order_t order = UVM_LOCK_ORDER_CHANNEL;
    uvm_lock_order_t order;

    if (g_uvm_global.conf_computing_enabled && uvm_channel_pool_is_wlc(pool))
        order = UVM_LOCK_ORDER_WLC_CHANNEL;
    else
        order = UVM_LOCK_ORDER_CHANNEL;

    if (uvm_channel_pool_uses_mutex(pool))
        uvm_mutex_init(&pool->mutex, order);
@@ -274,7 +302,9 @@ NvU32 uvm_channel_get_available_gpfifo_entries(uvm_channel_t *channel)
    return available;
}

static bool try_claim_channel_locked(uvm_channel_t *channel, NvU32 num_gpfifo_entries)
static bool try_claim_channel_locked(uvm_channel_t *channel,
                                     NvU32 num_gpfifo_entries,
                                     uvm_channel_reserve_type_t reserve_type)
{
    bool claimed = false;

@@ -283,6 +313,9 @@ static bool try_claim_channel_locked(uvm_channel_t *channel, NvU32 num_gpfifo_en

    uvm_channel_pool_assert_locked(channel->pool);

    if (reserve_type == UVM_CHANNEL_RESERVE_WITH_P2P && channel->suspended_p2p)
        return false;

    if (channel_get_available_gpfifo_entries(channel) >= num_gpfifo_entries) {
        channel->current_gpfifo_count += num_gpfifo_entries;
        claimed = true;
@@ -291,12 +324,14 @@ static bool try_claim_channel_locked(uvm_channel_t *channel, NvU32 num_gpfifo_en
    return claimed;
}

static bool try_claim_channel(uvm_channel_t *channel, NvU32 num_gpfifo_entries)
static bool try_claim_channel(uvm_channel_t *channel,
                              NvU32 num_gpfifo_entries,
                              uvm_channel_reserve_type_t reserve_type)
{
    bool claimed;

    channel_pool_lock(channel->pool);
    claimed = try_claim_channel_locked(channel, num_gpfifo_entries);
    claimed = try_claim_channel_locked(channel, num_gpfifo_entries, reserve_type);
    channel_pool_unlock(channel->pool);

    return claimed;
@@ -349,7 +384,8 @@ static bool test_claim_and_lock_channel(uvm_channel_t *channel, NvU32 num_gpfifo
    if (uvm_channel_is_locked_for_push(channel))
        return false;

    if (try_claim_channel_locked(channel, num_gpfifo_entries)) {
    // Confidential compute is not using p2p ops, reserve without p2p
    if (try_claim_channel_locked(channel, num_gpfifo_entries, UVM_CHANNEL_RESERVE_NO_P2P)) {
        lock_channel_for_push(channel);
        return true;
    }
@@ -490,7 +526,9 @@ static NV_STATUS channel_reserve_and_lock_in_pool(uvm_channel_pool_t *pool, uvm_

    for_each_clear_bit(index, pool->conf_computing.push_locks, pool->num_channels) {
        channel = &pool->channels[index];
        if (try_claim_channel_locked(channel, 1)) {

        // Confidential compute is not using p2p ops, reserve without p2p
        if (try_claim_channel_locked(channel, 1, UVM_CHANNEL_RESERVE_NO_P2P)) {
            lock_channel_for_push(channel);
            goto done;
        }
@@ -529,7 +567,9 @@ done:
}

// Reserve a channel in the specified pool
static NV_STATUS channel_reserve_in_pool(uvm_channel_pool_t *pool, uvm_channel_t **channel_out)
static NV_STATUS channel_reserve_in_pool(uvm_channel_pool_t *pool,
                                         uvm_channel_reserve_type_t reserve_type,
                                         uvm_channel_t **channel_out)
{
    uvm_channel_t *channel;
    uvm_spin_loop_t spin;
@@ -541,7 +581,7 @@ static NV_STATUS channel_reserve_in_pool(uvm_channel_pool_t *pool, uvm_channel_t

    uvm_for_each_channel_in_pool(channel, pool) {
        // TODO: Bug 1764953: Prefer idle/less busy channels
        if (try_claim_channel(channel, 1)) {
        if (try_claim_channel(channel, 1, reserve_type)) {
            *channel_out = channel;
            return NV_OK;
        }
@@ -554,7 +594,7 @@ static NV_STATUS channel_reserve_in_pool(uvm_channel_pool_t *pool, uvm_channel_t

            uvm_channel_update_progress(channel);

            if (try_claim_channel(channel, 1)) {
            if (try_claim_channel(channel, 1, reserve_type)) {
                *channel_out = channel;

                return NV_OK;
@@ -564,6 +604,9 @@ static NV_STATUS channel_reserve_in_pool(uvm_channel_pool_t *pool, uvm_channel_t
            if (status != NV_OK)
                return status;

            if (reserve_type == UVM_CHANNEL_RESERVE_WITH_P2P && channel->suspended_p2p)
                return NV_ERR_BUSY_RETRY;

            UVM_SPIN_LOOP(&spin);
        }
    }
@@ -575,12 +618,18 @@ static NV_STATUS channel_reserve_in_pool(uvm_channel_pool_t *pool, uvm_channel_t

NV_STATUS uvm_channel_reserve_type(uvm_channel_manager_t *manager, uvm_channel_type_t type, uvm_channel_t **channel_out)
{
    uvm_channel_reserve_type_t reserve_type;
    uvm_channel_pool_t *pool = manager->pool_to_use.default_for_type[type];

    UVM_ASSERT(pool != NULL);
    UVM_ASSERT(type < UVM_CHANNEL_TYPE_COUNT);

    return channel_reserve_in_pool(pool, channel_out);
    if (type == UVM_CHANNEL_TYPE_GPU_TO_GPU)
        reserve_type = UVM_CHANNEL_RESERVE_WITH_P2P;
    else
        reserve_type = UVM_CHANNEL_RESERVE_NO_P2P;

    return channel_reserve_in_pool(pool, reserve_type, channel_out);
}

NV_STATUS uvm_channel_reserve_gpu_to_gpu(uvm_channel_manager_t *manager,
@@ -596,7 +645,7 @@ NV_STATUS uvm_channel_reserve_gpu_to_gpu(uvm_channel_manager_t *manager,

    UVM_ASSERT(pool->pool_type == UVM_CHANNEL_POOL_TYPE_CE);

    return channel_reserve_in_pool(pool, channel_out);
    return channel_reserve_in_pool(pool, UVM_CHANNEL_RESERVE_WITH_P2P, channel_out);
}

NV_STATUS uvm_channel_manager_wait(uvm_channel_manager_t *manager)
@@ -1441,7 +1490,6 @@ void uvm_channel_end_push(uvm_push_t *push)
    bool needs_sec2_work_submit = false;

    channel_pool_lock(channel->pool);

    encrypt_push(push);

    new_tracking_value = ++channel->tracking_sem.queued_value;
@@ -1523,6 +1571,7 @@ void uvm_channel_end_push(uvm_push_t *push)
    // push must be updated before that. Notably uvm_pushbuffer_end_push() has
    // to be called first.
    unlock_channel_for_push(channel);

    channel_pool_unlock(channel->pool);

    // This memory barrier is borrowed from CUDA, as it supposedly fixes perf
@@ -1805,13 +1854,14 @@ NV_STATUS uvm_channel_reserve(uvm_channel_t *channel, NvU32 num_gpfifo_entries)
    if (g_uvm_global.conf_computing_enabled)
        return channel_reserve_and_lock(channel, num_gpfifo_entries);

    if (try_claim_channel(channel, num_gpfifo_entries))
    // Direct channel reservations don't use p2p
    if (try_claim_channel(channel, num_gpfifo_entries, UVM_CHANNEL_RESERVE_NO_P2P))
        return NV_OK;

    uvm_channel_update_progress(channel);

    uvm_spin_loop_init(&spin);
    while (!try_claim_channel(channel, num_gpfifo_entries) && status == NV_OK) {
    while (!try_claim_channel(channel, num_gpfifo_entries, UVM_CHANNEL_RESERVE_NO_P2P) && status == NV_OK) {
        UVM_SPIN_LOOP(&spin);
        status = uvm_channel_check_errors(channel);
        uvm_channel_update_progress(channel);
@@ -1825,9 +1875,11 @@ void uvm_channel_release(uvm_channel_t *channel, NvU32 num_gpfifo_entries)
    channel_pool_lock(channel->pool);

    UVM_ASSERT(uvm_channel_is_locked_for_push(channel));

    unlock_channel_for_push(channel);

    UVM_ASSERT(channel->current_gpfifo_count >= num_gpfifo_entries);

    channel->current_gpfifo_count -= num_gpfifo_entries;
    channel_pool_unlock(channel->pool);
}
@@ -1852,6 +1904,134 @@ static uvm_gpfifo_entry_t *uvm_channel_get_first_pending_entry(uvm_channel_t *ch
    return entry;
}

static NV_STATUS channel_suspend_p2p(uvm_channel_t *channel)
{
    NV_STATUS status = NV_OK;

    UVM_ASSERT(channel);
    UVM_ASSERT(!channel->suspended_p2p);

    // Reserve all entries to block traffic.
    // Each channel needs 1 entry as sentinel.
    status = uvm_channel_reserve(channel, channel->num_gpfifo_entries - 1);

    // Prevent p2p traffic from reserving entries
    if (status == NV_OK)
        channel->suspended_p2p = true;

    // Release the entries reserved above to allow non-p2p traffic
    uvm_channel_release(channel, channel->num_gpfifo_entries - 1);

    return status;
}

static void channel_resume_p2p(uvm_channel_t *channel)
{
    UVM_ASSERT(channel);
    UVM_ASSERT(channel->suspended_p2p);

    channel->suspended_p2p = false;
}

static NV_STATUS channel_pool_suspend_p2p(uvm_channel_pool_t *pool)
{
    NV_STATUS status = NV_OK;
    NvU32 i;

    UVM_ASSERT(pool);
    UVM_ASSERT(!uvm_channel_pool_is_wlc(pool));
    UVM_ASSERT(!uvm_channel_pool_is_lcic(pool));

    for (i = 0; i < pool->num_channels; ++i) {
        status = channel_suspend_p2p(pool->channels + i);
        if (status != NV_OK)
            break;
    }

    // Resume suspended channels in case of error
    while ((status != NV_OK) && (i-- > 0))
        channel_resume_p2p(pool->channels + i);

    for (i = 0; i < pool->num_channels; ++i)
        UVM_ASSERT(pool->channels[i].suspended_p2p == (status == NV_OK));

    return status;
}

static void channel_pool_resume_p2p(uvm_channel_pool_t *pool)
{
    NvU32 i;

    UVM_ASSERT(pool);
    UVM_ASSERT(!uvm_channel_pool_is_wlc(pool));
    UVM_ASSERT(!uvm_channel_pool_is_lcic(pool));

    for (i = 0; i < pool->num_channels; ++i)
        channel_resume_p2p(pool->channels + i);
}

NV_STATUS uvm_channel_manager_suspend_p2p(uvm_channel_manager_t *channel_manager)
{
    uvm_channel_pool_t *pool;
    NV_STATUS status = NV_OK;
    uvm_gpu_id_t gpu_id;
    DECLARE_BITMAP(suspended_pools, UVM_COPY_ENGINE_COUNT_MAX);

    // Pools can be assigned to multiple 'pool_to_use' locations
    // Use bitmap to track which were suspended.
    bitmap_zero(suspended_pools, channel_manager->num_channel_pools);

    for_each_gpu_id_in_mask(gpu_id, &channel_manager->gpu->peer_info.peer_gpu_mask) {
        pool = channel_manager->pool_to_use.gpu_to_gpu[uvm_id_gpu_index(gpu_id)];
        if (pool && !test_bit(uvm_channel_pool_index_in_channel_manager(pool), suspended_pools)) {
            status = channel_pool_suspend_p2p(pool);
            if (status != NV_OK)
                break;

            __set_bit(uvm_channel_pool_index_in_channel_manager(pool), suspended_pools);
        }
    }

    pool = channel_manager->pool_to_use.default_for_type[UVM_CHANNEL_TYPE_GPU_TO_GPU];
    if (status == NV_OK && !test_bit(uvm_channel_pool_index_in_channel_manager(pool), suspended_pools)) {
        status = channel_pool_suspend_p2p(pool);

        // Do not set the suspended_pools bit here. If status is NV_OK it's not
        // needed, otherwise it should not be set anyway.
    }

    // Resume suspended pools in case of error
    if (status != NV_OK) {
        unsigned i;

        for_each_set_bit(i, suspended_pools, channel_manager->num_channel_pools)
            channel_pool_resume_p2p(channel_manager->channel_pools + i);
    }

    return status;
}

void uvm_channel_manager_resume_p2p(uvm_channel_manager_t *channel_manager)
{
    uvm_channel_pool_t *pool;
    uvm_gpu_id_t gpu_id;
    DECLARE_BITMAP(resumed_pools, UVM_COPY_ENGINE_COUNT_MAX);

    // Pools can be assigned to multiple 'pool_to_use' locations
    // Use bitmap to track which were suspended.
    bitmap_zero(resumed_pools, channel_manager->num_channel_pools);

    for_each_gpu_id_in_mask(gpu_id, &channel_manager->gpu->peer_info.peer_gpu_mask) {
        pool = channel_manager->pool_to_use.gpu_to_gpu[uvm_id_gpu_index(gpu_id)];
        if (pool && !test_and_set_bit(uvm_channel_pool_index_in_channel_manager(pool), resumed_pools))
            channel_pool_resume_p2p(pool);
    }

    pool = channel_manager->pool_to_use.default_for_type[UVM_CHANNEL_TYPE_GPU_TO_GPU];
    if (!test_and_set_bit(uvm_channel_pool_index_in_channel_manager(pool), resumed_pools))
        channel_pool_resume_p2p(pool);
}

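// Illustrative sketch (editor's addition, not part of the driver sources): the
// expected ordering during NVLink STO recovery, based on the suspend/resume
// entry points above. The link-restore step is handled by RM and is only an
// assumption here.
//
//     status = uvm_channel_manager_suspend_p2p(gpu->channel_manager);  // quiesce p2p traffic
//     if (status == NV_OK) {
//         // ... RM restores the NVLink links (external to UVM) ...
//         uvm_channel_manager_resume_p2p(gpu->channel_manager);        // p2p reservations allowed again
//     }
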
NV_STATUS uvm_channel_get_status(uvm_channel_t *channel)
{
    uvm_gpu_t *gpu;
@@ -1891,7 +2071,7 @@ NV_STATUS uvm_channel_check_errors(uvm_channel_t *channel)
    NV_STATUS status = uvm_channel_get_status(channel);

    if (status == NV_OK)
        return NV_OK;
        return status;

    UVM_ERR_PRINT("Detected a channel error, channel %s GPU %s\n",
                  channel->name,
@@ -2134,6 +2314,7 @@ static uvmGpuTsgHandle channel_get_tsg(uvm_channel_t *channel)

        tsg_index = uvm_channel_index_in_pool(channel);
    }

    UVM_ASSERT(tsg_index < pool->num_tsgs);

    return pool->tsg_handles[tsg_index];
@@ -2251,6 +2432,7 @@ static NV_STATUS channel_create(uvm_channel_pool_t *pool, uvm_channel_t *channel
    if (status != NV_OK)
        goto error;

    channel->suspended_p2p = false;
    channel->num_gpfifo_entries = channel_pool_num_gpfifo_entries(pool);
    channel->gpfifo_entries = uvm_kvmalloc_zero(sizeof(*channel->gpfifo_entries) * channel->num_gpfifo_entries);
    if (channel->gpfifo_entries == NULL) {
@@ -2444,6 +2626,7 @@ static UVM_GPU_CHANNEL_ENGINE_TYPE pool_type_to_engine_type(uvm_channel_pool_typ
{
    if (pool_type == UVM_CHANNEL_POOL_TYPE_SEC2)
        return UVM_GPU_CHANNEL_ENGINE_TYPE_SEC2;

    return UVM_GPU_CHANNEL_ENGINE_TYPE_CE;
}

@@ -2862,10 +3045,6 @@ static void pick_ces_for_channel_types(uvm_channel_manager_t *manager,
{
    unsigned i;

    // In Confidential Computing, do not mark all usable CEs, only the preferred
    // ones, because non-preferred CE channels are guaranteed to not be used.
    bool mark_all_usable_ces = !g_uvm_global.conf_computing_enabled;

    for (i = 0; i < num_channel_types; ++i) {
        unsigned ce;
        unsigned best_ce = UVM_COPY_ENGINE_COUNT_MAX;
@@ -2875,7 +3054,10 @@ static void pick_ces_for_channel_types(uvm_channel_manager_t *manager,
            if (!ce_is_usable(ce_caps + ce))
                continue;

            if (mark_all_usable_ces)
            // In Confidential Computing, do not mark all usable CEs, only the
            // preferred ones, because non-preferred CE channels are guaranteed
            // to not be used.
            if (!g_uvm_global.conf_computing_enabled)
                __set_bit(ce, manager->ce_mask);

            if (best_ce == UVM_COPY_ENGINE_COUNT_MAX) {
@@ -2919,6 +3101,7 @@ static void pick_ces_conf_computing(uvm_channel_manager_t *manager,
                                    unsigned *preferred_ce)
{
    unsigned best_wlc_ce;
    uvm_gpu_t *gpu = manager->gpu;

    // The WLC type must go last so an unused CE is chosen, if available
    uvm_channel_type_t types[] = {UVM_CHANNEL_TYPE_CPU_TO_GPU,
@@ -2937,9 +3120,10 @@ static void pick_ces_conf_computing(uvm_channel_manager_t *manager,

    best_wlc_ce = preferred_ce[UVM_CHANNEL_TYPE_WLC];

    // TODO: Bug 4576908: in HCC, the WLC type should not share a CE with any
    // channel type other than LCIC. The assertion should be a check instead.
    UVM_ASSERT(ce_usage_count(best_wlc_ce, preferred_ce) == 0);
    // The implementation of engine-wide key rotation depends on using a
    // dedicated CE for the WLC and LCIC pools.
    if (uvm_conf_computing_is_key_rotation_enabled(gpu) && !gpu->parent->conf_computing.per_channel_key_rotation)
        UVM_ASSERT(ce_usage_count(best_wlc_ce, preferred_ce) == 0);
}

static NV_STATUS channel_manager_pick_ces(uvm_channel_manager_t *manager, unsigned *preferred_ce)
@@ -2967,6 +3151,7 @@ static NV_STATUS channel_manager_pick_ces(uvm_channel_manager_t *manager, unsign
        pick_ces_conf_computing(manager, ces_caps->copyEngineCaps, preferred_ce);
    else
        pick_ces(manager, ces_caps->copyEngineCaps, preferred_ce);

out:
    uvm_kvfree(ces_caps);

@@ -3000,6 +3185,8 @@ void uvm_channel_manager_set_p2p_ce(uvm_channel_manager_t *manager, uvm_gpu_t *p

    UVM_ASSERT(manager->gpu != peer);
    UVM_ASSERT(optimal_ce < UVM_COPY_ENGINE_COUNT_MAX);
    UVM_ASSERT(manager->gpu->parent->peer_copy_mode != UVM_GPU_PEER_COPY_MODE_UNSUPPORTED);
    UVM_ASSERT(peer->parent->peer_copy_mode != UVM_GPU_PEER_COPY_MODE_UNSUPPORTED);

    manager->pool_to_use.gpu_to_gpu[peer_gpu_index] = channel_manager_ce_pool(manager, optimal_ce);
}
@@ -3063,7 +3250,7 @@ static void init_channel_manager_conf(uvm_channel_manager_t *manager)

    // 2- Allocation locations

    if (uvm_conf_computing_mode_is_hcc(gpu)) {
    if (g_uvm_global.conf_computing_enabled) {
        UVM_ASSERT(gpu->mem_info.size > 0);

        // When the Confidential Computing feature is enabled, the GPU is
@@ -3290,7 +3477,7 @@ static NV_STATUS setup_wlc_schedule(uvm_channel_t *wlc)
    // WLC can only process one job at a time.
    // Prune any initialization entries and block all but one (+1 for sentinel)
    uvm_channel_update_progress(wlc);
    if (!try_claim_channel(wlc, wlc->num_gpfifo_entries - 2)) {
    if (!try_claim_channel(wlc, wlc->num_gpfifo_entries - 2, UVM_CHANNEL_RESERVE_NO_P2P)) {
        status = NV_ERR_INVALID_STATE;
        goto free_gpfifo_entries;
    }
@@ -3437,7 +3624,7 @@ static NV_STATUS setup_lcic_schedule(uvm_channel_t *paired_wlc, uvm_channel_t *l
    // Prune any initialization entries and
    // block all gpfifo entries (-1 for sentinel)
    uvm_channel_update_progress(lcic);
    if (!try_claim_channel(lcic, lcic->num_gpfifo_entries - 1)) {
    if (!try_claim_channel(lcic, lcic->num_gpfifo_entries - 1, UVM_CHANNEL_RESERVE_NO_P2P)) {
        status = NV_ERR_INVALID_STATE;
        goto free_gpfifo_entries;
    }
@@ -3700,6 +3887,7 @@ static void channel_manager_destroy_pools(uvm_channel_manager_t *manager)
{
    uvm_rm_mem_free(manager->gpu->conf_computing.iv_rm_mem);
    manager->gpu->conf_computing.iv_rm_mem = NULL;

    while (manager->num_channel_pools > 0)
        channel_pool_destroy(manager->channel_pools + manager->num_channel_pools - 1);

@@ -3856,6 +4044,7 @@ const char *uvm_channel_type_to_string(uvm_channel_type_t channel_type)

const char *uvm_channel_pool_type_to_string(uvm_channel_pool_type_t channel_pool_type)
{

    BUILD_BUG_ON(UVM_CHANNEL_POOL_TYPE_COUNT != 5);

    switch (channel_pool_type) {
@@ -3870,17 +4059,21 @@ const char *uvm_channel_pool_type_to_string(uvm_channel_pool_type_t channel_pool

static const char *get_gpfifo_location_string(uvm_channel_t *channel)
{

    // SEC2 channels override the channel manager location for GPFIFO.
    if (uvm_channel_is_sec2(channel))
        return buffer_location_to_string(UVM_BUFFER_LOCATION_SYS);

    return buffer_location_to_string(channel->pool->manager->conf.gpfifo_loc);
}

static const char *get_gpput_location_string(uvm_channel_t *channel)
{

    // SEC2 channels override the channel manager location for GPPUT.
    if (uvm_channel_is_sec2(channel))
        return buffer_location_to_string(UVM_BUFFER_LOCATION_SYS);

    return buffer_location_to_string(channel->pool->manager->conf.gpput_loc);
}

@@ -200,6 +200,7 @@ typedef struct
    // num_tsgs is 1. Pre-Volta GPUs also have a single TSG object, but since HW
    // does not support TSG for CE engines, a HW TSG is not created, but a TSG
    // object is required to allocate channels.
    //
    // When Confidential Computing mode is enabled, the WLC and LCIC channel
    // types require one TSG for each WLC/LCIC pair of channels. In this case,
    // we do not use a TSG per channel pool, but instead a TSG per WLC/LCIC
@@ -416,6 +417,8 @@ struct uvm_channel_struct
        struct list_head channel_list_node;
        NvU32 pending_event_count;
    } tools;

    bool suspended_p2p;
};

struct uvm_channel_manager_struct
@@ -478,6 +481,12 @@ struct uvm_channel_manager_struct
    } conf_computing;
};

// Index of a channel pool within the manager
static unsigned uvm_channel_pool_index_in_channel_manager(const uvm_channel_pool_t *pool)
{
    return pool - pool->manager->channel_pools;
}

// Create a channel manager for the GPU
NV_STATUS uvm_channel_manager_create(uvm_gpu_t *gpu, uvm_channel_manager_t **manager_out);

@@ -532,6 +541,8 @@ NvU64 uvm_channel_get_static_pb_unprotected_sysmem_gpu_va(uvm_channel_t *channel

char* uvm_channel_get_static_pb_unprotected_sysmem_cpu(uvm_channel_t *channel);

bool uvm_channel_pool_is_p2p(uvm_channel_pool_t *pool);

static bool uvm_channel_pool_is_proxy(uvm_channel_pool_t *pool)
{
    UVM_ASSERT(uvm_pool_type_is_valid(pool->pool_type));
@@ -549,6 +560,11 @@ static bool uvm_channel_pool_is_ce(uvm_channel_pool_t *pool)
    return !uvm_channel_pool_is_sec2(pool);
}

static bool uvm_channel_is_p2p(uvm_channel_t *channel)
{
    return uvm_channel_pool_is_p2p(channel->pool);
}

static bool uvm_channel_is_ce(uvm_channel_t *channel)
{
    return uvm_channel_pool_is_ce(channel->pool);
@@ -584,14 +600,25 @@ bool uvm_channel_is_privileged(uvm_channel_t *channel);
// Destroy the channel manager
void uvm_channel_manager_destroy(uvm_channel_manager_t *channel_manager);

// Suspend p2p traffic on channels used for p2p operations.
// This is used in STO recovery sequence to quiet nvlink traffic before the
// links can be restored.
NV_STATUS uvm_channel_manager_suspend_p2p(uvm_channel_manager_t *channel_manager);

// Resume p2p traffic on channels used for p2p operations.
// This is used at the end of the STO recovery sequence to resume suspended p2p
// traffic on p2p channels.
void uvm_channel_manager_resume_p2p(uvm_channel_manager_t *channel_manager);

// Get the current status of the channel
// Returns NV_OK if the channel is in a good state and NV_ERR_RC_ERROR
// otherwise. Notably this never sets the global fatal error.
// Returns NV_OK if the channel is in a good state,
// NV_ERR_RC_ERROR otherwise.
// Notably this never sets the global fatal error.
NV_STATUS uvm_channel_get_status(uvm_channel_t *channel);

// Check for channel errors
// Checks for channel errors by calling uvm_channel_get_status(). If an error
// occurred, sets the global fatal error and prints errors.
// Checks for channel errors by calling uvm_channel_get_status().
// If a fatal error occurred, sets the global fatal error and prints errors.
NV_STATUS uvm_channel_check_errors(uvm_channel_t *channel);

// Check errors on all channels in the channel manager
@@ -625,6 +652,7 @@ static bool uvm_channel_manager_is_wlc_ready(uvm_channel_manager_t *manager)
{
    return manager->conf_computing.wlc_ready;
}

// Get the GPU VA of semaphore_channel's tracking semaphore within the VA space
// associated with access_channel.
//

@@ -206,9 +206,10 @@ static NV_STATUS uvm_test_rc_for_gpu(uvm_gpu_t *gpu)
    uvm_for_each_pool(pool, manager) {
        uvm_channel_t *channel;

    // Skip LCIC channels as those can't accept any pushes
    if (uvm_channel_pool_is_lcic(pool))
        continue;
        // Skip LCIC channels as those can't accept any pushes
        if (uvm_channel_pool_is_lcic(pool))
            continue;

        uvm_for_each_channel_in_pool(channel, pool) {
            NvU32 i;
            for (i = 0; i < 512; ++i) {
@@ -1292,6 +1293,7 @@ static NV_STATUS test_write_ctrl_gpfifo_noop(uvm_va_space_t *va_space)
        // after their schedule is set up
        if (uvm_channel_pool_is_wlc(pool))
            continue;

        uvm_for_each_channel_in_pool(channel, pool) {
            NvU32 i;

@@ -1331,6 +1333,7 @@ static NV_STATUS test_write_ctrl_gpfifo_and_pushes(uvm_va_space_t *va_space)
        // after their schedule is set up
        if (uvm_channel_pool_is_wlc(pool))
            continue;

        uvm_for_each_channel_in_pool(channel, pool) {
            NvU32 i;
            uvm_push_t push;
@@ -1473,6 +1476,7 @@ static NV_STATUS test_channel_pushbuffer_extension_base(uvm_va_space_t *va_space
        // Skip LCIC channels as those can't accept any pushes
        if (uvm_channel_pool_is_lcic(pool))
            continue;

        uvm_for_each_channel_in_pool(channel, pool) {
            NvU32 i;
            uvm_push_t push;

@@ -1,5 +1,5 @@
/*******************************************************************************
    Copyright (c) 2021-2023 NVIDIA Corporation
    Copyright (c) 2021-2024 NVIDIA Corporation

    Permission is hereby granted, free of charge, to any person obtaining a copy
    of this software and associated documentation files (the "Software"), to
@@ -65,32 +65,13 @@ static ulong uvm_conf_computing_channel_iv_rotation_limit = UVM_CONF_COMPUTING_I

module_param(uvm_conf_computing_channel_iv_rotation_limit, ulong, S_IRUGO);

static UvmGpuConfComputeMode uvm_conf_computing_get_mode(const uvm_parent_gpu_t *parent)
{
    return parent->rm_info.gpuConfComputeCaps.mode;
}

bool uvm_conf_computing_mode_is_hcc(const uvm_gpu_t *gpu)
{
    return uvm_conf_computing_get_mode(gpu->parent) == UVM_GPU_CONF_COMPUTE_MODE_HCC;
}

void uvm_conf_computing_check_parent_gpu(const uvm_parent_gpu_t *parent)
{
    uvm_parent_gpu_t *other_parent;
    UvmGpuConfComputeMode parent_mode = uvm_conf_computing_get_mode(parent);

    uvm_assert_mutex_locked(&g_uvm_global.global_lock);

    // The Confidential Computing state of the GPU should match that of the
    // system.
    UVM_ASSERT((parent_mode != UVM_GPU_CONF_COMPUTE_MODE_NONE) == g_uvm_global.conf_computing_enabled);

    // All GPUs derive Confidential Computing status from their parent. By
    // current policy all parent GPUs have identical Confidential Computing
    // status.
    for_each_parent_gpu(other_parent)
        UVM_ASSERT(parent_mode == uvm_conf_computing_get_mode(other_parent));
    // Confidential Computing enablement on the system should match enablement
    // on the GPU.
    UVM_ASSERT(parent->rm_info.gpuConfComputeCaps.bConfComputingEnabled == g_uvm_global.conf_computing_enabled);
}

static void dma_buffer_destroy_locked(uvm_conf_computing_dma_buffer_pool_t *dma_buffer_pool,
@@ -343,9 +324,6 @@ static NV_STATUS dummy_iv_mem_init(uvm_gpu_t *gpu)
{
    NV_STATUS status;

    if (!uvm_conf_computing_mode_is_hcc(gpu))
        return NV_OK;

    status = uvm_mem_alloc_sysmem_dma(sizeof(UvmCslIv), gpu, NULL, &gpu->conf_computing.iv_mem);
    if (status != NV_OK)
        return status;

@@ -1,5 +1,5 @@
/*******************************************************************************
    Copyright (c) 2021-2023 NVIDIA Corporation
    Copyright (c) 2021-2024 NVIDIA Corporation

    Permission is hereby granted, free of charge, to any person obtaining a copy
    of this software and associated documentation files (the "Software"), to
@@ -62,8 +62,6 @@

void uvm_conf_computing_check_parent_gpu(const uvm_parent_gpu_t *parent);

bool uvm_conf_computing_mode_is_hcc(const uvm_gpu_t *gpu);

typedef struct
{
    // List of free DMA buffers (uvm_conf_computing_dma_buffer_t).

@@ -54,6 +54,9 @@ static NV_STATUS uvm_register_callbacks(void)
    g_exported_uvm_ops.stopDevice = NULL;
    g_exported_uvm_ops.isrTopHalf = uvm_isr_top_half_entry;

    g_exported_uvm_ops.drainP2P = uvm_suspend_and_drainP2P_entry;
    g_exported_uvm_ops.resumeP2P = uvm_resumeP2P_entry;

    // Register the UVM callbacks with the main GPU driver:
    status = uvm_rm_locked_call(nvUvmInterfaceRegisterUvmCallbacks(&g_exported_uvm_ops));
    if (status != NV_OK)
@@ -100,12 +103,6 @@ NV_STATUS uvm_global_init(void)
        goto error;
    }

    status = errno_to_nv_status(nv_kthread_q_init(&g_uvm_global.deferred_release_q, "UVM deferred release queue"));
    if (status != NV_OK) {
        UVM_DBG_PRINT("nv_kthread_q_init() failed: %s\n", nvstatusToString(status));
        goto error;
    }

    status = uvm_procfs_init();
    if (status != NV_OK) {
        UVM_ERR_PRINT("uvm_procfs_init() failed: %s\n", nvstatusToString(status));
@@ -194,15 +191,22 @@ NV_STATUS uvm_global_init(void)
        goto error;
    }

    // This sets up the ISR (interrupt service routine), by hooking into RM's top-half ISR callback. As soon as this
    // call completes, GPU interrupts will start arriving, so it's important to be prepared to receive interrupts before
    // this point:
    // This sets up the ISR (interrupt service routine), by hooking into RM's
    // top-half ISR callback. As soon as this call completes, GPU interrupts
    // will start arriving, so it's important to be prepared to receive
    // interrupts before this point.
    status = uvm_register_callbacks();
    if (status != NV_OK) {
        UVM_ERR_PRINT("uvm_register_callbacks failed: %s\n", nvstatusToString(status));
        goto error;
    }

    status = errno_to_nv_status(nv_kthread_q_init(&g_uvm_global.deferred_release_q, "UVM deferred release queue"));
    if (status != NV_OK) {
        UVM_DBG_PRINT("nv_kthread_q_init() failed: %s\n", nvstatusToString(status));
        goto error;
    }

    return NV_OK;

error:
@@ -214,9 +218,7 @@ void uvm_global_exit(void)
{
    uvm_assert_mutex_unlocked(&g_uvm_global.global_lock);

    // Guarantee completion of any release callbacks scheduled after the flush
    // in uvm_resume().
    nv_kthread_q_flush(&g_uvm_global.deferred_release_q);
    nv_kthread_q_stop(&g_uvm_global.deferred_release_q);

    uvm_unregister_callbacks();
    uvm_service_block_context_exit();
@@ -237,7 +239,6 @@ void uvm_global_exit(void)

    uvm_procfs_exit();

    nv_kthread_q_stop(&g_uvm_global.deferred_release_q);
    nv_kthread_q_stop(&g_uvm_global.global_q);

    uvm_assert_mutex_unlocked(&g_uvm_global.va_spaces.lock);
@@ -472,3 +473,68 @@ NV_STATUS uvm_global_gpu_check_ecc_error(uvm_processor_mask_t *gpus)

    return NV_OK;
}

static NV_STATUS suspend_and_drainP2P(const NvProcessorUuid *parent_uuid)
{
    NV_STATUS status = NV_OK;
    uvm_parent_gpu_t *parent_gpu;

    uvm_mutex_lock(&g_uvm_global.global_lock);

    // NVLINK STO recovery is not supported in combination with MIG
    parent_gpu = uvm_parent_gpu_get_by_uuid(parent_uuid);
    if (!parent_gpu || parent_gpu->smc.enabled) {
        status = NV_ERR_INVALID_DEVICE;
        goto unlock;
    }

    status = uvm_channel_manager_suspend_p2p(parent_gpu->gpus[0]->channel_manager);

unlock:
    uvm_mutex_unlock(&g_uvm_global.global_lock);
    return status;
}

static NV_STATUS resumeP2P(const NvProcessorUuid *parent_uuid)
{
    NV_STATUS status = NV_OK;
    uvm_parent_gpu_t *parent_gpu;

    uvm_mutex_lock(&g_uvm_global.global_lock);

    // NVLINK STO recovery is not supported in combination with MIG
    parent_gpu = uvm_parent_gpu_get_by_uuid(parent_uuid);
    if (!parent_gpu || parent_gpu->smc.enabled) {
        status = NV_ERR_INVALID_DEVICE;
        goto unlock;
    }

    uvm_channel_manager_resume_p2p(parent_gpu->gpus[0]->channel_manager);

unlock:
    uvm_mutex_unlock(&g_uvm_global.global_lock);
    return status;
}

NV_STATUS uvm_suspend_and_drainP2P_entry(const NvProcessorUuid *uuid)
{
    UVM_ENTRY_RET(suspend_and_drainP2P(uuid));
}

NV_STATUS uvm_resumeP2P_entry(const NvProcessorUuid *uuid)
{
    UVM_ENTRY_RET(resumeP2P(uuid));
}

NV_STATUS uvm_global_gpu_check_nvlink_error(uvm_processor_mask_t *gpus)
{
    uvm_gpu_t *gpu;

    for_each_gpu_in_mask(gpu, gpus) {
        NV_STATUS status = uvm_gpu_check_nvlink_error(gpu);
        if (status != NV_OK)
            return status;
    }

    return NV_OK;
}

@@ -171,6 +171,12 @@ NV_STATUS uvm_suspend_entry(void);
// Recover after exit from a system sleep state
NV_STATUS uvm_resume_entry(void);

// Block all P2P traffic on the GPU's channels
NV_STATUS uvm_suspend_and_drainP2P_entry(const NvProcessorUuid *uuid);

// Resume P2P traffic on the GPU's channels
NV_STATUS uvm_resumeP2P_entry(const NvProcessorUuid *uuid);

// Add parent GPU to the global table.
//
// LOCKING: requires that you hold the global lock and gpu_table_lock
@@ -300,7 +306,7 @@ static uvm_gpu_t *uvm_processor_mask_find_first_gpu(const uvm_processor_mask_t *
    return gpu;
}

static uvm_gpu_t *__uvm_processor_mask_find_next_gpu(const uvm_processor_mask_t *gpus, uvm_gpu_t *gpu)
static uvm_gpu_t *uvm_processor_mask_find_next_gpu(const uvm_processor_mask_t *gpus, uvm_gpu_t *gpu)
{
    uvm_gpu_id_t gpu_id;

@@ -322,7 +328,45 @@ static uvm_gpu_t *__uvm_processor_mask_find_next_gpu(const uvm_processor_mask_t
#define for_each_gpu_in_mask(gpu, mask)                      \
    for (gpu = uvm_processor_mask_find_first_gpu(mask);      \
         gpu != NULL;                                        \
         gpu = __uvm_processor_mask_find_next_gpu(mask, gpu))
         gpu = uvm_processor_mask_find_next_gpu(mask, gpu))

static uvm_parent_gpu_t *uvm_parent_processor_mask_find_first_gpu(const uvm_parent_processor_mask_t *mask)
{
    uvm_parent_gpu_t *parent_gpu;
    uvm_parent_gpu_id_t parent_id = uvm_parent_processor_mask_find_first_gpu_id(mask);

    if (UVM_PARENT_ID_IS_INVALID(parent_id))
        return NULL;

    parent_gpu = uvm_parent_gpu_get(parent_id);

    // See comment in uvm_processor_mask_find_first_gpu().
    UVM_ASSERT_MSG(parent_gpu, "parent_id %u\n", uvm_parent_id_value(parent_id));
    return parent_gpu;
}

static uvm_parent_gpu_t *uvm_parent_processor_mask_find_next_gpu(const uvm_parent_processor_mask_t *mask,
                                                                 uvm_parent_gpu_t *parent_gpu)
{
    uvm_parent_gpu_id_t parent_id;

    UVM_ASSERT(parent_gpu);
    parent_id = uvm_parent_processor_mask_find_next_gpu_id(mask, uvm_parent_id_next(parent_gpu->id));
    if (UVM_PARENT_ID_IS_INVALID(parent_id))
        return NULL;

    parent_gpu = uvm_parent_gpu_get(parent_id);

    // See comment in uvm_processor_mask_find_first_gpu().
    UVM_ASSERT_MSG(parent_gpu, "parent_id %u\n", uvm_parent_id_value(parent_id));
    return parent_gpu;
}

// Helper to iterate over all parent GPUs in the input mask
#define for_each_parent_gpu_in_mask(parent_gpu, mask)                          \
    for ((parent_gpu) = uvm_parent_processor_mask_find_first_gpu((mask));      \
         (parent_gpu);                                                         \
         (parent_gpu) = uvm_parent_processor_mask_find_next_gpu((mask), (parent_gpu)))

// Helper to iterate over all GPUs retained by the UVM driver
// (across all va spaces).
@@ -330,7 +374,7 @@ static uvm_gpu_t *__uvm_processor_mask_find_next_gpu(const uvm_processor_mask_t
    for (({uvm_assert_mutex_locked(&g_uvm_global.global_lock);                      \
           gpu = uvm_processor_mask_find_first_gpu(&g_uvm_global.retained_gpus);}); \
         gpu != NULL;                                                               \
         gpu = __uvm_processor_mask_find_next_gpu(&g_uvm_global.retained_gpus, gpu))
         gpu = uvm_processor_mask_find_next_gpu(&g_uvm_global.retained_gpus, gpu))

// LOCKING: Must hold either the global_lock or the gpu_table_lock
static uvm_parent_gpu_t *uvm_global_find_next_parent_gpu(uvm_parent_gpu_t *parent_gpu)
@@ -407,6 +451,10 @@ void uvm_global_gpu_release(const uvm_processor_mask_t *mask);
// Notably this check cannot be performed where it's not safe to call into RM.
NV_STATUS uvm_global_gpu_check_ecc_error(uvm_processor_mask_t *gpus);

// Check for nvlink errors for all GPUs in a mask
// Notably this check cannot be performed where it's not safe to call into RM.
NV_STATUS uvm_global_gpu_check_nvlink_error(uvm_processor_mask_t *gpus);

// Pre-allocate fault service contexts.
NV_STATUS uvm_service_block_context_init(void);

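// Illustrative sketch (editor's addition, not part of the driver sources):
// typical use of the parent-GPU mask iterator declared above. The mask
// variable and its initialization are hypothetical; the iterator and helpers
// are the ones defined in this header.
//
//     uvm_parent_gpu_t *parent_gpu;
//     uvm_parent_processor_mask_t mask;   // assumed to be populated elsewhere
//
//     for_each_parent_gpu_in_mask(parent_gpu, &mask)
//         UVM_DBG_PRINT("parent GPU id %u\n", uvm_parent_id_value(parent_gpu->id));
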
@@ -134,11 +134,13 @@ static NV_STATUS get_gpu_caps(uvm_gpu_t *gpu)

    if (gpu_caps.numaEnabled) {
        UVM_ASSERT(uvm_parent_gpu_is_coherent(gpu->parent));

        gpu->mem_info.numa.enabled = true;
        gpu->mem_info.numa.node_id = gpu_caps.numaNodeId;
    }
    else {
        UVM_ASSERT(!uvm_parent_gpu_is_coherent(gpu->parent));
        gpu->mem_info.numa.node_id = NUMA_NO_NODE;
    }

    return NV_OK;
@@ -245,7 +247,7 @@ static NV_STATUS get_gpu_fb_info(uvm_gpu_t *gpu)
    }

    gpu->mem_info.max_vidmem_page_size = fb_info.maxVidmemPageSize;
    gpu->mem_info.static_bar1_start = pci_bar1_addr;
    gpu->mem_info.static_bar1_start = pci_bar1_addr + fb_info.staticBar1StartOffset;
    gpu->mem_info.static_bar1_size = fb_info.staticBar1Size;

    return NV_OK;
@@ -275,6 +277,55 @@ static NV_STATUS get_gpu_ecc_info(uvm_gpu_t *gpu)
    return NV_OK;
}

static NV_STATUS get_gpu_nvlink_info(uvm_gpu_t *gpu)
{
    NV_STATUS status;
    UvmGpuNvlinkInfo nvlink_info = {0};

    status = uvm_rm_locked_call(nvUvmInterfaceGetNvlinkInfo(uvm_gpu_device_handle(gpu), &nvlink_info));
    if (status != NV_OK)
        return status;

    atomic_set(&gpu->nvlink_status.injected_error, UVM_TEST_NVLINK_ERROR_NONE);

    gpu->nvlink_status.enabled = nvlink_info.bNvlinkRecoveryEnabled;
    if (gpu->nvlink_status.enabled) {
        gpu->nvlink_status.hw_interrupt_tree_location =
            (volatile NvU32*)((char*)nvlink_info.nvlinkReadLocation + nvlink_info.nvlinkOffset);
        UVM_ASSERT(gpu->nvlink_status.hw_interrupt_tree_location != NULL);

        gpu->nvlink_status.mask = nvlink_info.nvlinkMask;
        UVM_ASSERT(gpu->nvlink_status.mask != 0);

        gpu->nvlink_status.error_notifier = nvlink_info.nvlinkErrorNotifier;
        UVM_ASSERT(gpu->nvlink_status.error_notifier != NULL);
    }

    return NV_OK;
}

NV_STATUS uvm_gpu_inject_nvlink_error(uvm_gpu_t *gpu, UVM_TEST_NVLINK_ERROR_TYPE error_type)
{
    UVM_ASSERT(gpu);

    if (!uvm_enable_builtin_tests)
        return NV_ERR_NOT_SUPPORTED;

    switch (error_type)
    {
        case UVM_TEST_NVLINK_ERROR_NONE:
        case UVM_TEST_NVLINK_ERROR_RESOLVED:
        case UVM_TEST_NVLINK_ERROR_UNRESOLVED:
            break;

        default:
            return NV_ERR_INVALID_ARGUMENT;
    }

    atomic_set(&gpu->nvlink_status.injected_error, error_type);
    return NV_OK;
}

static bool gpu_supports_uvm(uvm_parent_gpu_t *parent_gpu)
{
    // TODO: Bug 1757136: Add Linux SLI support. Until then, explicitly disable
@@ -490,6 +541,7 @@ static void gpu_info_print_common(uvm_gpu_t *gpu, struct seq_file *s)
    UVM_SEQ_OR_DBG_PRINT(s, "GPU %s\n", uvm_gpu_name(gpu));
    UVM_SEQ_OR_DBG_PRINT(s, "retained_count %llu\n", uvm_gpu_retained_count(gpu));
    UVM_SEQ_OR_DBG_PRINT(s, "ecc %s\n", gpu->ecc.enabled ? "enabled" : "disabled");
    UVM_SEQ_OR_DBG_PRINT(s, "nvlink status and recovery %s\n", gpu->nvlink_status.enabled ? "enabled" : "disabled");
    if (gpu->parent->closest_cpu_numa_node == -1)
        UVM_SEQ_OR_DBG_PRINT(s, "closest_cpu_numa_node n/a\n");
    else
@@ -1269,6 +1321,40 @@ NV_STATUS uvm_gpu_check_ecc_error(uvm_gpu_t *gpu)
    return NV_OK;
}

NV_STATUS uvm_gpu_check_nvlink_error(uvm_gpu_t *gpu)
{
    NV_STATUS status = uvm_gpu_check_nvlink_error_no_rm(gpu);

    // Either NV_OK, or a resolved error code.
    if (status != NV_WARN_MORE_PROCESSING_REQUIRED)
        return status;

    // An interrupt that might mean an NVLINK error needs to be serviced.
    UVM_ASSERT(status == NV_WARN_MORE_PROCESSING_REQUIRED);

    status = service_interrupts(gpu->parent);
    if (status != NV_OK) {
        UVM_ERR_PRINT("Servicing interrupts failed: %s, GPU %s\n", nvstatusToString(status), uvm_gpu_name(gpu));
        return status;
    }

    // Check injected error if present, this works even if nvlink_status is not
    // supported.
    if (uvm_enable_builtin_tests) {
        if (atomic_read(&gpu->nvlink_status.injected_error) != UVM_TEST_NVLINK_ERROR_NONE)
            return NV_ERR_MEMORY_ERROR;

        UVM_ASSERT(gpu->nvlink_status.error_notifier);
    }

    // After servicing interrupts, the NVLINK error notifier should be current.
    if (*gpu->nvlink_status.error_notifier) {
        UVM_ERR_PRINT("NVLINK error encountered, GPU %s\n", uvm_gpu_name(gpu));
        return NV_ERR_MEMORY_ERROR;
    }

    return NV_OK;
}
static NV_STATUS init_parent_gpu(uvm_parent_gpu_t *parent_gpu,
                                 const NvProcessorUuid *gpu_uuid,
                                 const UvmGpuInfo *gpu_info,
@@ -1294,6 +1380,9 @@ static NV_STATUS init_parent_gpu(uvm_parent_gpu_t *parent_gpu,
    parent_gpu->closest_cpu_numa_node = dev_to_node(&parent_gpu->pci_dev->dev);
    parent_gpu->dma_addressable_start = gpu_platform_info->dma_addressable_start;
    parent_gpu->dma_addressable_limit = gpu_platform_info->dma_addressable_limit;
    parent_gpu->egm.enabled = gpu_info->egmEnabled;
    parent_gpu->egm.local_peer_id = gpu_info->egmPeerId;
    parent_gpu->egm.base_address = gpu_info->egmBaseAddr;

    parent_gpu->sli_enabled = (gpu_info->subdeviceCount > 1);

@@ -1397,7 +1486,8 @@ static NV_STATUS init_gpu(uvm_gpu_t *gpu, const UvmGpuInfo *gpu_info)

    status = get_gpu_caps(gpu);
    if (status != NV_OK) {
        UVM_ERR_PRINT("Failed to get GPU caps: %s, GPU %s\n", nvstatusToString(status), uvm_gpu_name(gpu));
        if (status != NV_ERR_NVLINK_FABRIC_NOT_READY)
            UVM_ERR_PRINT("Failed to get GPU caps: %s, GPU %s\n", nvstatusToString(status), uvm_gpu_name(gpu));
        return status;
    }

@@ -1421,6 +1511,12 @@ static NV_STATUS init_gpu(uvm_gpu_t *gpu, const UvmGpuInfo *gpu_info)
        return status;
    }

    status = get_gpu_nvlink_info(gpu);
    if (status != NV_OK) {
        UVM_ERR_PRINT("Failed to get GPU NVLINK RECOVERY info: %s, GPU %s\n", nvstatusToString(status), uvm_gpu_name(gpu));
        return status;
    }

    status = uvm_pmm_gpu_init(&gpu->pmm);
    if (status != NV_OK) {
        UVM_ERR_PRINT("PMM initialization failed: %s, GPU %s\n", nvstatusToString(status), uvm_gpu_name(gpu));
@@ -1974,6 +2070,56 @@ NV_STATUS uvm_gpu_check_ecc_error_no_rm(uvm_gpu_t *gpu)
    return NV_WARN_MORE_PROCESSING_REQUIRED;
}

NV_STATUS uvm_gpu_get_injected_nvlink_error(uvm_gpu_t *gpu)
{
    if (uvm_enable_builtin_tests) {
        UVM_TEST_NVLINK_ERROR_TYPE error_type = atomic_read(&gpu->nvlink_status.injected_error);

        if (error_type == UVM_TEST_NVLINK_ERROR_UNRESOLVED)
            return NV_WARN_MORE_PROCESSING_REQUIRED;

        if (error_type == UVM_TEST_NVLINK_ERROR_RESOLVED)
            return NV_ERR_MEMORY_ERROR;
    }

    return NV_OK;
}

NV_STATUS uvm_gpu_check_nvlink_error_no_rm(uvm_gpu_t *gpu)
{
    NV_STATUS status;

    // We may need to call service_interrupts() which cannot be done in the top
    // half interrupt handler so assert here as well to catch improper use as
    // early as possible.
    UVM_ASSERT(!in_interrupt());

    status = uvm_gpu_get_injected_nvlink_error(gpu);
    if (status != NV_OK)
        return status;

    if (!gpu->nvlink_status.enabled)
        return NV_OK;

    if (*gpu->nvlink_status.error_notifier) {
        UVM_ERR_PRINT("NVLINK error encountered, GPU %s\n", uvm_gpu_name(gpu));
        return NV_ERR_MEMORY_ERROR;
    }

    // RM hasn't seen an NVLINK error yet, check whether there is a pending
    // interrupt that might indicate one. We might get false positives because
    // the interrupt bits we read are not NVLINK-specific. They're just the
    // top-level bits for any interrupt on all engines which support NVLINK
    // errors.
    if ((*gpu->nvlink_status.hw_interrupt_tree_location & gpu->nvlink_status.mask) == 0) {
        // No pending interrupts.
        return NV_OK;
    }

    // An interrupt that might mean an NVLINK error needs to be serviced, signal
    // that to the caller.
    return NV_WARN_MORE_PROCESSING_REQUIRED;
}
static NV_STATUS get_parent_p2p_caps(uvm_parent_gpu_t *parent_gpu0,
|
||||
uvm_parent_gpu_t *parent_gpu1,
|
||||
UvmGpuP2PCapsParams *p2p_caps_params)
|
||||
@ -2041,6 +2187,10 @@ static void set_optimal_p2p_write_ces(uvm_gpu_t *gpu0, uvm_gpu_t *gpu1)
|
||||
NvU32 ce0, ce1;
|
||||
|
||||
UVM_ASSERT(parent_peer_caps->ref_count);
|
||||
UVM_ASSERT(gpu0->parent->peer_copy_mode == gpu1->parent->peer_copy_mode);
|
||||
|
||||
if (gpu0->parent->peer_copy_mode == UVM_GPU_PEER_COPY_MODE_UNSUPPORTED)
|
||||
return;
|
||||
|
||||
if (parent_peer_caps->link_type < UVM_GPU_LINK_NVLINK_1)
|
||||
return;
|
||||
@ -2056,7 +2206,7 @@ static void set_optimal_p2p_write_ces(uvm_gpu_t *gpu0, uvm_gpu_t *gpu1)
|
||||
static int nv_procfs_read_parent_gpu_peer_caps(struct seq_file *s, void *v)
|
||||
{
|
||||
if (!uvm_down_read_trylock(&g_uvm_global.pm.lock))
|
||||
return -EAGAIN;
|
||||
return -EAGAIN;
|
||||
|
||||
parent_gpu_peer_caps_print((uvm_parent_gpu_t **)s->private, s);
|
||||
|
||||
@ -2161,12 +2311,17 @@ static NV_STATUS parent_peers_init(uvm_parent_gpu_t *parent_gpu0,
|
||||
parent_peer_caps->link_type = link_type;
|
||||
parent_peer_caps->total_link_line_rate_mbyte_per_s = p2p_caps_params.totalLinkLineRateMBps;
|
||||
|
||||
if (parent_gpu0->egm.enabled || parent_gpu1->egm.enabled)
|
||||
UVM_ASSERT(parent_peer_caps->link_type >= UVM_GPU_LINK_NVLINK_2);
|
||||
|
||||
// Initialize peer ids and establish peer mappings
|
||||
// Peer id from min(gpu_id0, gpu_id1) -> max(gpu_id0, gpu_id1)
|
||||
parent_peer_caps->peer_ids[0] = p2p_caps_params.peerIds[0];
|
||||
parent_peer_caps->egm_peer_ids[0] = p2p_caps_params.egmPeerIds[0];
|
||||
|
||||
// Peer id from max(gpu_id0, gpu_id1) -> min(gpu_id0, gpu_id1)
|
||||
parent_peer_caps->peer_ids[1] = p2p_caps_params.peerIds[1];
|
||||
parent_peer_caps->egm_peer_ids[1] = p2p_caps_params.egmPeerIds[1];
|
||||
|
||||
parent_peer_caps->optimalNvlinkWriteCEs[0] = p2p_caps_params.optimalNvlinkWriteCEs[0];
|
||||
parent_peer_caps->optimalNvlinkWriteCEs[1] = p2p_caps_params.optimalNvlinkWriteCEs[1];
|
||||
@ -2831,6 +2986,53 @@ uvm_aperture_t uvm_gpu_peer_aperture(uvm_gpu_t *local_gpu, uvm_gpu_t *remote_gpu
|
||||
return parent_gpu_peer_aperture(local_gpu->parent, remote_gpu->parent, parent_peer_caps);
|
||||
}
|
||||
|
||||
uvm_gpu_phys_address_t uvm_gpu_peer_phys_address(uvm_gpu_t *owning_gpu, NvU64 address, uvm_gpu_t *accessing_gpu)
|
||||
{
|
||||
uvm_aperture_t aperture = uvm_gpu_peer_aperture(accessing_gpu, owning_gpu);
|
||||
|
||||
if (uvm_parent_gpus_are_nvswitch_connected(accessing_gpu->parent, owning_gpu->parent))
|
||||
address += owning_gpu->parent->nvswitch_info.fabric_memory_window_start;
|
||||
|
||||
return uvm_gpu_phys_address(aperture, address);
|
||||
}
|
||||
|
||||
uvm_gpu_address_t uvm_gpu_peer_copy_address(uvm_gpu_t *owning_gpu, NvU64 address, uvm_gpu_t *accessing_gpu)
|
||||
{
|
||||
uvm_gpu_identity_mapping_t *gpu_peer_mapping;
|
||||
|
||||
if (accessing_gpu->parent->peer_copy_mode == UVM_GPU_PEER_COPY_MODE_PHYSICAL)
|
||||
return uvm_gpu_address_from_phys(uvm_gpu_peer_phys_address(owning_gpu, address, accessing_gpu));
|
||||
|
||||
UVM_ASSERT(accessing_gpu->parent->peer_copy_mode == UVM_GPU_PEER_COPY_MODE_VIRTUAL);
|
||||
gpu_peer_mapping = uvm_gpu_get_peer_mapping(accessing_gpu, owning_gpu->id);
|
||||
|
||||
return uvm_gpu_address_virtual(gpu_peer_mapping->base + address);
|
||||
}
|
||||
|
||||
uvm_aperture_t uvm_gpu_egm_peer_aperture(uvm_parent_gpu_t *local_gpu, uvm_parent_gpu_t *remote_gpu)
|
||||
{
|
||||
uvm_parent_gpu_peer_t *peer_caps;
|
||||
NvU8 peer_id;
|
||||
|
||||
if (local_gpu == remote_gpu) {
|
||||
UVM_ASSERT(local_gpu->egm.enabled);
|
||||
peer_id = local_gpu->egm.local_peer_id;
|
||||
}
|
||||
else {
|
||||
UVM_ASSERT(remote_gpu->egm.enabled);
|
||||
|
||||
peer_caps = parent_gpu_peer_caps(local_gpu, remote_gpu);
|
||||
|
||||
// Comparison is based on min(gpu1, gpu2) logic for peer GPUs.
|
||||
if (uvm_parent_id_cmp(local_gpu->id, remote_gpu->id) < 0)
|
||||
peer_id = peer_caps->egm_peer_ids[0];
|
||||
else
|
||||
peer_id = peer_caps->egm_peer_ids[1];
|
||||
}
|
||||
|
||||
return UVM_APERTURE_PEER(peer_id);
|
||||
}
|
||||
|
||||
NvU64 uvm_gpu_peer_ref_count(const uvm_gpu_t *gpu0, const uvm_gpu_t *gpu1)
|
||||
{
|
||||
UVM_ASSERT(!uvm_gpus_are_smc_peers(gpu0, gpu1));
|
||||
@ -2838,14 +3040,80 @@ NvU64 uvm_gpu_peer_ref_count(const uvm_gpu_t *gpu0, const uvm_gpu_t *gpu1)
|
||||
return gpu_peer_caps(gpu0, gpu1)->ref_count;
|
||||
}
|
||||
|
||||
bool uvm_gpu_address_is_peer(uvm_gpu_t *gpu, uvm_gpu_address_t address)
|
||||
{
|
||||
if (address.is_virtual) {
|
||||
// Virtual addresses can be used for peer copies only if
|
||||
// peer_copy_mode == UVM_GPU_PEER_COPY_MODE_VIRTUAL
|
||||
if (gpu->parent->peer_copy_mode == UVM_GPU_PEER_COPY_MODE_VIRTUAL) {
|
||||
return (address.address >= gpu->parent->peer_va_base &&
|
||||
address.address < (gpu->parent->peer_va_base + gpu->parent->peer_va_size));
|
||||
}
|
||||
} else {
|
||||
uvm_parent_gpu_t *parent_gpu;
|
||||
phys_addr_t phys_addr;
|
||||
|
||||
if (uvm_aperture_is_peer(address.aperture)) {
|
||||
bool is_peer = true;
|
||||
uvm_parent_processor_mask_t parent_gpus;
|
||||
uvm_parent_gpu_t *parent_peer_gpu;
|
||||
|
||||
if (gpu->parent->egm.enabled && address.aperture == gpu->parent->egm.local_peer_id)
|
||||
return false;
|
||||
|
||||
// EGM uses peer IDs but they are different from VIDMEM peer IDs.
|
||||
// Check if the address aperture is an EGM aperture.
|
||||
uvm_parent_gpus_from_processor_mask(&parent_gpus, &gpu->peer_info.peer_gpu_mask);
|
||||
uvm_spin_lock(&gpu->peer_info.peer_gpus_lock);
|
||||
for_each_parent_gpu_in_mask(parent_peer_gpu, &parent_gpus) {
|
||||
uvm_aperture_t egm_peer_aperture;
|
||||
|
||||
if (!parent_peer_gpu->egm.enabled)
|
||||
continue;
|
||||
|
||||
egm_peer_aperture = uvm_gpu_egm_peer_aperture(gpu->parent, parent_peer_gpu);
|
||||
|
||||
if (address.aperture == egm_peer_aperture) {
|
||||
is_peer = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
uvm_spin_unlock(&gpu->peer_info.peer_gpus_lock);
|
||||
return is_peer;
|
||||
}
|
||||
|
||||
if (address.aperture != UVM_APERTURE_SYS)
|
||||
return false;
|
||||
|
||||
// GPU uses DMA addresses, which might be translated by IOMMU/SMMU,
|
||||
// either inline, or via ATS.
|
||||
phys_addr = dma_to_phys(&gpu->parent->pci_dev->dev, (dma_addr_t)address.address);
|
||||
|
||||
// Exposed coherent vidmem can be accessed via sys aperture
|
||||
uvm_spin_lock_irqsave(&g_uvm_global.gpu_table_lock);
|
||||
for_each_parent_gpu(parent_gpu) {
|
||||
if (parent_gpu == gpu->parent)
|
||||
continue;
|
||||
|
||||
if (phys_addr >= parent_gpu->system_bus.memory_window_start &&
|
||||
phys_addr <= parent_gpu->system_bus.memory_window_end) {
|
||||
uvm_spin_unlock_irqrestore(&g_uvm_global.gpu_table_lock);
|
||||
return true;
|
||||
}
|
||||
}
|
||||
uvm_spin_unlock_irqrestore(&g_uvm_global.gpu_table_lock);
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
uvm_aperture_t uvm_get_page_tree_location(const uvm_parent_gpu_t *parent_gpu)
|
||||
{
|
||||
// See comment in page_tree_set_location
|
||||
if (uvm_parent_gpu_is_virt_mode_sriov_heavy(parent_gpu))
|
||||
if (uvm_parent_gpu_is_virt_mode_sriov_heavy(parent_gpu) || g_uvm_global.conf_computing_enabled)
|
||||
return UVM_APERTURE_VID;
|
||||
|
||||
if (g_uvm_global.conf_computing_enabled)
|
||||
return UVM_APERTURE_VID;
|
||||
|
||||
return UVM_APERTURE_DEFAULT;
|
||||
}
|
||||
@ -2915,6 +3183,7 @@ static NV_STATUS parent_gpu_add_user_channel_subctx_info(uvm_parent_gpu_t *paren
|
||||
uvm_gpu_phys_address_t instance_ptr = user_channel->instance_ptr.addr;
|
||||
NV_STATUS status = NV_OK;
|
||||
uvm_rb_tree_node_t *channel_tree_node;
|
||||
NvU64 node_key;
|
||||
uvm_user_channel_subctx_info_t *channel_subctx_info;
|
||||
uvm_user_channel_subctx_info_t *new_channel_subctx_info = NULL;
|
||||
uvm_gpu_va_space_t *gpu_va_space = user_channel->gpu_va_space;
|
||||
@ -2937,7 +3206,8 @@ static NV_STATUS parent_gpu_add_user_channel_subctx_info(uvm_parent_gpu_t *paren
|
||||
uvm_spin_lock(&parent_gpu->instance_ptr_table_lock);
|
||||
|
||||
// Check if the subcontext information for the channel already exists
|
||||
channel_tree_node = uvm_rb_tree_find(&parent_gpu->tsg_table, user_channel->tsg.id);
|
||||
node_key = ((NvU64)user_channel->hw_runlist_id << 32) | user_channel->tsg.id;
|
||||
channel_tree_node = uvm_rb_tree_find(&parent_gpu->tsg_table, node_key);
|
||||
|
||||
if (!channel_tree_node) {
|
||||
// We could not allocate the descriptor before taking the lock. Exiting
|
||||
@ -2947,7 +3217,7 @@ static NV_STATUS parent_gpu_add_user_channel_subctx_info(uvm_parent_gpu_t *paren
|
||||
}
|
||||
|
||||
// Insert the new subcontext information descriptor
|
||||
new_channel_subctx_info->node.key = user_channel->tsg.id;
|
||||
new_channel_subctx_info->node.key = node_key;
|
||||
status = uvm_rb_tree_insert(&parent_gpu->tsg_table, &new_channel_subctx_info->node);
|
||||
UVM_ASSERT(status == NV_OK);
|
||||
|
||||
@ -3336,9 +3606,6 @@ static NvU64 gpu_addr_to_dma_addr(uvm_parent_gpu_t *parent_gpu, NvU64 gpu_addr)
|
||||
NvU64 dma_addr = gpu_addr;
|
||||
UVM_ASSERT(dma_addr <= dma_addr + parent_gpu->dma_addressable_start);
|
||||
|
||||
if (parent_gpu->npu)
|
||||
dma_addr = nv_expand_nvlink_addr(dma_addr);
|
||||
|
||||
dma_addr += parent_gpu->dma_addressable_start;
|
||||
|
||||
return dma_addr;
|
||||
@ -3353,11 +3620,6 @@ static NvU64 dma_addr_to_gpu_addr(uvm_parent_gpu_t *parent_gpu, NvU64 dma_addr)
|
||||
NvU64 gpu_addr = dma_addr - parent_gpu->dma_addressable_start;
|
||||
UVM_ASSERT(dma_addr >= gpu_addr);
|
||||
|
||||
// See Bug 1920398 for background and details about NVLink DMA address
|
||||
// transformations being applied here.
|
||||
if (parent_gpu->npu)
|
||||
gpu_addr = nv_compress_nvlink_addr(gpu_addr);
|
||||
|
||||
return gpu_addr;
|
||||
}
|
||||
|
||||
|
@ -97,6 +97,11 @@ struct uvm_service_block_context_struct
|
||||
// been serviced
|
||||
uvm_processor_mask_t resident_processors;
|
||||
|
||||
// A mask of GPUs that need to be checked for NVLINK errors before the
|
||||
// handler returns, but after the VA space lock has been unlocked
|
||||
// to avoid RM/UVM VA space lock deadlocks.
|
||||
uvm_processor_mask_t gpus_to_check_for_nvlink_errors;
|
||||
|
||||
// VA block region that contains all the pages affected by the operation
|
||||
uvm_va_block_region_t region;
|
||||
|
||||
@ -192,6 +197,10 @@ typedef struct
|
||||
{
|
||||
struct
|
||||
{
|
||||
// Mask of prefetch faulted pages in a UVM_VA_BLOCK_SIZE aligned region
|
||||
// of a SAM VMA. Used for batching ATS faults in a vma.
|
||||
uvm_page_mask_t prefetch_only_fault_mask;
|
||||
|
||||
// Mask of read faulted pages in a UVM_VA_BLOCK_SIZE aligned region
|
||||
// of a SAM VMA. Used for batching ATS faults in a vma.
|
||||
uvm_page_mask_t read_fault_mask;
|
||||
@ -202,7 +211,7 @@ typedef struct
|
||||
|
||||
// Mask of all faulted pages in a UVM_VA_BLOCK_SIZE aligned region
|
||||
// of a SAM VMA. This is a logical or of read_fault_mask and
|
||||
// write_mask.
|
||||
// write_mask and prefetch_only_fault_mask.
|
||||
uvm_page_mask_t accessed_mask;
|
||||
|
||||
// Mask of successfully serviced pages in a UVM_VA_BLOCK_SIZE
|
||||
@ -269,7 +278,6 @@ typedef struct
|
||||
// Prefetch temporary state.
|
||||
uvm_perf_prefetch_bitmap_tree_t bitmap_tree;
|
||||
} prefetch_state;
|
||||
|
||||
} uvm_ats_fault_context_t;
|
||||
|
||||
struct uvm_fault_service_batch_context_struct
|
||||
@ -701,7 +709,7 @@ struct uvm_gpu_struct
|
||||
// True if the platform supports HW coherence and the GPU's memory
|
||||
// is exposed as a NUMA node to the kernel.
|
||||
bool enabled;
|
||||
unsigned int node_id;
|
||||
int node_id;
|
||||
} numa;
|
||||
|
||||
// Physical address of the start of statically mapped fb memory in BAR1
|
||||
@ -871,6 +879,38 @@ struct uvm_gpu_struct
|
||||
NvBool *error_notifier;
|
||||
} ecc;
|
||||
|
||||
// NVLINK STO recovery handling
|
||||
// In order to trap STO errors as soon as possible the driver has the hw
|
||||
// interrupt register mapped directly. If an STO interrupt is ever noticed
|
||||
// to be pending, then the UVM driver needs to:
|
||||
//
|
||||
// 1) ask RM to service interrupts, and then
|
||||
// 2) inspect the NVLINK error notifier state.
|
||||
//
|
||||
// Notably, checking for channel errors is not enough, because STO errors
|
||||
// can be pending, even after a channel has become idle.
|
||||
//
|
||||
// See more details in uvm_gpu_check_nvlink_error().
|
||||
struct
|
||||
{
|
||||
// Does the GPU have NVLINK STO recovery enabled?
|
||||
bool enabled;
|
||||
|
||||
// Artificially injected error for testing
|
||||
atomic_t injected_error;
|
||||
|
||||
// Direct mapping of the 32-bit part of the hw interrupt tree that has
|
||||
// the NVLINK error bits.
|
||||
volatile NvU32 *hw_interrupt_tree_location;
|
||||
|
||||
// Mask to get the NVLINK error interrupt bits from the 32-bits above.
|
||||
NvU32 mask;
|
||||
|
||||
// Set to true by RM when a fatal NVLINK error is encountered (requires
|
||||
// asking RM to service pending interrupts to be current).
|
||||
NvBool *error_notifier;
|
||||
} nvlink_status;
|
||||
|
||||
struct
|
||||
{
|
||||
NvU32 swizz_id;
|
||||
@ -1001,6 +1041,8 @@ struct uvm_parent_gpu_struct
|
||||
// Whether CE supports physical addressing mode for writes to vidmem
|
||||
bool ce_phys_vidmem_write_supported;
|
||||
|
||||
// Addressing mode(s) supported for CE transfers between this GPU and its
|
||||
// peers: none, physical only, physical and virtual, etc.
|
||||
uvm_gpu_peer_copy_mode_t peer_copy_mode;
|
||||
|
||||
// Virtualization mode of the GPU.
|
||||
@ -1090,6 +1132,15 @@ struct uvm_parent_gpu_struct
|
||||
// Indicates whether the GPU can map sysmem with pages larger than 4k
|
||||
bool can_map_sysmem_with_large_pages;
|
||||
|
||||
struct
|
||||
{
|
||||
// If true, the granularity of key rotation is a single channel. If
|
||||
// false, the key replacement affects all channels on the engine. The
|
||||
// supported granularity is dependent on the number of key slots
|
||||
// available in HW.
|
||||
bool per_channel_key_rotation;
|
||||
} conf_computing;
|
||||
|
||||
// VA base and size of the RM managed part of the internal UVM VA space.
|
||||
//
|
||||
// The internal UVM VA is shared with RM by RM controlling some of the top
|
||||
@ -1102,6 +1153,11 @@ struct uvm_parent_gpu_struct
|
||||
NvU64 rm_va_base;
|
||||
NvU64 rm_va_size;
|
||||
|
||||
// Base and size of the GPU VA space used for peer identity mappings,
|
||||
// it is used only if peer_copy_mode is UVM_GPU_PEER_COPY_MODE_VIRTUAL.
|
||||
NvU64 peer_va_base;
|
||||
NvU64 peer_va_size;
|
||||
|
||||
// Base and size of the GPU VA used for uvm_mem_t allocations mapped in the
|
||||
// internal address_space_tree.
|
||||
NvU64 uvm_mem_va_base;
|
||||
@ -1260,6 +1316,22 @@ struct uvm_parent_gpu_struct
|
||||
unsigned long smmu_prod;
|
||||
unsigned long smmu_cons;
|
||||
} smmu_war;
|
||||
|
||||
struct
|
||||
{
|
||||
// Is EGM support enabled on this GPU.
|
||||
bool enabled;
|
||||
|
||||
// Local EGM peer ID. This ID is used to route EGM memory accesses to
|
||||
// the local CPU socket.
|
||||
NvU8 local_peer_id;
|
||||
|
||||
// EGM base address of the EGM carveout for remote EGM accesses.
|
||||
// The base address is used when computing PTE PA address values for
|
||||
// accesses to the local CPU socket's EGM memory from other peer
|
||||
// GPUs.
|
||||
NvU64 base_address;
|
||||
} egm;
|
||||
};
|
||||
|
||||
static const char *uvm_parent_gpu_name(uvm_parent_gpu_t *parent_gpu)
|
||||
@ -1330,6 +1402,18 @@ typedef struct
|
||||
// peer_id[1] from max(gpu_id_1, gpu_id_2) -> min(gpu_id_1, gpu_id_2)
|
||||
NvU8 peer_ids[2];
|
||||
|
||||
// EGM peer Id associated with this device w.r.t. a peer GPU.
|
||||
// Note: egmPeerId (A -> B) != egmPeerId (B -> A)
|
||||
// egm_peer_id[0] from min(gpu_id_1, gpu_id_2) -> max(gpu_id_1, gpu_id_2)
|
||||
// egm_peer_id[1] from max(gpu_id_1, gpu_id_2) -> min(gpu_id_1, gpu_id_2)
|
||||
//
|
||||
// Unlike VIDMEM peers, EGM peers are not symmetric. This means that if
|
||||
// one of the GPUs is EGM-enabled, it does not automatically mean that
|
||||
// the other is also EGM-enabled. Therefore, an EGM peer Ids are only
|
||||
// valid if the peer GPU is EGM-enabled, i.e. egm_peer_id[0] is valid
|
||||
// iff max(gpu_id_1, gpu_id_2) is EGM-enabled.
|
||||
NvU8 egm_peer_ids[2];
|
||||
|
||||
// The link type between the peer parent GPUs, currently either PCIe or
|
||||
// NVLINK.
|
||||
uvm_gpu_link_type_t link_type;
|
||||
@ -1372,7 +1456,9 @@ void uvm_gpu_exit_va_space(uvm_va_space_t *va_space);
|
||||
|
||||
static unsigned int uvm_gpu_numa_node(uvm_gpu_t *gpu)
|
||||
{
|
||||
UVM_ASSERT(gpu->mem_info.numa.enabled);
|
||||
if (!gpu->mem_info.numa.enabled)
|
||||
UVM_ASSERT(gpu->mem_info.numa.node_id == NUMA_NO_NODE);
|
||||
|
||||
return gpu->mem_info.numa.node_id;
|
||||
}
|
||||
|
||||
@ -1381,6 +1467,7 @@ static uvm_gpu_phys_address_t uvm_gpu_page_to_phys_address(uvm_gpu_t *gpu, struc
|
||||
unsigned long sys_addr = page_to_pfn(page) << PAGE_SHIFT;
|
||||
unsigned long gpu_offset = sys_addr - gpu->parent->system_bus.memory_window_start;
|
||||
|
||||
UVM_ASSERT(gpu->mem_info.numa.enabled);
|
||||
UVM_ASSERT(page_to_nid(page) == uvm_gpu_numa_node(gpu));
|
||||
UVM_ASSERT(sys_addr >= gpu->parent->system_bus.memory_window_start);
|
||||
UVM_ASSERT(sys_addr + PAGE_SIZE - 1 <= gpu->parent->system_bus.memory_window_end);
|
||||
@ -1459,6 +1546,18 @@ uvm_gpu_link_type_t uvm_parent_gpu_peer_link_type(uvm_parent_gpu_t *parent_gpu0,
|
||||
// They must not be the same gpu.
|
||||
uvm_aperture_t uvm_gpu_peer_aperture(uvm_gpu_t *local_gpu, uvm_gpu_t *remote_gpu);
|
||||
|
||||
// Returns the physical address for use by accessing_gpu of a vidmem allocation
|
||||
// on the peer owning_gpu. This address can be used for making PTEs on
|
||||
// accessing_gpu, but not for copying between the two GPUs. For that, use
|
||||
// uvm_gpu_peer_copy_address.
|
||||
uvm_gpu_phys_address_t uvm_gpu_peer_phys_address(uvm_gpu_t *owning_gpu, NvU64 address, uvm_gpu_t *accessing_gpu);
|
||||
|
||||
// Returns the physical or virtual address for use by accessing_gpu to copy to/
|
||||
// from a vidmem allocation on the peer owning_gpu. This may be different from
|
||||
// uvm_gpu_peer_phys_address to handle CE limitations in addressing peer
|
||||
// physical memory directly.
|
||||
uvm_gpu_address_t uvm_gpu_peer_copy_address(uvm_gpu_t *owning_gpu, NvU64 address, uvm_gpu_t *accessing_gpu);
|
||||
|
||||
// Return the reference count for the P2P state between the given GPUs.
|
||||
// The two GPUs must have different parents.
|
||||
NvU64 uvm_gpu_peer_ref_count(const uvm_gpu_t *gpu0, const uvm_gpu_t *gpu1);
|
||||
@ -1467,6 +1566,13 @@ NvU64 uvm_gpu_peer_ref_count(const uvm_gpu_t *gpu0, const uvm_gpu_t *gpu1);
|
||||
// address.
|
||||
uvm_processor_id_t uvm_gpu_get_processor_id_by_address(uvm_gpu_t *gpu, uvm_gpu_phys_address_t addr);
|
||||
|
||||
// Get the EGM aperture for local_gpu to use to map memory resident on the CPU
|
||||
// NUMA node that remote_gpu is attached to.
|
||||
// Note that local_gpu can be equal to remote_gpu when memory is resident in
|
||||
// CPU NUMA node local to local_gpu. In this case, the local EGM peer ID will
|
||||
// be used.
|
||||
uvm_aperture_t uvm_gpu_egm_peer_aperture(uvm_parent_gpu_t *local_gpu, uvm_parent_gpu_t *remote_gpu);
|
||||
|
||||
bool uvm_parent_gpus_are_nvswitch_connected(const uvm_parent_gpu_t *parent_gpu0, const uvm_parent_gpu_t *parent_gpu1);
|
||||
|
||||
static bool uvm_gpus_are_smc_peers(const uvm_gpu_t *gpu0, const uvm_gpu_t *gpu1)
|
||||
@ -1508,8 +1614,8 @@ static uvm_gpu_address_t uvm_parent_gpu_address_virtual_from_sysmem_phys(uvm_par
|
||||
return uvm_gpu_address_virtual(parent_gpu->flat_sysmem_va_base + pa);
|
||||
}
|
||||
|
||||
// Given a GPU or CPU physical address (not peer), retrieve an address suitable
|
||||
// for CE access.
|
||||
// Given a GPU, CPU, or EGM PEER physical address (not VIDMEM peer), retrieve an
|
||||
// address suitable for CE access.
|
||||
static uvm_gpu_address_t uvm_gpu_address_copy(uvm_gpu_t *gpu, uvm_gpu_phys_address_t phys_addr)
|
||||
{
|
||||
UVM_ASSERT(phys_addr.aperture == UVM_APERTURE_VID || phys_addr.aperture == UVM_APERTURE_SYS);
|
||||
@ -1531,6 +1637,12 @@ static uvm_gpu_identity_mapping_t *uvm_gpu_get_peer_mapping(uvm_gpu_t *gpu, uvm_
|
||||
return &gpu->peer_mappings[uvm_id_gpu_index(peer_id)];
|
||||
}
|
||||
|
||||
// Check whether the provided address points to peer memory:
|
||||
// * Physical address using one of the PEER apertures
|
||||
// * Physical address using SYS aperture that belongs to an exposed coherent memory
|
||||
// * Virtual address in the region [peer_va_base, peer_va_base + peer_va_size)
|
||||
bool uvm_gpu_address_is_peer(uvm_gpu_t *gpu, uvm_gpu_address_t address);
|
||||
|
||||
// Check for ECC errors
|
||||
//
|
||||
// Notably this check cannot be performed where it's not safe to call into RM.
|
||||
@ -1543,6 +1655,23 @@ NV_STATUS uvm_gpu_check_ecc_error(uvm_gpu_t *gpu);
|
||||
// and it's required to call uvm_gpu_check_ecc_error() to be sure.
|
||||
NV_STATUS uvm_gpu_check_ecc_error_no_rm(uvm_gpu_t *gpu);
|
||||
|
||||
// Check for NVLINK errors
|
||||
//
|
||||
// Inject NVLINK error
|
||||
NV_STATUS uvm_gpu_inject_nvlink_error(uvm_gpu_t *gpu, UVM_TEST_NVLINK_ERROR_TYPE error_type);
|
||||
|
||||
NV_STATUS uvm_gpu_get_injected_nvlink_error(uvm_gpu_t *gpu);
|
||||
|
||||
// Notably this check cannot be performed where it's not safe to call into RM.
|
||||
NV_STATUS uvm_gpu_check_nvlink_error(uvm_gpu_t *gpu);
|
||||
|
||||
// Check for NVLINK errors without calling into RM
|
||||
//
|
||||
// Calling into RM is problematic in many places, this check is always safe to
|
||||
// do. Returns NV_WARN_MORE_PROCESSING_REQUIRED if there might be an NVLINK error
|
||||
// and it's required to call uvm_gpu_check_nvlink_error() to be sure.
|
||||
NV_STATUS uvm_gpu_check_nvlink_error_no_rm(uvm_gpu_t *gpu);
|
||||
|
||||
// Map size bytes of contiguous sysmem on the GPU for physical access
|
||||
//
|
||||
// size has to be aligned to PAGE_SIZE.
|
||||
|
@ -24,7 +24,6 @@
|
||||
#include "nv_uvm_interface.h"
|
||||
#include "uvm_gpu_access_counters.h"
|
||||
#include "uvm_global.h"
|
||||
#include "uvm_api.h"
|
||||
#include "uvm_gpu.h"
|
||||
#include "uvm_hal.h"
|
||||
#include "uvm_kvmalloc.h"
|
||||
|
@ -612,40 +612,35 @@ static void access_counters_isr_bottom_half_entry(void *args)
|
||||
UVM_ENTRY_VOID(access_counters_isr_bottom_half(args));
|
||||
}
|
||||
|
||||
// When Confidential Computing is enabled, UVM does not (indirectly) trigger
|
||||
// the replayable fault interrupt by updating GET. This is because, in this
|
||||
// configuration, GET is a dummy register used to inform GSP-RM (the owner
|
||||
// of the HW replayable fault buffer) of the latest entry consumed by the
|
||||
// UVM driver. The real GET register is owned by GSP-RM.
|
||||
//
|
||||
// The retriggering of a replayable faults bottom half happens then
|
||||
// manually, by scheduling a bottom half for later if there is any pending
|
||||
// work in the fault buffer accessible by UVM. The retriggering adddresses
|
||||
// two problematic scenarios caused by GET updates not setting any
|
||||
// interrupt:
|
||||
//
|
||||
// (1) UVM didn't process all the entries up to cached PUT
|
||||
//
|
||||
// (2) UVM did process all the entries up to cached PUT, but GSP-RM
|
||||
// added new entries such that cached PUT is out-of-date
|
||||
//
|
||||
// In both cases, re-enablement of interrupts would have caused the
|
||||
// replayable fault to be triggered in a non-CC setup, because the updated
|
||||
// value of GET is different from PUT. But this not the case in Confidential
|
||||
// Computing, so a bottom half needs to be manually scheduled in order to
|
||||
// ensure that all faults are serviced.
|
||||
//
|
||||
// While in the typical case the retriggering happens within a replayable
|
||||
// fault bottom half, it can also happen within a non-interrupt path such as
|
||||
// uvm_gpu_fault_buffer_flush.
|
||||
static void replayable_faults_retrigger_bottom_half(uvm_parent_gpu_t *parent_gpu)
|
||||
{
|
||||
bool retrigger = false;
|
||||
|
||||
// When Confidential Computing is enabled, UVM does not (indirectly) trigger
|
||||
// the replayable fault interrupt by updating GET. This is because, in this
|
||||
// configuration, GET is a dummy register used to inform GSP-RM (the owner
|
||||
// of the HW replayable fault buffer) of the latest entry consumed by the
|
||||
// UVM driver. The real GET register is owned by GSP-RM.
|
||||
//
|
||||
// The retriggering of a replayable faults bottom half happens then
|
||||
// manually, by scheduling a bottom half for later if there is any pending
|
||||
// work in the fault buffer accessible by UVM. The retriggering adddresses
|
||||
// two problematic scenarios caused by GET updates not setting any
|
||||
// interrupt:
|
||||
//
|
||||
// (1) UVM didn't process all the entries up to cached PUT
|
||||
//
|
||||
// (2) UVM did process all the entries up to cached PUT, but GSP-RM
|
||||
// added new entries such that cached PUT is out-of-date
|
||||
//
|
||||
// In both cases, re-enablement of interrupts would have caused the
|
||||
// replayable fault to be triggered in a non-CC setup, because the updated
|
||||
// value of GET is different from PUT. But this not the case in Confidential
|
||||
// Computing, so a bottom half needs to be manually scheduled in order to
|
||||
// ensure that all faults are serviced.
|
||||
//
|
||||
// While in the typical case the retriggering happens within a replayable
|
||||
// fault bottom half, it can also happen within a non-interrupt path such as
|
||||
// uvm_gpu_fault_buffer_flush.
|
||||
if (g_uvm_global.conf_computing_enabled)
|
||||
retrigger = true;
|
||||
|
||||
if (!retrigger)
|
||||
if (!g_uvm_global.conf_computing_enabled)
|
||||
return;
|
||||
|
||||
uvm_spin_lock_irqsave(&parent_gpu->isr.interrupts_lock);
|
||||
|
@ -579,6 +579,7 @@ static NV_STATUS service_non_managed_fault(uvm_gpu_va_space_t *gpu_va_space,
|
||||
uvm_fault_access_type_t fault_access_type = fault_entry->fault_access_type;
|
||||
uvm_ats_fault_context_t *ats_context = &non_replayable_faults->ats_context;
|
||||
|
||||
uvm_page_mask_zero(&ats_context->faults.prefetch_only_fault_mask);
|
||||
uvm_page_mask_zero(&ats_context->faults.read_fault_mask);
|
||||
uvm_page_mask_zero(&ats_context->faults.write_fault_mask);
|
||||
uvm_page_mask_zero(&ats_context->faults.accessed_mask);
|
||||
|
@ -636,6 +636,7 @@ static NV_STATUS fault_buffer_flush_locked(uvm_parent_gpu_t *parent_gpu,
|
||||
status = hw_fault_buffer_flush_locked(parent_gpu, HW_FAULT_BUFFER_FLUSH_MODE_DISCARD);
|
||||
if (status != NV_OK)
|
||||
return status;
|
||||
|
||||
replayable_faults->cached_put = parent_gpu->fault_buffer_hal->read_put(parent_gpu);
|
||||
}
|
||||
|
||||
@ -898,7 +899,7 @@ static NV_STATUS fetch_fault_buffer_entries(uvm_parent_gpu_t *parent_gpu,
|
||||
// We have some entry to work on. Let's do the rest later.
|
||||
if (fetch_mode == FAULT_FETCH_MODE_BATCH_READY && fault_index > 0)
|
||||
goto done;
|
||||
|
||||
|
||||
status = uvm_global_get_status();
|
||||
if (status != NV_OK)
|
||||
goto done;
|
||||
@ -1715,7 +1716,7 @@ static NV_STATUS service_fault_batch_ats_sub_vma(uvm_gpu_va_space_t *gpu_va_spac
|
||||
|
||||
status = uvm_ats_service_faults(gpu_va_space, vma, base, &batch_context->ats_context);
|
||||
|
||||
// Remove prefetched pages from the serviced mask since fault servicing
|
||||
// Remove SW prefetched pages from the serviced mask since fault servicing
|
||||
// failures belonging to prefetch pages need to be ignored.
|
||||
uvm_page_mask_and(faults_serviced_mask, faults_serviced_mask, accessed_mask);
|
||||
|
||||
@ -1777,6 +1778,7 @@ static void start_new_sub_batch(NvU64 *sub_batch_base,
|
||||
{
|
||||
uvm_page_mask_zero(&ats_context->faults.read_fault_mask);
|
||||
uvm_page_mask_zero(&ats_context->faults.write_fault_mask);
|
||||
uvm_page_mask_zero(&ats_context->faults.prefetch_only_fault_mask);
|
||||
|
||||
*sub_batch_fault_index = fault_index;
|
||||
*sub_batch_base = UVM_VA_BLOCK_ALIGN_DOWN(address);
|
||||
@ -1798,6 +1800,7 @@ static NV_STATUS service_fault_batch_ats_sub(uvm_gpu_va_space_t *gpu_va_space,
|
||||
uvm_ats_fault_context_t *ats_context = &batch_context->ats_context;
|
||||
uvm_page_mask_t *read_fault_mask = &ats_context->faults.read_fault_mask;
|
||||
uvm_page_mask_t *write_fault_mask = &ats_context->faults.write_fault_mask;
|
||||
uvm_page_mask_t *prefetch_only_fault_mask = &ats_context->faults.prefetch_only_fault_mask;
|
||||
uvm_gpu_t *gpu = gpu_va_space->gpu;
|
||||
bool replay_per_va_block =
|
||||
(gpu->parent->fault_buffer_info.replayable.replay_policy == UVM_PERF_FAULT_REPLAY_POLICY_BLOCK);
|
||||
@ -1829,7 +1832,9 @@ static NV_STATUS service_fault_batch_ats_sub(uvm_gpu_va_space_t *gpu_va_space,
|
||||
|
||||
// End of sub-batch. Service faults gathered so far.
|
||||
if (fault_address >= (sub_batch_base + UVM_VA_BLOCK_SIZE)) {
|
||||
UVM_ASSERT(!uvm_page_mask_empty(read_fault_mask) || !uvm_page_mask_empty(write_fault_mask));
|
||||
UVM_ASSERT(!uvm_page_mask_empty(read_fault_mask) ||
|
||||
!uvm_page_mask_empty(write_fault_mask) ||
|
||||
!uvm_page_mask_empty(prefetch_only_fault_mask));
|
||||
|
||||
status = service_fault_batch_ats_sub_vma(gpu_va_space,
|
||||
vma,
|
||||
@ -1846,8 +1851,14 @@ static NV_STATUS service_fault_batch_ats_sub(uvm_gpu_va_space_t *gpu_va_space,
|
||||
|
||||
page_index = (fault_address - sub_batch_base) / PAGE_SIZE;
|
||||
|
||||
if ((access_type <= UVM_FAULT_ACCESS_TYPE_READ) ||
|
||||
uvm_fault_access_type_mask_test(current_entry->access_type_mask, UVM_FAULT_ACCESS_TYPE_READ))
|
||||
// Do not check for coalesced access type. If there are multiple different
|
||||
// accesses to an address, we can disregard the prefetch one.
|
||||
if ((access_type == UVM_FAULT_ACCESS_TYPE_PREFETCH) &&
|
||||
(uvm_fault_access_type_mask_highest(current_entry->access_type_mask) == UVM_FAULT_ACCESS_TYPE_PREFETCH))
|
||||
uvm_page_mask_set(prefetch_only_fault_mask, page_index);
|
||||
|
||||
if ((access_type == UVM_FAULT_ACCESS_TYPE_READ) ||
|
||||
uvm_fault_access_type_mask_test(current_entry->access_type_mask, UVM_FAULT_ACCESS_TYPE_READ))
|
||||
uvm_page_mask_set(read_fault_mask, page_index);
|
||||
|
||||
if (access_type >= UVM_FAULT_ACCESS_TYPE_WRITE)
|
||||
@ -1861,7 +1872,10 @@ static NV_STATUS service_fault_batch_ats_sub(uvm_gpu_va_space_t *gpu_va_space,
|
||||
(previous_entry->va_space == current_entry->va_space));
|
||||
|
||||
// Service the last sub-batch.
|
||||
if ((status == NV_OK) && (!uvm_page_mask_empty(read_fault_mask) || !uvm_page_mask_empty(write_fault_mask))) {
|
||||
if ((status == NV_OK) &&
|
||||
(!uvm_page_mask_empty(read_fault_mask) ||
|
||||
!uvm_page_mask_empty(write_fault_mask) ||
|
||||
!uvm_page_mask_empty(prefetch_only_fault_mask))) {
|
||||
status = service_fault_batch_ats_sub_vma(gpu_va_space,
|
||||
vma,
|
||||
sub_batch_base,
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*******************************************************************************
|
||||
Copyright (c) 2015-2023 NVIDIA Corporation
|
||||
Copyright (c) 2015-2024 NVIDIA Corporation
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to
|
||||
@ -30,13 +30,34 @@
|
||||
|
||||
#define UVM_SEMAPHORE_SIZE 4
|
||||
#define UVM_SEMAPHORE_PAGE_SIZE PAGE_SIZE
|
||||
#define UVM_SEMAPHORE_COUNT_PER_PAGE (PAGE_SIZE / UVM_SEMAPHORE_SIZE)
|
||||
#define UVM_SEMAPHORE_COUNT_PER_PAGE (UVM_SEMAPHORE_PAGE_SIZE / UVM_SEMAPHORE_SIZE)
|
||||
|
||||
// The top nibble of the canary base is intentionally 0. The rest of the value
|
||||
// is arbitrary. See the comments below on make_canary.
|
||||
#define UVM_SEMAPHORE_CANARY_BASE 0x0badc0de
|
||||
#define UVM_SEMAPHORE_CANARY_MASK 0xf0000000
|
||||
|
||||
// In Confidential Computing, the representation of the semaphore payload
|
||||
// requires additional storage (fields), because it is encrypted.
|
||||
//
|
||||
// The payload fields are written by the GPU, and read by the CPU.
|
||||
typedef struct
|
||||
{
|
||||
// The actual (encrypted) payload value.
|
||||
NvU32 encrypted_payload;
|
||||
|
||||
// Plaintext number used to version a {encrypted_payload, auth_tag} pair.
|
||||
// The notifier ensures that the CPU can decrypt a valid snapshot of those
|
||||
// encryption materials.
|
||||
uvm_gpu_semaphore_notifier_t notifier;
|
||||
|
||||
// Padding used to enforce 16-byte alignment of the authentication tag.
|
||||
NvU64 unused;
|
||||
|
||||
// Authentication tag associated with the encrypted payload.
|
||||
NvU8 auth_tag[UVM_CONF_COMPUTING_AUTH_TAG_SIZE];
|
||||
} uvm_gpu_encrypted_semaphore_payload_t;
|
||||
|
||||
struct uvm_gpu_semaphore_pool_struct
|
||||
{
|
||||
// The GPU owning the pool
|
||||
@ -61,14 +82,9 @@ struct uvm_gpu_semaphore_pool_page_struct
|
||||
uvm_rm_mem_t *memory;
|
||||
|
||||
struct {
|
||||
// Unprotected sysmem storing encrypted value of semaphores
|
||||
// Unprotected sysmem storing encrypted value of semaphores, in addition
|
||||
// to other encryption-related data.
|
||||
uvm_rm_mem_t *encrypted_payload_memory;
|
||||
|
||||
// Unprotected sysmem storing encryption auth tags
|
||||
uvm_rm_mem_t *auth_tag_memory;
|
||||
|
||||
// Unprotected sysmem storing plain text notifier values
|
||||
uvm_rm_mem_t *notifier_memory;
|
||||
} conf_computing;
|
||||
|
||||
// Pool the page is part of
|
||||
@ -131,7 +147,6 @@ static bool semaphore_uses_canary(uvm_gpu_semaphore_pool_t *pool)
|
||||
// A pool allocated in the CPR of vidmem cannot be read/written from the
|
||||
// CPU.
|
||||
return !gpu_semaphore_pool_is_secure(pool) && UVM_IS_DEBUG();
|
||||
return UVM_IS_DEBUG();
|
||||
}
|
||||
|
||||
// Can the GPU access the semaphore, i.e., can Host/Esched address the semaphore
|
||||
@ -146,68 +161,49 @@ static void pool_page_free_buffers(uvm_gpu_semaphore_pool_page_t *page)
|
||||
uvm_rm_mem_free(page->memory);
|
||||
page->memory = NULL;
|
||||
|
||||
if (gpu_semaphore_pool_is_secure(page->pool)) {
|
||||
uvm_rm_mem_free(page->conf_computing.encrypted_payload_memory);
|
||||
uvm_rm_mem_free(page->conf_computing.auth_tag_memory);
|
||||
uvm_rm_mem_free(page->conf_computing.notifier_memory);
|
||||
|
||||
page->conf_computing.encrypted_payload_memory = NULL;
|
||||
page->conf_computing.auth_tag_memory = NULL;
|
||||
page->conf_computing.notifier_memory = NULL;
|
||||
}
|
||||
else {
|
||||
if (!gpu_semaphore_pool_is_secure(page->pool))
|
||||
UVM_ASSERT(!page->conf_computing.encrypted_payload_memory);
|
||||
UVM_ASSERT(!page->conf_computing.auth_tag_memory);
|
||||
UVM_ASSERT(!page->conf_computing.notifier_memory);
|
||||
}
|
||||
|
||||
uvm_rm_mem_free(page->conf_computing.encrypted_payload_memory);
|
||||
page->conf_computing.encrypted_payload_memory = NULL;
|
||||
}
|
||||
|
||||
static NV_STATUS pool_page_alloc_buffers(uvm_gpu_semaphore_pool_page_t *page)
|
||||
{
|
||||
NV_STATUS status;
|
||||
uvm_gpu_semaphore_pool_t *pool = page->pool;
|
||||
uvm_gpu_t *gpu = pool->gpu;
|
||||
uvm_rm_mem_type_t memory_type = (pool->aperture == UVM_APERTURE_SYS) ? UVM_RM_MEM_TYPE_SYS : UVM_RM_MEM_TYPE_GPU;
|
||||
size_t align = 0;
|
||||
bool map_all = true;
|
||||
align = gpu_semaphore_pool_is_secure(pool) ? UVM_CONF_COMPUTING_BUF_ALIGNMENT : 0;
|
||||
map_all = gpu_semaphore_pool_is_secure(pool) ? false : true;
|
||||
|
||||
if (map_all)
|
||||
status = uvm_rm_mem_alloc_and_map_all(pool->gpu, memory_type, UVM_SEMAPHORE_PAGE_SIZE, align, &page->memory);
|
||||
else
|
||||
status = uvm_rm_mem_alloc(pool->gpu, memory_type, UVM_SEMAPHORE_PAGE_SIZE, align, &page->memory);
|
||||
|
||||
if (status != NV_OK)
|
||||
goto error;
|
||||
size_t memory_size = UVM_SEMAPHORE_PAGE_SIZE;
|
||||
|
||||
if (!gpu_semaphore_pool_is_secure(pool))
|
||||
return NV_OK;
|
||||
return uvm_rm_mem_alloc_and_map_all(gpu, memory_type, memory_size, 0, &page->memory);
|
||||
|
||||
status = uvm_rm_mem_alloc_and_map_cpu(pool->gpu,
|
||||
status = uvm_rm_mem_alloc(gpu, memory_type, memory_size, UVM_CONF_COMPUTING_BUF_ALIGNMENT, &page->memory);
|
||||
if (status != NV_OK)
|
||||
goto error;
|
||||
|
||||
// TODO: Bug 4607874: This check can be removed once a more general solution
|
||||
// to prevent reordering of CE writes is in place.
|
||||
//
|
||||
// The sysmem allocation backing the page's encrypted payload memory must be
|
||||
// 32-bytes aligned (UVM_CONF_COMPUTING_BUF_ALIGNMENT). If each individual
|
||||
// encrypted payload is 32 bytes, then it never spans more than a single,
|
||||
// naturally aligned, segment of 32 bytes. This is required to prevent
|
||||
// reordering issues that result on failures when decrypting the semaphore's
|
||||
// payload on the CPU.
|
||||
BUILD_BUG_ON(sizeof(uvm_gpu_encrypted_semaphore_payload_t) != UVM_CONF_COMPUTING_BUF_ALIGNMENT);
|
||||
|
||||
BUILD_BUG_ON(offsetof(uvm_gpu_encrypted_semaphore_payload_t, auth_tag) != UVM_CONF_COMPUTING_AUTH_TAG_ALIGNMENT);
|
||||
|
||||
status = uvm_rm_mem_alloc_and_map_cpu(gpu,
|
||||
UVM_RM_MEM_TYPE_SYS,
|
||||
UVM_SEMAPHORE_PAGE_SIZE,
|
||||
UVM_SEMAPHORE_COUNT_PER_PAGE * sizeof(uvm_gpu_encrypted_semaphore_payload_t),
|
||||
UVM_CONF_COMPUTING_BUF_ALIGNMENT,
|
||||
&page->conf_computing.encrypted_payload_memory);
|
||||
if (status != NV_OK)
|
||||
goto error;
|
||||
|
||||
BUILD_BUG_ON(UVM_CONF_COMPUTING_AUTH_TAG_SIZE % UVM_CONF_COMPUTING_AUTH_TAG_ALIGNMENT);
|
||||
status = uvm_rm_mem_alloc_and_map_cpu(pool->gpu,
|
||||
UVM_RM_MEM_TYPE_SYS,
|
||||
UVM_SEMAPHORE_COUNT_PER_PAGE * UVM_CONF_COMPUTING_AUTH_TAG_SIZE,
|
||||
UVM_CONF_COMPUTING_AUTH_TAG_ALIGNMENT,
|
||||
&page->conf_computing.auth_tag_memory);
|
||||
if (status != NV_OK)
|
||||
goto error;
|
||||
|
||||
status = uvm_rm_mem_alloc_and_map_cpu(pool->gpu,
|
||||
UVM_RM_MEM_TYPE_SYS,
|
||||
UVM_SEMAPHORE_COUNT_PER_PAGE * sizeof(NvU32),
|
||||
0,
|
||||
&page->conf_computing.notifier_memory);
|
||||
if (status != NV_OK)
|
||||
goto error;
|
||||
|
||||
return NV_OK;
|
||||
error:
|
||||
pool_page_free_buffers(page);
|
||||
@ -492,51 +488,64 @@ NvU32 *uvm_gpu_semaphore_get_cpu_va(uvm_gpu_semaphore_t *semaphore)
|
||||
return (NvU32*)(base_va + semaphore->index * UVM_SEMAPHORE_SIZE);
|
||||
}
|
||||
|
||||
static uvm_gpu_encrypted_semaphore_payload_t *encrypted_semaphore_payload(uvm_gpu_semaphore_t *semaphore)
|
||||
{
|
||||
uvm_gpu_encrypted_semaphore_payload_t *encrypted_semaphore;
|
||||
|
||||
encrypted_semaphore = uvm_rm_mem_get_cpu_va(semaphore->page->conf_computing.encrypted_payload_memory);
|
||||
|
||||
return encrypted_semaphore + semaphore->index;
|
||||
}
|
||||
|
||||
static NvU64 encrypted_semaphore_payload_gpu_va(uvm_gpu_semaphore_t *semaphore)
|
||||
{
|
||||
uvm_gpu_semaphore_pool_page_t *page = semaphore->page;
|
||||
NvU64 gpu_va_base = uvm_rm_mem_get_gpu_uvm_va(page->conf_computing.encrypted_payload_memory, page->pool->gpu);
|
||||
|
||||
return gpu_va_base + semaphore->index * sizeof(uvm_gpu_encrypted_semaphore_payload_t);
|
||||
}
|
||||
|
||||
NvU32 *uvm_gpu_semaphore_get_encrypted_payload_cpu_va(uvm_gpu_semaphore_t *semaphore)
|
||||
{
|
||||
char *encrypted_base_va = uvm_rm_mem_get_cpu_va(semaphore->page->conf_computing.encrypted_payload_memory);
|
||||
|
||||
return (NvU32*)(encrypted_base_va + semaphore->index * UVM_SEMAPHORE_SIZE);
|
||||
return &encrypted_semaphore_payload(semaphore)->encrypted_payload;
|
||||
}
|
||||
|
||||
uvm_gpu_address_t uvm_gpu_semaphore_get_encrypted_payload_gpu_va(uvm_gpu_semaphore_t *semaphore)
|
||||
{
|
||||
NvU64 encrypted_base_va = uvm_rm_mem_get_gpu_uvm_va(semaphore->page->conf_computing.encrypted_payload_memory,
|
||||
semaphore->page->pool->gpu);
|
||||
size_t offset = offsetof(uvm_gpu_encrypted_semaphore_payload_t, encrypted_payload);
|
||||
NvU64 gpu_va = encrypted_semaphore_payload_gpu_va(semaphore) + offset;
|
||||
|
||||
return uvm_gpu_address_virtual_unprotected(encrypted_base_va + semaphore->index * UVM_SEMAPHORE_SIZE);
|
||||
UVM_ASSERT(IS_ALIGNED(gpu_va, UVM_CONF_COMPUTING_BUF_ALIGNMENT));
|
||||
|
||||
return uvm_gpu_address_virtual_unprotected(gpu_va);
|
||||
}
|
||||
|
||||
uvm_gpu_semaphore_notifier_t *uvm_gpu_semaphore_get_notifier_cpu_va(uvm_gpu_semaphore_t *semaphore)
|
||||
{
|
||||
uvm_gpu_semaphore_notifier_t *notifier_base_va =
|
||||
uvm_rm_mem_get_cpu_va(semaphore->page->conf_computing.notifier_memory);
|
||||
|
||||
return notifier_base_va + semaphore->index;
|
||||
return &encrypted_semaphore_payload(semaphore)->notifier;
|
||||
}
|
||||
|
||||
uvm_gpu_address_t uvm_gpu_semaphore_get_notifier_gpu_va(uvm_gpu_semaphore_t *semaphore)
|
||||
{
|
||||
NvU64 notifier_base_va = uvm_rm_mem_get_gpu_uvm_va(semaphore->page->conf_computing.notifier_memory,
|
||||
semaphore->page->pool->gpu);
|
||||
size_t offset = offsetof(uvm_gpu_encrypted_semaphore_payload_t, notifier);
|
||||
NvU64 gpu_va = encrypted_semaphore_payload_gpu_va(semaphore) + offset;
|
||||
|
||||
return uvm_gpu_address_virtual_unprotected(notifier_base_va +
|
||||
semaphore->index * sizeof(uvm_gpu_semaphore_notifier_t));
|
||||
return uvm_gpu_address_virtual_unprotected(gpu_va);
|
||||
}
|
||||
|
||||
void *uvm_gpu_semaphore_get_auth_tag_cpu_va(uvm_gpu_semaphore_t *semaphore)
|
||||
{
|
||||
char *auth_tag_base_va = uvm_rm_mem_get_cpu_va(semaphore->page->conf_computing.auth_tag_memory);
|
||||
|
||||
return (void*)(auth_tag_base_va + semaphore->index * UVM_CONF_COMPUTING_AUTH_TAG_SIZE);
|
||||
return encrypted_semaphore_payload(semaphore)->auth_tag;
|
||||
}
|
||||
|
||||
uvm_gpu_address_t uvm_gpu_semaphore_get_auth_tag_gpu_va(uvm_gpu_semaphore_t *semaphore)
|
||||
{
|
||||
NvU64 auth_tag_base_va = uvm_rm_mem_get_gpu_uvm_va(semaphore->page->conf_computing.auth_tag_memory,
|
||||
semaphore->page->pool->gpu);
|
||||
size_t offset = offsetof(uvm_gpu_encrypted_semaphore_payload_t, auth_tag);
|
||||
NvU64 gpu_va = encrypted_semaphore_payload_gpu_va(semaphore) + offset;
|
||||
|
||||
return uvm_gpu_address_virtual_unprotected(auth_tag_base_va + semaphore->index * UVM_CONF_COMPUTING_AUTH_TAG_SIZE);
|
||||
UVM_ASSERT(IS_ALIGNED(gpu_va, UVM_CONF_COMPUTING_AUTH_TAG_ALIGNMENT));
|
||||
|
||||
return uvm_gpu_address_virtual_unprotected(gpu_va);
|
||||
}
|
||||
|
||||
NvU32 uvm_gpu_semaphore_get_payload(uvm_gpu_semaphore_t *semaphore)
|
||||
@ -595,7 +604,7 @@ static bool tracking_semaphore_uses_mutex(uvm_gpu_tracking_semaphore_t *tracking
|
||||
NV_STATUS uvm_gpu_tracking_semaphore_alloc(uvm_gpu_semaphore_pool_t *pool, uvm_gpu_tracking_semaphore_t *tracking_sem)
|
||||
{
|
||||
NV_STATUS status;
|
||||
uvm_lock_order_t order = UVM_LOCK_ORDER_LEAF;
|
||||
uvm_lock_order_t order;
|
||||
|
||||
memset(tracking_sem, 0, sizeof(*tracking_sem));
|
||||
|
||||
@ -607,6 +616,8 @@ NV_STATUS uvm_gpu_tracking_semaphore_alloc(uvm_gpu_semaphore_pool_t *pool, uvm_g
|
||||
|
||||
if (g_uvm_global.conf_computing_enabled)
|
||||
order = UVM_LOCK_ORDER_SECURE_SEMAPHORE;
|
||||
else
|
||||
order = UVM_LOCK_ORDER_LEAF;
|
||||
|
||||
if (tracking_semaphore_uses_mutex(tracking_sem))
|
||||
uvm_mutex_init(&tracking_sem->m_lock, order);
|
||||
|
@ -107,11 +107,12 @@ NV_STATUS uvm_gpu_semaphore_secure_pool_create(uvm_gpu_t *gpu, uvm_gpu_semaphore
|
||||
void uvm_gpu_semaphore_pool_destroy(uvm_gpu_semaphore_pool_t *pool);
|
||||
|
||||
// Allocate a semaphore from the pool.
|
||||
//
|
||||
// The semaphore will be mapped on all GPUs currently registered with the UVM
|
||||
// driver, and on all new GPUs which will be registered in the future.
|
||||
// Unless the Confidential Computing feature is enabled and the pool is a
|
||||
// secure pool. In this case, it is only mapped to the GPU that holds the
|
||||
// allocation.
|
||||
// driver, and on all new GPUs which will be registered in the future. The only
|
||||
// exception (in Confidential Computing) are semaphores allocated from a secure
|
||||
// pool, which are only mapped on the GPU that holds the allocation.
|
||||
//
|
||||
// The mappings are added to UVM's internal address space, and (in SR-IOV heavy)
|
||||
// to the proxy address space.
|
||||
//
|
||||
|
@ -46,6 +46,8 @@
|
||||
#include "clc8b5.h"
|
||||
#include "clc96f.h"
|
||||
#include "clc9b5.h"
|
||||
#include "clca6f.h"
|
||||
#include "clcab5.h"
|
||||
|
||||
static int uvm_downgrade_force_membar_sys = 1;
|
||||
module_param(uvm_downgrade_force_membar_sys, uint, 0644);
|
||||
@ -73,16 +75,17 @@ static uvm_hal_class_ops_t ce_table[] =
|
||||
.semaphore_release = uvm_hal_maxwell_ce_semaphore_release,
|
||||
.semaphore_timestamp = uvm_hal_maxwell_ce_semaphore_timestamp,
|
||||
.semaphore_reduction_inc = uvm_hal_maxwell_ce_semaphore_reduction_inc,
|
||||
.semaphore_target_is_valid = uvm_hal_maxwell_semaphore_target_is_valid,
|
||||
.offset_out = uvm_hal_maxwell_ce_offset_out,
|
||||
.offset_in_out = uvm_hal_maxwell_ce_offset_in_out,
|
||||
.phys_mode = uvm_hal_maxwell_ce_phys_mode,
|
||||
.plc_mode = uvm_hal_maxwell_ce_plc_mode,
|
||||
.memcopy_copy_type = uvm_hal_maxwell_ce_memcopy_copy_type,
|
||||
.memcopy_is_valid = uvm_hal_ce_memcopy_is_valid_stub,
|
||||
.memcopy_is_valid = uvm_hal_maxwell_ce_memcopy_is_valid,
|
||||
.memcopy_patch_src = uvm_hal_ce_memcopy_patch_src_stub,
|
||||
.memcopy = uvm_hal_maxwell_ce_memcopy,
|
||||
.memcopy_v_to_v = uvm_hal_maxwell_ce_memcopy_v_to_v,
|
||||
.memset_is_valid = uvm_hal_ce_memset_is_valid_stub,
|
||||
.memset_is_valid = uvm_hal_maxwell_ce_memset_is_valid,
|
||||
.memset_1 = uvm_hal_maxwell_ce_memset_1,
|
||||
.memset_4 = uvm_hal_maxwell_ce_memset_4,
|
||||
.memset_8 = uvm_hal_maxwell_ce_memset_8,
|
||||
@ -142,9 +145,9 @@ static uvm_hal_class_ops_t ce_table[] =
|
||||
.u.ce_ops = {
|
||||
.method_is_valid = uvm_hal_method_is_valid_stub,
|
||||
.plc_mode = uvm_hal_ampere_ce_plc_mode_c7b5,
|
||||
.memcopy_is_valid = uvm_hal_ce_memcopy_is_valid_stub,
|
||||
.memcopy_is_valid = uvm_hal_maxwell_ce_memcopy_is_valid,
|
||||
.memcopy_patch_src = uvm_hal_ce_memcopy_patch_src_stub,
|
||||
.memset_is_valid = uvm_hal_ce_memset_is_valid_stub,
|
||||
.memset_is_valid = uvm_hal_maxwell_ce_memset_is_valid,
|
||||
},
|
||||
},
|
||||
{
|
||||
@ -171,6 +174,11 @@ static uvm_hal_class_ops_t ce_table[] =
|
||||
.parent_id = HOPPER_DMA_COPY_A,
|
||||
.u.ce_ops = {},
|
||||
},
|
||||
{
|
||||
.id = BLACKWELL_DMA_COPY_B,
|
||||
.parent_id = BLACKWELL_DMA_COPY_A,
|
||||
.u.ce_ops = {},
|
||||
},
|
||||
};
|
||||
|
||||
// Table for GPFIFO functions. Same idea as the copy engine table.
|
||||
@ -185,6 +193,7 @@ static uvm_hal_class_ops_t host_table[] =
|
||||
.sw_method_is_valid = uvm_hal_method_is_valid_stub,
|
||||
.wait_for_idle = uvm_hal_maxwell_host_wait_for_idle,
|
||||
.membar_sys = uvm_hal_maxwell_host_membar_sys,
|
||||
|
||||
// No MEMBAR GPU until Pascal, just do a MEMBAR SYS.
|
||||
.membar_gpu = uvm_hal_maxwell_host_membar_sys,
|
||||
.noop = uvm_hal_maxwell_host_noop,
|
||||
@ -192,6 +201,7 @@ static uvm_hal_class_ops_t host_table[] =
|
||||
.semaphore_acquire = uvm_hal_maxwell_host_semaphore_acquire,
|
||||
.semaphore_release = uvm_hal_maxwell_host_semaphore_release,
|
||||
.semaphore_timestamp = uvm_hal_maxwell_host_semaphore_timestamp,
|
||||
.semaphore_target_is_valid = uvm_hal_maxwell_semaphore_target_is_valid,
|
||||
.set_gpfifo_entry = uvm_hal_maxwell_host_set_gpfifo_entry,
|
||||
.set_gpfifo_noop = uvm_hal_maxwell_host_set_gpfifo_noop,
|
||||
.set_gpfifo_pushbuffer_segment_base = uvm_hal_maxwell_host_set_gpfifo_pushbuffer_segment_base_unsupported,
|
||||
@ -302,6 +312,11 @@ static uvm_hal_class_ops_t host_table[] =
|
||||
.tlb_invalidate_test = uvm_hal_blackwell_host_tlb_invalidate_test,
|
||||
}
|
||||
},
|
||||
{
|
||||
.id = BLACKWELL_CHANNEL_GPFIFO_B,
|
||||
.parent_id = BLACKWELL_CHANNEL_GPFIFO_A,
|
||||
.u.host_ops = {}
|
||||
},
|
||||
};
|
||||
|
||||
static uvm_hal_class_ops_t arch_table[] =
|
||||
@ -383,6 +398,15 @@ static uvm_hal_class_ops_t arch_table[] =
|
||||
.mmu_client_id_to_utlb_id = uvm_hal_blackwell_mmu_client_id_to_utlb_id,
|
||||
}
|
||||
},
|
||||
{
|
||||
.id = NV2080_CTRL_MC_ARCH_INFO_ARCHITECTURE_GB200,
|
||||
.parent_id = NV2080_CTRL_MC_ARCH_INFO_ARCHITECTURE_GB100,
|
||||
.u.arch_ops = {
|
||||
// Note that GB20x MMU behaves as Hopper MMU, so it inherits from
|
||||
// Hopper's MMU, not from GB10x.
|
||||
.mmu_mode_hal = uvm_hal_mmu_mode_hopper,
|
||||
}
|
||||
},
|
||||
};
|
||||
|
||||
static uvm_hal_class_ops_t fault_buffer_table[] =
|
||||
@ -479,6 +503,11 @@ static uvm_hal_class_ops_t fault_buffer_table[] =
|
||||
.get_mmu_engine_type = uvm_hal_blackwell_fault_buffer_get_mmu_engine_type,
|
||||
}
|
||||
},
|
||||
{
|
||||
.id = NV2080_CTRL_MC_ARCH_INFO_ARCHITECTURE_GB200,
|
||||
.parent_id = NV2080_CTRL_MC_ARCH_INFO_ARCHITECTURE_GB100,
|
||||
.u.fault_buffer_ops = {}
|
||||
},
|
||||
};
|
||||
|
||||
static uvm_hal_class_ops_t access_counter_buffer_table[] =
|
||||
@ -546,6 +575,11 @@ static uvm_hal_class_ops_t access_counter_buffer_table[] =
|
||||
.parent_id = NV2080_CTRL_MC_ARCH_INFO_ARCHITECTURE_GH100,
|
||||
.u.access_counter_buffer_ops = {}
|
||||
},
|
||||
{
|
||||
.id = NV2080_CTRL_MC_ARCH_INFO_ARCHITECTURE_GB200,
|
||||
.parent_id = NV2080_CTRL_MC_ARCH_INFO_ARCHITECTURE_GB100,
|
||||
.u.access_counter_buffer_ops = {}
|
||||
},
|
||||
};
|
||||
|
||||
static uvm_hal_class_ops_t sec2_table[] =
|
||||
@ -557,6 +591,7 @@ static uvm_hal_class_ops_t sec2_table[] =
|
||||
.decrypt = uvm_hal_maxwell_sec2_decrypt_unsupported,
|
||||
.semaphore_release = uvm_hal_maxwell_sec2_semaphore_release_unsupported,
|
||||
.semaphore_timestamp = uvm_hal_maxwell_sec2_semaphore_timestamp_unsupported,
|
||||
.semaphore_target_is_valid = uvm_hal_maxwell_semaphore_target_is_valid,
|
||||
}
|
||||
},
|
||||
{
|
||||
@ -604,6 +639,11 @@ static uvm_hal_class_ops_t sec2_table[] =
|
||||
.parent_id = NV2080_CTRL_MC_ARCH_INFO_ARCHITECTURE_GH100,
|
||||
.u.sec2_ops = {}
|
||||
},
|
||||
{
|
||||
.id = NV2080_CTRL_MC_ARCH_INFO_ARCHITECTURE_GB200,
|
||||
.parent_id = NV2080_CTRL_MC_ARCH_INFO_ARCHITECTURE_GB100,
|
||||
.u.sec2_ops = {}
|
||||
},
|
||||
};
|
||||
|
||||
static inline uvm_hal_class_ops_t *ops_find_by_id(uvm_hal_class_ops_t *table, NvU32 row_count, NvU32 id)
|
||||
@ -799,16 +839,11 @@ NV_STATUS uvm_hal_init_gpu(uvm_parent_gpu_t *parent_gpu)
|
||||
|
||||
static void hal_override_properties(uvm_parent_gpu_t *parent_gpu)
|
||||
{
|
||||
// Access counters are currently not supported in vGPU.
|
||||
// Access counters are currently not supported in vGPU or Confidential
|
||||
// Computing.
|
||||
//
|
||||
// TODO: Bug 200692962: Add support for access counters in vGPU
|
||||
if (parent_gpu->virt_mode != UVM_VIRT_MODE_NONE) {
|
||||
parent_gpu->access_counters_supported = false;
|
||||
parent_gpu->access_counters_can_use_physical_addresses = false;
|
||||
}
|
||||
|
||||
// Access counters are not supported in Confidential Computing.
|
||||
else if (g_uvm_global.conf_computing_enabled) {
|
||||
if ((parent_gpu->virt_mode != UVM_VIRT_MODE_NONE) || g_uvm_global.conf_computing_enabled) {
|
||||
parent_gpu->access_counters_supported = false;
|
||||
parent_gpu->access_counters_can_use_physical_addresses = false;
|
||||
}
|
||||
@ -1048,16 +1083,6 @@ bool uvm_hal_method_is_valid_stub(uvm_push_t *push, NvU32 method_address, NvU32
|
||||
return true;
|
||||
}
|
||||
|
||||
bool uvm_hal_ce_memcopy_is_valid_stub(uvm_push_t *push, uvm_gpu_address_t dst, uvm_gpu_address_t src)
|
||||
{
|
||||
return true;
|
||||
}
|
||||
|
||||
void uvm_hal_ce_memcopy_patch_src_stub(uvm_push_t *push, uvm_gpu_address_t *src)
|
||||
{
|
||||
}
|
||||
|
||||
bool uvm_hal_ce_memset_is_valid_stub(uvm_push_t *push, uvm_gpu_address_t dst, size_t num_elements, size_t element_size)
|
||||
{
|
||||
return true;
|
||||
}
|
||||
|
@ -268,6 +268,15 @@ void uvm_hal_maxwell_host_semaphore_acquire(uvm_push_t *push, NvU64 gpu_va, NvU3
|
||||
void uvm_hal_turing_host_semaphore_acquire(uvm_push_t *push, NvU64 gpu_va, NvU32 payload);
|
||||
void uvm_hal_hopper_host_semaphore_acquire(uvm_push_t *push, NvU64 gpu_va, NvU32 payload);
|
||||
|
||||
// Semaphore op validation.
|
||||
// The validation happens at the start of semaphore op (uvm_hal_semaphore_*_t)
|
||||
// execution. This is currently shared for all semaphore operations;
|
||||
// semaphore releases by both CE, SEC2, and esched as well as semaphore
|
||||
// reduction operations, semaphore acquire, and semaphore release
|
||||
// operations with timestamp.
|
||||
typedef bool (*uvm_hal_semaphore_target_is_valid_t)(uvm_push_t *push, NvU64 gpu_va);
|
||||
bool uvm_hal_maxwell_semaphore_target_is_valid(uvm_push_t *push, NvU64 gpu_va);
|
||||
|
||||
typedef void (*uvm_hal_host_set_gpfifo_entry_t)(NvU64 *fifo_entry,
|
||||
NvU64 pushbuffer_va,
|
||||
NvU32 pushbuffer_length,
|
||||
@ -330,10 +339,9 @@ bool uvm_hal_ampere_ce_method_is_valid_c6b5(uvm_push_t *push, NvU32 method_addre
|
||||
|
||||
// Memcopy validation.
|
||||
// The validation happens at the start of the memcopy (uvm_hal_memcopy_t)
|
||||
// execution. Use uvm_hal_ce_memcopy_is_valid_stub to skip the validation for
|
||||
// a given architecture.
|
||||
// execution.
|
||||
typedef bool (*uvm_hal_ce_memcopy_is_valid)(uvm_push_t *push, uvm_gpu_address_t dst, uvm_gpu_address_t src);
|
||||
bool uvm_hal_ce_memcopy_is_valid_stub(uvm_push_t *push, uvm_gpu_address_t dst, uvm_gpu_address_t src);
|
||||
bool uvm_hal_maxwell_ce_memcopy_is_valid(uvm_push_t *push, uvm_gpu_address_t dst, uvm_gpu_address_t src);
|
||||
bool uvm_hal_ampere_ce_memcopy_is_valid_c6b5(uvm_push_t *push, uvm_gpu_address_t dst, uvm_gpu_address_t src);
|
||||
bool uvm_hal_hopper_ce_memcopy_is_valid(uvm_push_t *push, uvm_gpu_address_t dst, uvm_gpu_address_t src);
|
||||
|
||||
@ -358,13 +366,15 @@ void uvm_hal_maxwell_ce_memcopy_v_to_v(uvm_push_t *push, NvU64 dst, NvU64 src, s
|
||||
|
||||
// Memset validation.
|
||||
// The validation happens at the start of the memset (uvm_hal_memset_*_t)
|
||||
// execution. Use uvm_hal_ce_memset_is_valid_stub to skip the validation for
|
||||
// a given architecture.
|
||||
// execution.
|
||||
typedef bool (*uvm_hal_ce_memset_is_valid)(uvm_push_t *push,
|
||||
uvm_gpu_address_t dst,
|
||||
size_t num_elements,
|
||||
size_t element_size);
|
||||
bool uvm_hal_ce_memset_is_valid_stub(uvm_push_t *push, uvm_gpu_address_t dst, size_t num_elements, size_t element_size);
|
||||
bool uvm_hal_maxwell_ce_memset_is_valid(uvm_push_t *push,
|
||||
uvm_gpu_address_t dst,
|
||||
size_t num_elements,
|
||||
size_t element_size);
|
||||
bool uvm_hal_ampere_ce_memset_is_valid_c6b5(uvm_push_t *push,
|
||||
uvm_gpu_address_t dst,
|
||||
size_t num_elements,
|
||||
@ -484,6 +494,7 @@ uvm_mmu_mode_hal_t *uvm_hal_mmu_mode_turing(NvU64 big_page_size);
uvm_mmu_mode_hal_t *uvm_hal_mmu_mode_ampere(NvU64 big_page_size);
uvm_mmu_mode_hal_t *uvm_hal_mmu_mode_hopper(NvU64 big_page_size);
uvm_mmu_mode_hal_t *uvm_hal_mmu_mode_blackwell(NvU64 big_page_size);

void uvm_hal_maxwell_mmu_enable_prefetch_faults_unsupported(uvm_parent_gpu_t *parent_gpu);
void uvm_hal_maxwell_mmu_disable_prefetch_faults_unsupported(uvm_parent_gpu_t *parent_gpu);
void uvm_hal_pascal_mmu_enable_prefetch_faults(uvm_parent_gpu_t *parent_gpu);
@ -566,7 +577,6 @@ NvU8 uvm_hal_volta_fault_buffer_get_ve_id(NvU16 mmu_engine_id, uvm_mmu_engine_ty
uvm_mmu_engine_type_t uvm_hal_volta_fault_buffer_get_mmu_engine_type(NvU16 mmu_engine_id,
                                                                     uvm_fault_client_type_t client_type,
                                                                     NvU16 client_id);

uvm_fault_type_t uvm_hal_volta_fault_buffer_get_fault_type(const NvU32 *fault_entry);

void uvm_hal_turing_disable_replayable_faults(uvm_parent_gpu_t *parent_gpu);
@ -760,6 +770,7 @@ struct uvm_host_hal_struct
    uvm_hal_semaphore_release_t semaphore_release;
    uvm_hal_semaphore_acquire_t semaphore_acquire;
    uvm_hal_semaphore_timestamp_t semaphore_timestamp;
    uvm_hal_semaphore_target_is_valid_t semaphore_target_is_valid;
    uvm_hal_host_set_gpfifo_entry_t set_gpfifo_entry;
    uvm_hal_host_set_gpfifo_noop_t set_gpfifo_noop;
    uvm_hal_host_set_gpfifo_pushbuffer_segment_base_t set_gpfifo_pushbuffer_segment_base;
@ -786,6 +797,7 @@ struct uvm_ce_hal_struct
    uvm_hal_ce_method_is_valid method_is_valid;
    uvm_hal_semaphore_release_t semaphore_release;
    uvm_hal_semaphore_timestamp_t semaphore_timestamp;
    uvm_hal_semaphore_target_is_valid_t semaphore_target_is_valid;
    uvm_hal_ce_offset_out_t offset_out;
    uvm_hal_ce_offset_in_out_t offset_in_out;
    uvm_hal_ce_phys_mode_t phys_mode;
@ -849,6 +861,7 @@ struct uvm_sec2_hal_struct
    uvm_hal_sec2_decrypt_t decrypt;
    uvm_hal_semaphore_release_t semaphore_release;
    uvm_hal_semaphore_timestamp_t semaphore_timestamp;
    uvm_hal_semaphore_target_is_valid_t semaphore_target_is_valid;
};

typedef struct

@ -301,7 +301,6 @@ typedef enum
    UVM_FAULT_TYPE_REGION_VIOLATION,
    UVM_FAULT_TYPE_POISONED,
    UVM_FAULT_TYPE_CC_VIOLATION,

    UVM_FAULT_TYPE_COUNT
} uvm_fault_type_t;
@ -163,7 +163,7 @@ static uvm_va_block_t *hmm_va_block_from_node(uvm_range_tree_node_t *node)
// Copies the contents of the source device-private page to the
// destination CPU page. This will invalidate mappings, so cannot be
// called while holding any va_block locks.
static void hmm_copy_devmem_page(struct page *dst_page, struct page *src_page)
static NV_STATUS hmm_copy_devmem_page(struct page *dst_page, struct page *src_page)
{
    uvm_tracker_t tracker = UVM_TRACKER_INIT();
    uvm_gpu_phys_address_t src_addr;
@ -207,7 +207,7 @@ static void hmm_copy_devmem_page(struct page *dst_page, struct page *src_page)
    uvm_push_end(&push);
    status = uvm_tracker_add_push_safe(&tracker, &push);
    if (status == NV_OK)
        uvm_tracker_wait_deinit(&tracker);
        status = uvm_tracker_wait_deinit(&tracker);

out_unmap_cpu:
    uvm_parent_gpu_unmap_cpu_pages(gpu->parent, dma_addr, PAGE_SIZE);
@ -216,12 +216,7 @@ out_unmap_gpu:
    uvm_mmu_chunk_unmap(gpu_chunk, NULL);

out:
    // We can't fail eviction because we need to free the device-private pages
    // so the GPU can be unregistered. So the best we can do is warn on any
    // failures and zero the uninitialised page. This could result in data loss
    // in the application but failures are not expected.
    if (WARN_ON(status != NV_OK))
        memzero_page(dst_page, 0, PAGE_SIZE);
    return status;
}

static NV_STATUS uvm_hmm_pmm_gpu_evict_pfn(unsigned long pfn)
@ -246,7 +241,12 @@ static NV_STATUS uvm_hmm_pmm_gpu_evict_pfn(unsigned long pfn)

        lock_page(dst_page);

        hmm_copy_devmem_page(dst_page, migrate_pfn_to_page(src_pfn));
        // We can't fail eviction because we need to free the device-private
        // pages so the GPU can be unregistered. So the best we can do is warn
        // on any failures and zero the uninitialized page. This could result
        // in data loss in the application but failures are not expected.
        if (hmm_copy_devmem_page(dst_page, migrate_pfn_to_page(src_pfn)) != NV_OK)
            memzero_page(dst_page, 0, PAGE_SIZE);
        dst_pfn = migrate_pfn(page_to_pfn(dst_page));
        migrate_device_pages(&src_pfn, &dst_pfn, 1);
    }
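Taken together, the eviction path now reads roughly like the condensed sketch below. This is illustrative only, not a verbatim excerpt: allocation of the destination page and the surrounding migrate-PFN bookkeeping are omitted.

// Condensed, illustrative view of the device-private page eviction step:
// the copy result is checked, and a failed copy degrades to a zero-filled
// destination page because eviction itself is not allowed to fail.
static void example_evict_devmem_page(unsigned long src_pfn, struct page *dst_page)
{
    unsigned long dst_pfn;

    lock_page(dst_page);

    if (hmm_copy_devmem_page(dst_page, migrate_pfn_to_page(src_pfn)) != NV_OK)
        memzero_page(dst_page, 0, PAGE_SIZE);

    dst_pfn = migrate_pfn(page_to_pfn(dst_page));
    migrate_device_pages(&src_pfn, &dst_pfn, 1);
}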
@ -1725,6 +1725,11 @@ static void gpu_chunk_remove(uvm_va_block_t *va_block,
        return;
    }

    UVM_ASSERT(gpu_chunk->state == UVM_PMM_GPU_CHUNK_STATE_ALLOCATED);
    UVM_ASSERT(gpu_chunk->is_referenced);

    uvm_page_mask_clear(&gpu_state->resident, page_index);

    uvm_mmu_chunk_unmap(gpu_chunk, &va_block->tracker);
    gpu_state->chunks[page_index] = NULL;
}
@ -2197,7 +2202,11 @@ static NV_STATUS uvm_hmm_devmem_fault_alloc_and_copy(uvm_hmm_devmem_fault_contex

    // Do the copy but don't update the residency or mapping for the new
    // location yet.
    return uvm_va_block_service_copy(processor_id, UVM_ID_CPU, va_block, va_block_retry, service_context);
    status = uvm_va_block_service_copy(processor_id, UVM_ID_CPU, va_block, va_block_retry, service_context);
    if (status != NV_OK)
        clean_up_non_migrating_pages(va_block, src_pfns, dst_pfns, service_context->region, page_mask);

    return status;
}

static NV_STATUS uvm_hmm_devmem_fault_finalize_and_map(uvm_hmm_devmem_fault_context_t *devmem_fault_context)
@ -3483,12 +3492,17 @@ NV_STATUS uvm_hmm_remote_cpu_fault(struct vm_fault *vmf)
        lock_page(dst_page);
        dst_pfn = migrate_pfn(page_to_pfn(dst_page));

        hmm_copy_devmem_page(dst_page, src_page);
        status = hmm_copy_devmem_page(dst_page, src_page);
        if (status != NV_OK) {
            unlock_page(dst_page);
            __free_page(dst_page);
            dst_pfn = 0;
        }
    }

    migrate_vma_pages(&args);

out:
    if (status == NV_OK)
        migrate_vma_pages(&args);
    migrate_vma_finalize(&args);

    return status;
|
@ -1,5 +1,5 @@
|
||||
/*******************************************************************************
|
||||
Copyright (c) 2020-2023 NVIDIA Corporation
|
||||
Copyright (c) 2020-2024 NVIDIA Corporation
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to
|
||||
@ -36,12 +36,6 @@ static uvm_gpu_peer_copy_mode_t hopper_peer_copy_mode(uvm_parent_gpu_t *parent_g
|
||||
if (g_uvm_global.conf_computing_enabled)
|
||||
return UVM_GPU_PEER_COPY_MODE_UNSUPPORTED;
|
||||
|
||||
// TODO: Bug 4174553: In some Grace Hopper setups, physical peer copies
|
||||
// result on errors. Force peer copies to use virtual addressing until the
|
||||
// issue is clarified.
|
||||
if (uvm_parent_gpu_is_coherent(parent_gpu))
|
||||
return UVM_GPU_PEER_COPY_MODE_VIRTUAL;
|
||||
|
||||
return g_uvm_global.peer_copy_mode;
|
||||
}
|
||||
|
||||
@ -69,6 +63,9 @@ void uvm_hal_hopper_arch_init_properties(uvm_parent_gpu_t *parent_gpu)
|
||||
parent_gpu->rm_va_base = 0;
|
||||
parent_gpu->rm_va_size = 64 * UVM_SIZE_1PB;
|
||||
|
||||
parent_gpu->peer_va_base = parent_gpu->rm_va_base + parent_gpu->rm_va_size;
|
||||
parent_gpu->peer_va_size = NV_MAX_DEVICES * UVM_PEER_IDENTITY_VA_SIZE;
|
||||
|
||||
parent_gpu->uvm_mem_va_base = parent_gpu->rm_va_size + 384 * UVM_SIZE_1TB;
|
||||
parent_gpu->uvm_mem_va_size = UVM_MEM_VA_SIZE;
|
||||
|
||||
@ -121,4 +118,9 @@ void uvm_hal_hopper_arch_init_properties(uvm_parent_gpu_t *parent_gpu)
|
||||
parent_gpu->plc_supported = true;
|
||||
|
||||
parent_gpu->no_ats_range_required = true;
|
||||
|
||||
// In Hopper there are not enough HW key slots available to support
|
||||
// individual channel encryption keys, so channels on the same engine share
|
||||
// the keys.
|
||||
parent_gpu->conf_computing.per_channel_key_rotation = false;
|
||||
}
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*******************************************************************************
|
||||
Copyright (c) 2020-2023 NVIDIA Corporation
|
||||
Copyright (c) 2020-2024 NVIDIA Corporation
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to
|
||||
@ -91,6 +91,11 @@ void uvm_hal_hopper_ce_semaphore_release(uvm_push_t *push, NvU64 gpu_va, NvU32 p
|
||||
uvm_gpu_t *gpu = uvm_push_get_gpu(push);
|
||||
NvU32 launch_dma_plc_mode;
|
||||
|
||||
UVM_ASSERT_MSG(gpu->parent->ce_hal->semaphore_target_is_valid(push, gpu_va),
|
||||
"Semaphore target validation failed in channel %s, GPU %s.\n",
|
||||
push->channel ? push->channel->name : "'fake'",
|
||||
uvm_gpu_name(gpu));
|
||||
|
||||
NV_PUSH_3U(C8B5, SET_SEMAPHORE_A, HWVALUE(C8B5, SET_SEMAPHORE_A, UPPER, NvOffset_HI32(gpu_va)),
|
||||
SET_SEMAPHORE_B, HWVALUE(C8B5, SET_SEMAPHORE_B, LOWER, NvOffset_LO32(gpu_va)),
|
||||
SET_SEMAPHORE_PAYLOAD, payload);
|
||||
@ -109,6 +114,11 @@ void uvm_hal_hopper_ce_semaphore_reduction_inc(uvm_push_t *push, NvU64 gpu_va, N
|
||||
uvm_gpu_t *gpu = uvm_push_get_gpu(push);
|
||||
NvU32 launch_dma_plc_mode;
|
||||
|
||||
UVM_ASSERT_MSG(gpu->parent->ce_hal->semaphore_target_is_valid(push, gpu_va),
|
||||
"Semaphore target validation failed in channel %s, GPU %s.\n",
|
||||
push->channel ? push->channel->name : "'fake'",
|
||||
uvm_gpu_name(gpu));
|
||||
|
||||
NV_PUSH_3U(C8B5, SET_SEMAPHORE_A, HWVALUE(C8B5, SET_SEMAPHORE_A, UPPER, NvOffset_HI32(gpu_va)),
|
||||
SET_SEMAPHORE_B, HWVALUE(C8B5, SET_SEMAPHORE_B, LOWER, NvOffset_LO32(gpu_va)),
|
||||
SET_SEMAPHORE_PAYLOAD, payload);
|
||||
@ -127,14 +137,18 @@ void uvm_hal_hopper_ce_semaphore_reduction_inc(uvm_push_t *push, NvU64 gpu_va, N
|
||||
|
||||
void uvm_hal_hopper_ce_semaphore_timestamp(uvm_push_t *push, NvU64 gpu_va)
|
||||
{
|
||||
uvm_gpu_t *gpu;
|
||||
uvm_gpu_t *gpu = uvm_push_get_gpu(push);
|
||||
NvU32 launch_dma_plc_mode;
|
||||
|
||||
UVM_ASSERT_MSG(gpu->parent->ce_hal->semaphore_target_is_valid(push, gpu_va),
|
||||
"Semaphore target validation failed in channel %s, GPU %s.\n",
|
||||
push->channel ? push->channel->name : "'fake'",
|
||||
uvm_gpu_name(gpu));
|
||||
|
||||
NV_PUSH_3U(C8B5, SET_SEMAPHORE_A, HWVALUE(C8B5, SET_SEMAPHORE_A, UPPER, NvOffset_HI32(gpu_va)),
|
||||
SET_SEMAPHORE_B, HWVALUE(C8B5, SET_SEMAPHORE_B, LOWER, NvOffset_LO32(gpu_va)),
|
||||
SET_SEMAPHORE_PAYLOAD, 0xdeadbeef);
|
||||
|
||||
gpu = uvm_push_get_gpu(push);
|
||||
launch_dma_plc_mode = gpu->parent->ce_hal->plc_mode();
|
||||
|
||||
NV_PUSH_1U(C8B5, LAUNCH_DMA, hopper_get_flush_value(push) |
|
||||
@ -186,6 +200,7 @@ static NvU32 hopper_memset_copy_type(uvm_gpu_address_t dst)
{
    if (g_uvm_global.conf_computing_enabled && dst.is_unprotected)
        return HWCONST(C8B5, LAUNCH_DMA, COPY_TYPE, NONPROT2NONPROT);

    return HWCONST(C8B5, LAUNCH_DMA, COPY_TYPE, DEFAULT);
}

@ -345,14 +360,19 @@ bool uvm_hal_hopper_ce_memset_is_valid(uvm_push_t *push,
{
    uvm_gpu_t *gpu = uvm_push_get_gpu(push);

    // In HCC, if a memset uses physical addressing for the destination, then
    // it must write to (protected) vidmem. If the memset uses virtual
    // addressing, and the backing storage is not vidmem, the access is only
    // legal if the copy type is NONPROT2NONPROT, and the destination is
    // In Confidential Computing, if a memset uses physical addressing for the
    // destination, then it must write to (protected) vidmem. If the memset uses
    // virtual addressing, and the backing storage is not vidmem, the access is
    // only legal if the copy type is NONPROT2NONPROT, and the destination is
    // unprotected sysmem, but the validation does not detect it.
    if (uvm_conf_computing_mode_is_hcc(gpu) && !dst.is_virtual && dst.aperture != UVM_APERTURE_VID)
    if (g_uvm_global.conf_computing_enabled && !dst.is_virtual && dst.aperture != UVM_APERTURE_VID)
        return false;

    if (uvm_gpu_address_is_peer(gpu, dst)) {
        UVM_ERR_PRINT("Memset to peer address (0x%llx) is not allowed!", dst.address);
        return false;
    }

    if (!gpu->parent->ce_phys_vidmem_write_supported) {
        size_t size = num_elements * element_size;
        uvm_gpu_address_t temp = dst;
@ -373,14 +393,22 @@ bool uvm_hal_hopper_ce_memset_is_valid(uvm_push_t *push,
bool uvm_hal_hopper_ce_memcopy_is_valid(uvm_push_t *push, uvm_gpu_address_t dst, uvm_gpu_address_t src)
{
    uvm_gpu_t *gpu = uvm_push_get_gpu(push);
    const bool peer_copy = uvm_gpu_address_is_peer(gpu, dst) || uvm_gpu_address_is_peer(gpu, src);

    if (uvm_conf_computing_mode_is_hcc(gpu)) {
        // In HCC, if a memcopy uses physical addressing for either the
        // destination or the source, then the corresponding aperture must be
        // vidmem. If virtual addressing is used, and the backing storage is
        // sysmem the access is only legal if the copy type is NONPROT2NONPROT,
        // but the validation does not detect it. In other words the copy
        // source and destination is unprotected sysmem.
    if (push->channel && peer_copy && !uvm_channel_is_p2p(push->channel)) {
        UVM_ERR_PRINT("Peer copy from address (0x%llx) to address (0x%llx) should use designated p2p channels!",
                      src.address,
                      dst.address);
        return false;
    }

    if (g_uvm_global.conf_computing_enabled) {
        // In Confidential Computing, if a memcopy uses physical addressing for
        // either the destination or the source, then the corresponding aperture
        // must be vidmem. If virtual addressing is used, and the backing
        // storage is sysmem the access is only legal if the copy type is
        // NONPROT2NONPROT, but the validation does not detect it. In other
        // words the copy source and destination is unprotected sysmem.
        if (!src.is_virtual && (src.aperture != UVM_APERTURE_VID))
            return false;

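In other words, the Confidential Computing check above only rejects physically addressed transfers whose aperture is not vidmem; virtually addressed transfers to unprotected sysmem are left to the NONPROT2NONPROT copy type. The hypothetical helper below restates that rule for a single address; the driver performs the check inline, as shown in the hunks above.

// Illustrative restatement of the Confidential Computing aperture rule:
// physically addressed CE accesses must target (protected) vidmem, while
// virtually addressed accesses rely on the NONPROT2NONPROT copy type,
// which this validation cannot see.
static bool example_cc_phys_address_ok(uvm_gpu_address_t addr)
{
    if (!addr.is_virtual && addr.aperture != UVM_APERTURE_VID)
        return false;

    return true;
}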
@ -490,9 +518,8 @@ void uvm_hal_hopper_ce_encrypt(uvm_push_t *push,
|
||||
NvU32 auth_tag_address_hi32, auth_tag_address_lo32;
|
||||
NvU64 iv_address;
|
||||
NvU32 iv_address_hi32, iv_address_lo32;
|
||||
uvm_gpu_t *gpu = uvm_push_get_gpu(push);
|
||||
|
||||
UVM_ASSERT(uvm_conf_computing_mode_is_hcc(gpu));
|
||||
UVM_ASSERT(g_uvm_global.conf_computing_enabled);
|
||||
UVM_ASSERT(IS_ALIGNED(auth_tag.address, UVM_CONF_COMPUTING_AUTH_TAG_ALIGNMENT));
|
||||
|
||||
if (!src.is_virtual)
|
||||
@ -537,9 +564,8 @@ void uvm_hal_hopper_ce_decrypt(uvm_push_t *push,
|
||||
{
|
||||
|
||||
NvU32 auth_tag_address_hi32, auth_tag_address_lo32;
|
||||
uvm_gpu_t *gpu = uvm_push_get_gpu(push);
|
||||
|
||||
UVM_ASSERT(uvm_conf_computing_mode_is_hcc(gpu));
|
||||
UVM_ASSERT(g_uvm_global.conf_computing_enabled);
|
||||
UVM_ASSERT(IS_ALIGNED(auth_tag.address, UVM_CONF_COMPUTING_AUTH_TAG_ALIGNMENT));
|
||||
|
||||
// The addressing mode (and aperture, if applicable) of the source and
|
||||
@ -565,4 +591,3 @@ void uvm_hal_hopper_ce_decrypt(uvm_push_t *push,
|
||||
|
||||
encrypt_or_decrypt(push, dst, src, size);
|
||||
}
|
||||
|
||||
|
@ -31,6 +31,12 @@
|
||||
void uvm_hal_hopper_host_semaphore_release(uvm_push_t *push, NvU64 gpu_va, NvU32 payload)
|
||||
{
|
||||
NvU32 sem_lo;
|
||||
|
||||
UVM_ASSERT_MSG(uvm_push_get_gpu(push)->parent->host_hal->semaphore_target_is_valid(push, gpu_va),
|
||||
"Semaphore target validation failed in channel %s, GPU %s.\n",
|
||||
push->channel ? push->channel->name : "fake",
|
||||
uvm_gpu_name(uvm_push_get_gpu(push)));
|
||||
|
||||
UVM_ASSERT(!(NvOffset_LO32(gpu_va) & ~HWSHIFTMASK(C86F, SEM_ADDR_LO, OFFSET)));
|
||||
sem_lo = READ_HWVALUE(NvOffset_LO32(gpu_va), C86F, SEM_ADDR_LO, OFFSET);
|
||||
|
||||
@ -49,6 +55,12 @@ void uvm_hal_hopper_host_semaphore_release(uvm_push_t *push, NvU64 gpu_va, NvU32
|
||||
void uvm_hal_hopper_host_semaphore_acquire(uvm_push_t *push, NvU64 gpu_va, NvU32 payload)
|
||||
{
|
||||
NvU32 sem_lo;
|
||||
|
||||
UVM_ASSERT_MSG(uvm_push_get_gpu(push)->parent->host_hal->semaphore_target_is_valid(push, gpu_va),
|
||||
"Semaphore target validation failed in channel %s, GPU %s.\n",
|
||||
push->channel ? push->channel->name : "fake",
|
||||
uvm_gpu_name(uvm_push_get_gpu(push)));
|
||||
|
||||
UVM_ASSERT(!(NvOffset_LO32(gpu_va) & ~HWSHIFTMASK(C86F, SEM_ADDR_LO, OFFSET)));
|
||||
sem_lo = READ_HWVALUE(NvOffset_LO32(gpu_va), C86F, SEM_ADDR_LO, OFFSET);
|
||||
NV_PUSH_5U(C86F, SEM_ADDR_LO, HWVALUE(C86F, SEM_ADDR_LO, OFFSET, sem_lo),
|
||||
@ -63,6 +75,12 @@ void uvm_hal_hopper_host_semaphore_acquire(uvm_push_t *push, NvU64 gpu_va, NvU32
|
||||
void uvm_hal_hopper_host_semaphore_timestamp(uvm_push_t *push, NvU64 gpu_va)
|
||||
{
|
||||
NvU32 sem_lo;
|
||||
|
||||
UVM_ASSERT_MSG(uvm_push_get_gpu(push)->parent->host_hal->semaphore_target_is_valid(push, gpu_va),
|
||||
"Semaphore target validation failed in channel %s, GPU %s.\n",
|
||||
push->channel ? push->channel->name : "fake",
|
||||
uvm_gpu_name(uvm_push_get_gpu(push)));
|
||||
|
||||
UVM_ASSERT(!(NvOffset_LO32(gpu_va) & ~HWSHIFTMASK(C86F, SEM_ADDR_LO, OFFSET)));
|
||||
sem_lo = READ_HWVALUE(NvOffset_LO32(gpu_va), C86F, SEM_ADDR_LO, OFFSET);
|
||||
|
||||
|
@ -54,6 +54,10 @@ static NvU32 page_table_depth_hopper(NvU64 page_size)
        return 4;
    else if (page_size == UVM_PAGE_SIZE_512M)
        return 3;

    UVM_ASSERT((page_size == UVM_PAGE_SIZE_4K) || (page_size == UVM_PAGE_SIZE_64K) ||
               (page_size == UVM_PAGE_SIZE_DEFAULT));

    return 5;
}

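For orientation, the returned depth appears to be the level at which the leaf PTE sits in Hopper's page tree, so larger pages terminate at shallower levels. The snippet below is only an illustrative self-check, not driver code; the 2M case is an assumption inferred from the elided hunk context.

// Illustrative only: expected leaf depths per page size on Hopper, assuming
// 4K/64K -> 5, 2M -> 4 (assumed, elided from the hunk) and 512M -> 3.
static void example_check_hopper_page_table_depths(void)
{
    UVM_ASSERT(page_table_depth_hopper(UVM_PAGE_SIZE_4K) == 5);
    UVM_ASSERT(page_table_depth_hopper(UVM_PAGE_SIZE_64K) == 5);
    UVM_ASSERT(page_table_depth_hopper(UVM_PAGE_SIZE_2M) == 4);
    UVM_ASSERT(page_table_depth_hopper(UVM_PAGE_SIZE_512M) == 3);
}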
@ -93,6 +93,11 @@ void uvm_hal_hopper_sec2_semaphore_release(uvm_push_t *push, NvU64 gpu_va, NvU32
|
||||
uvm_gpu_address_t sign_auth_tag_gpu_va;
|
||||
NvU32 *csl_sign_init = push->next;
|
||||
|
||||
UVM_ASSERT_MSG(uvm_push_get_gpu(push)->parent->sec2_hal->semaphore_target_is_valid(push, gpu_va),
|
||||
"Semaphore target validation failed in channel %s, GPU %s.\n",
|
||||
push->channel->name,
|
||||
uvm_gpu_name(uvm_push_get_gpu(push)));
|
||||
|
||||
UVM_ASSERT(IS_ALIGNED(NvU64_LO32(gpu_va), 1 << HWSHIFT(CBA2, SEMAPHORE_B, LOWER)));
|
||||
|
||||
sem_lo = READ_HWVALUE(NvU64_LO32(gpu_va), CBA2, SEMAPHORE_B, LOWER);
|
||||
@ -54,3 +54,30 @@ void uvm_memcg_context_end(uvm_memcg_context_t *context)
    mem_cgroup_put(context->new_memcg);
}
#endif

#if !UVM_FOR_EACH_SGTABLE_DMA_PAGE_PRESENT()
static int sg_dma_page_count(struct scatterlist *sg)
{
    return PAGE_ALIGN(sg->offset + sg_dma_len(sg)) >> PAGE_SHIFT;
}

bool __sg_page_iter_dma_next(struct sg_dma_page_iter *dma_iter)
{
    struct sg_page_iter *piter = &dma_iter->base;

    if (!piter->__nents || !piter->sg)
        return false;

    piter->sg_pgoffset += piter->__pg_advance;
    piter->__pg_advance = 1;

    while (piter->sg_pgoffset >= sg_dma_page_count(piter->sg)) {
        piter->sg_pgoffset -= sg_dma_page_count(piter->sg);
        piter->sg = sg_next(piter->sg);
        if (!--piter->__nents || !piter->sg)
            return false;
    }

    return true;
}
#endif

@ -84,9 +84,19 @@
#include <linux/sched/task_stack.h>
#endif

#if !defined(NV_SG_DMA_PAGE_ITER_PRESENT)
#include <linux/scatterlist.h>
#endif

#include <linux/cpumask.h>
#include <linux/topology.h>

#if defined(NV_LINUX_DMA_DIRECT_H_PRESENT)
#include <linux/dma-direct.h>
#else
#include <asm/dma-mapping.h>
#endif

#include "nv-kthread-q.h"

#if defined(NV_CPUMASK_OF_NODE_PRESENT)
@ -382,4 +392,37 @@ static inline pgprot_t uvm_pgprot_decrypted(pgprot_t prot)
    return prot;
}

#if !defined(NV_SG_DMA_PAGE_ITER_PRESENT)
// Added by commit d901b2760dc6c ("lib/scatterlist: Provide a DMA page
// iterator") v5.0
struct sg_dma_page_iter {
    struct sg_page_iter base;
};

#define uvm_sg_page_iter_dma_address(dma_iter) \
    sg_page_iter_dma_address(&((dma_iter)->base))
#else
#define uvm_sg_page_iter_dma_address(dma_iter) \
    sg_page_iter_dma_address((dma_iter))
#endif

#if !defined(NV_FOR_EACH_SGTABLE_DMA_PAGE_PRESENT)
// Added by commit 709d6d73c756 ("scatterlist: add generic wrappers for
// iterating over sgtable objects") v5.7.
#define UVM_FOR_EACH_SGTABLE_DMA_PAGE_PRESENT() 0

static int sg_dma_page_count(struct scatterlist *sg);
bool __sg_page_iter_dma_next(struct sg_dma_page_iter *dma_iter);

#define for_each_sg_dma_page(sglist, dma_iter, dma_nents, pgoffset) \
    for (__sg_page_iter_start(&(dma_iter)->base, sglist, dma_nents, \
                              pgoffset); \
         __sg_page_iter_dma_next(dma_iter);)

#define for_each_sgtable_dma_page(sgt, dma_iter, pgoffset) \
    for_each_sg_dma_page((sgt)->sgl, dma_iter, (sgt)->nents, pgoffset)
#else
#define UVM_FOR_EACH_SGTABLE_DMA_PAGE_PRESENT() 1
#endif

#endif // _UVM_LINUX_H
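These compatibility shims mirror the upstream DMA page iterator so the rest of the driver can use a single spelling on old and new kernels alike. A minimal usage sketch follows; the function and local names are illustrative, and on kernels that already provide the iterator the wrappers expand to the native for_each_sgtable_dma_page() and sg_page_iter_dma_address().

// Illustrative sketch: walking the DMA-mapped pages of an sg_table with the
// wrappers defined above.
static void example_walk_sgt_dma_pages(struct sg_table *sgt)
{
    struct sg_dma_page_iter dma_iter;

    for_each_sgtable_dma_page(sgt, &dma_iter, 0) {
        dma_addr_t dma_addr = uvm_sg_page_iter_dma_address(&dma_iter);

        // Each iteration yields one PAGE_SIZE DMA address; a real caller
        // would program it into GPU page tables or a copy descriptor.
        (void)dma_addr;
    }
}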
@ -523,6 +523,7 @@ typedef enum
|
||||
// This lock order can be removed after RM no longer relies on RPC event
|
||||
// notifications.
|
||||
UVM_LOCK_ORDER_CSL_CTX,
|
||||
|
||||
UVM_LOCK_ORDER_LEAF,
|
||||
UVM_LOCK_ORDER_COUNT,
|
||||
} uvm_lock_order_t;
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*******************************************************************************
|
||||
Copyright (c) 2016-2023 NVIDIA Corporation
|
||||
Copyright (c) 2016-2024 NVIDIA Corporation
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to
|
||||
@ -35,6 +35,9 @@ void uvm_hal_maxwell_arch_init_properties(uvm_parent_gpu_t *parent_gpu)
|
||||
parent_gpu->rm_va_base = 0;
|
||||
parent_gpu->rm_va_size = 128 * UVM_SIZE_1GB;
|
||||
|
||||
parent_gpu->peer_va_base = 0;
|
||||
parent_gpu->peer_va_size = 0;
|
||||
|
||||
parent_gpu->uvm_mem_va_base = 768 * UVM_SIZE_1GB;
|
||||
parent_gpu->uvm_mem_va_size = UVM_MEM_VA_SIZE;
|
||||
|
||||
@ -75,4 +78,6 @@ void uvm_hal_maxwell_arch_init_properties(uvm_parent_gpu_t *parent_gpu)
|
||||
parent_gpu->plc_supported = false;
|
||||
|
||||
parent_gpu->no_ats_range_required = false;
|
||||
|
||||
parent_gpu->conf_computing.per_channel_key_rotation = false;
|
||||
}
|
||||
|
@ -50,11 +50,26 @@ void uvm_hal_maxwell_ce_offset_in_out(uvm_push_t *push, NvU64 offset_in, NvU64 o
|
||||
OFFSET_OUT_LOWER, HWVALUE(B0B5, OFFSET_OUT_LOWER, VALUE, NvOffset_LO32(offset_out)));
|
||||
}
|
||||
|
||||
bool uvm_hal_maxwell_semaphore_target_is_valid(uvm_push_t *push, NvU64 gpu_va)
|
||||
{
|
||||
if (uvm_gpu_address_is_peer(uvm_push_get_gpu(push), uvm_gpu_address_virtual(gpu_va))) {
|
||||
UVM_ERR_PRINT("Semaphore operation targetting peer addresses is not allowed!");
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
void uvm_hal_maxwell_ce_semaphore_release(uvm_push_t *push, NvU64 gpu_va, NvU32 payload)
|
||||
{
|
||||
NvU32 flush_value;
|
||||
bool use_flush;
|
||||
|
||||
UVM_ASSERT_MSG(uvm_push_get_gpu(push)->parent->ce_hal->semaphore_target_is_valid(push, gpu_va),
|
||||
"Semaphore target validation failed in channel %s, GPU %s.\n",
|
||||
push->channel->name,
|
||||
uvm_gpu_name(uvm_push_get_gpu(push)));
|
||||
|
||||
use_flush = uvm_hal_membar_before_semaphore(push);
|
||||
|
||||
if (use_flush)
|
||||
@ -76,6 +91,11 @@ void uvm_hal_maxwell_ce_semaphore_reduction_inc(uvm_push_t *push, NvU64 gpu_va,
|
||||
NvU32 flush_value;
|
||||
bool use_flush;
|
||||
|
||||
UVM_ASSERT_MSG(uvm_push_get_gpu(push)->parent->ce_hal->semaphore_target_is_valid(push, gpu_va),
|
||||
"Semaphore target validation failed in channel %s, GPU %s.\n",
|
||||
push->channel->name,
|
||||
uvm_gpu_name(uvm_push_get_gpu(push)));
|
||||
|
||||
use_flush = uvm_hal_membar_before_semaphore(push);
|
||||
|
||||
if (use_flush)
|
||||
@ -100,6 +120,11 @@ void uvm_hal_maxwell_ce_semaphore_timestamp(uvm_push_t *push, NvU64 gpu_va)
|
||||
NvU32 flush_value;
|
||||
bool use_flush;
|
||||
|
||||
UVM_ASSERT_MSG(uvm_push_get_gpu(push)->parent->ce_hal->semaphore_target_is_valid(push, gpu_va),
|
||||
"Semaphore target validation failed in channel %s, GPU %s.\n",
|
||||
push->channel->name,
|
||||
uvm_gpu_name(uvm_push_get_gpu(push)));
|
||||
|
||||
use_flush = uvm_hal_membar_before_semaphore(push);
|
||||
|
||||
if (use_flush)
|
||||
@ -185,6 +210,34 @@ NvU32 uvm_hal_maxwell_ce_plc_mode(void)
|
||||
return 0;
|
||||
}
|
||||
|
||||
bool uvm_hal_maxwell_ce_memset_is_valid(uvm_push_t *push,
|
||||
uvm_gpu_address_t dst,
|
||||
size_t num_elements,
|
||||
size_t element_size)
|
||||
{
|
||||
if (uvm_gpu_address_is_peer(uvm_push_get_gpu(push), dst)) {
|
||||
UVM_ERR_PRINT("Memset to peer address (0x%llx) is not allowed!", dst.address);
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool uvm_hal_maxwell_ce_memcopy_is_valid(uvm_push_t *push, uvm_gpu_address_t dst, uvm_gpu_address_t src)
|
||||
{
|
||||
uvm_gpu_t *gpu = uvm_push_get_gpu(push);
|
||||
const bool peer_copy = uvm_gpu_address_is_peer(gpu, dst) || uvm_gpu_address_is_peer(gpu, src);
|
||||
|
||||
if (push->channel && peer_copy && !uvm_channel_is_p2p(push->channel)) {
|
||||
UVM_ERR_PRINT("Peer copy from address (0x%llx) to address (0x%llx) should use designated p2p channels!",
|
||||
src.address,
|
||||
dst.address);
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
// Noop, since COPY_TYPE doesn't exist in Maxwell.
|
||||
NvU32 uvm_hal_maxwell_ce_memcopy_copy_type(uvm_gpu_address_t dst, uvm_gpu_address_t src)
|
||||
{
|
||||
@ -208,6 +261,12 @@ void uvm_hal_maxwell_ce_memcopy(uvm_push_t *push, uvm_gpu_address_t dst, uvm_gpu
|
||||
push->channel->name,
|
||||
uvm_gpu_name(gpu));
|
||||
|
||||
// Check if the copy is over NVLINK and simulate dropped traffic if there's
|
||||
// an NVLINK error.
|
||||
// Src address cannot be peer as that wouldn't pass the valid check above.
|
||||
if (uvm_gpu_address_is_peer(gpu, dst) && uvm_gpu_get_injected_nvlink_error(gpu) != NV_OK)
|
||||
size = 0;
|
||||
|
||||
gpu->parent->ce_hal->memcopy_patch_src(push, &src);
|
||||
|
||||
launch_dma_src_dst_type = gpu->parent->ce_hal->phys_mode(push, dst, src);
|
||||
|
@ -175,6 +175,12 @@ void uvm_hal_maxwell_host_interrupt(uvm_push_t *push)
|
||||
void uvm_hal_maxwell_host_semaphore_release(uvm_push_t *push, NvU64 gpu_va, NvU32 payload)
|
||||
{
|
||||
NvU32 sem_lo;
|
||||
|
||||
UVM_ASSERT_MSG(uvm_push_get_gpu(push)->parent->host_hal->semaphore_target_is_valid(push, gpu_va),
|
||||
"Semaphore target validation failed in channel %s, GPU %s.\n",
|
||||
push->channel->name,
|
||||
uvm_gpu_name(uvm_push_get_gpu(push)));
|
||||
|
||||
UVM_ASSERT(!(NvOffset_LO32(gpu_va) & ~HWSHIFTMASK(A16F, SEMAPHOREB, OFFSET_LOWER)));
|
||||
sem_lo = READ_HWVALUE(NvOffset_LO32(gpu_va), A16F, SEMAPHOREB, OFFSET_LOWER);
|
||||
|
||||
@ -191,6 +197,12 @@ void uvm_hal_maxwell_host_semaphore_release(uvm_push_t *push, NvU64 gpu_va, NvU3
|
||||
void uvm_hal_maxwell_host_semaphore_acquire(uvm_push_t *push, NvU64 gpu_va, NvU32 payload)
|
||||
{
|
||||
NvU32 sem_lo;
|
||||
|
||||
UVM_ASSERT_MSG(uvm_push_get_gpu(push)->parent->host_hal->semaphore_target_is_valid(push, gpu_va),
|
||||
"Semaphore target validation failed in channel %s, GPU %s.\n",
|
||||
push->channel->name,
|
||||
uvm_gpu_name(uvm_push_get_gpu(push)));
|
||||
|
||||
UVM_ASSERT(!(NvOffset_LO32(gpu_va) & ~HWSHIFTMASK(A16F, SEMAPHOREB, OFFSET_LOWER)));
|
||||
sem_lo = READ_HWVALUE(NvOffset_LO32(gpu_va), A16F, SEMAPHOREB, OFFSET_LOWER);
|
||||
NV_PUSH_4U(A16F, SEMAPHOREA, HWVALUE(A16F, SEMAPHOREA, OFFSET_UPPER, NvOffset_HI32(gpu_va)),
|
||||
@ -204,6 +216,12 @@ void uvm_hal_maxwell_host_semaphore_acquire(uvm_push_t *push, NvU64 gpu_va, NvU3
|
||||
void uvm_hal_maxwell_host_semaphore_timestamp(uvm_push_t *push, NvU64 gpu_va)
|
||||
{
|
||||
NvU32 sem_lo;
|
||||
|
||||
UVM_ASSERT_MSG(uvm_push_get_gpu(push)->parent->host_hal->semaphore_target_is_valid(push, gpu_va),
|
||||
"Semaphore target validation failed in channel %s, GPU %s.\n",
|
||||
push->channel->name,
|
||||
uvm_gpu_name(uvm_push_get_gpu(push)));
|
||||
|
||||
UVM_ASSERT(!(NvOffset_LO32(gpu_va) & ~HWSHIFTMASK(A16F, SEMAPHOREB, OFFSET_LOWER)));
|
||||
sem_lo = READ_HWVALUE(NvOffset_LO32(gpu_va), A16F, SEMAPHOREB, OFFSET_LOWER);
|
||||
@ -451,9 +451,11 @@ static gfp_t sysmem_allocation_gfp_flags(int order, bool zero)
    return gfp_flags;
}


// This allocation is a non-protected memory allocation under Confidential
// Computing.
//
//
// There is a tighter coupling between allocation and mapping because of the
// allocator UVM must use. Hence, this function does the equivalent of
// uvm_mem_map_gpu_phys().
@ -708,7 +710,7 @@ static NV_STATUS mem_map_cpu_to_sysmem_kernel(uvm_mem_t *mem)
{
    struct page **pages = mem->sysmem.pages;
    size_t num_pages = uvm_mem_physical_size(mem) / PAGE_SIZE;
    pgprot_t prot = PAGE_KERNEL;
    pgprot_t prot;

    UVM_ASSERT(uvm_mem_is_sysmem(mem));

@ -725,6 +727,8 @@ static NV_STATUS mem_map_cpu_to_sysmem_kernel(uvm_mem_t *mem)

    if (g_uvm_global.conf_computing_enabled && uvm_mem_is_sysmem_dma(mem))
        prot = uvm_pgprot_decrypted(PAGE_KERNEL_NOENC);
    else
        prot = PAGE_KERNEL;

    mem->kernel.cpu_addr = vmap(pages, num_pages, VM_MAP, prot);

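The pgprot selection above matters because, under Confidential Computing, DMA-visible sysmem is shared with the GPU unencrypted, so the CPU alias created by vmap() has to be mapped decrypted as well. A minimal sketch of that decision, with a hypothetical helper name and error handling elided:

// Illustrative sketch of the CPU kernel-mapping decision shown above:
// DMA'able sysmem is mapped decrypted when Confidential Computing is on,
// everything else keeps the regular PAGE_KERNEL protection.
static void *example_map_sysmem_pages(struct page **pages, size_t num_pages, bool sysmem_dma)
{
    pgprot_t prot;

    if (g_uvm_global.conf_computing_enabled && sysmem_dma)
        prot = uvm_pgprot_decrypted(PAGE_KERNEL_NOENC);
    else
        prot = PAGE_KERNEL;

    return vmap(pages, num_pages, VM_MAP, prot);
}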
@ -992,7 +996,7 @@ uvm_gpu_address_t uvm_mem_gpu_address_copy(uvm_mem_t *mem, uvm_gpu_t *accessing_
    // Peer GPUs may need to use some form of translation (identity mappings,
    // indirect peers) to copy.
    chunk = mem_get_chunk(mem, offset, &chunk_offset);
    copy_addr = uvm_pmm_gpu_peer_copy_address(&mem->backing_gpu->pmm, chunk, accessing_gpu);
    copy_addr = uvm_gpu_peer_copy_address(mem->backing_gpu, chunk->address, accessing_gpu);
    copy_addr.address += chunk_offset;
    return copy_addr;
}
|
@ -161,7 +161,7 @@ struct uvm_mem_struct
|
||||
// lifetime of the GPU. For CPU allocations there is no lifetime limitation.
|
||||
uvm_gpu_t *backing_gpu;
|
||||
|
||||
// For Confidential Computing, the accessing GPU needs to be known at alloc
|
||||
// In Confidential Computing, the accessing GPU needs to be known at alloc
|
||||
// time for sysmem allocations.
|
||||
uvm_gpu_t *dma_owner;
|
||||
|
||||
|
@ -358,10 +358,8 @@ static NV_STATUS test_all(uvm_va_space_t *va_space)
|
||||
// Pascal+ can map sysmem with 4K, 64K and 2M PTEs, other GPUs can only use
|
||||
// 4K. Test all of the sizes supported by Pascal+ and 128K to match big page
|
||||
// size on pre-Pascal GPUs with 128K big page size.
|
||||
// Ampere+ also supports 512M PTEs, but since UVM's maximum chunk size is
|
||||
// 2M, we don't test for this page size.
|
||||
// Blackwell+ also supports 256G PTEs and the above holds for this case too.
|
||||
|
||||
// Ampere+ supports 512M PTEs and Blackwell+ supports 256G PTEs, but since
|
||||
// UVM's maximum chunk size is 2M, we don't test for these page sizes.
|
||||
static const NvU64 cpu_chunk_sizes = PAGE_SIZE | UVM_PAGE_SIZE_64K | UVM_PAGE_SIZE_128K | UVM_PAGE_SIZE_2M;
|
||||
|
||||
// All supported page sizes will be tested, CPU has the most with 4 and +1
|
||||
|
@ -227,6 +227,8 @@ NV_STATUS uvm_va_block_migrate_locked(uvm_va_block_t *va_block,
|
||||
uvm_assert_mutex_locked(&va_block->lock);
|
||||
UVM_ASSERT(uvm_hmm_check_context_vma_is_valid(va_block, va_block_context->hmm.vma, region));
|
||||
|
||||
uvm_processor_mask_zero(&va_block_context->make_resident.all_involved_processors);
|
||||
|
||||
if (uvm_va_block_is_hmm(va_block)) {
|
||||
status = uvm_hmm_va_block_migrate_locked(va_block,
|
||||
va_block_retry,
|
||||
@ -269,6 +271,10 @@ NV_STATUS uvm_va_block_migrate_locked(uvm_va_block_t *va_block,
|
||||
if (out_tracker)
|
||||
tracker_status = uvm_tracker_add_tracker_safe(out_tracker, &va_block->tracker);
|
||||
|
||||
uvm_processor_mask_or(&service_context->gpus_to_check_for_nvlink_errors,
|
||||
&service_context->gpus_to_check_for_nvlink_errors,
|
||||
&va_block_context->make_resident.all_involved_processors);
|
||||
|
||||
return status == NV_OK ? tracker_status : status;
|
||||
}
|
||||
|
||||
@ -320,7 +326,7 @@ NV_STATUS uvm_va_block_migrate_locked(uvm_va_block_t *va_block,
|
||||
// The current logic checks that:
|
||||
// - We are in the first pass of the migration (see the explanation of the
|
||||
// two-pass strategy in uvm_migrate).
|
||||
// - The CPU has an NVLINK interconnect to the GPUs. Otherwise, we don't
|
||||
// - The CPU has an NVLINK or C2C interconnect to the GPUs. Otherwise, we don't
|
||||
// need this optimization since we are already limited by PCIe BW.
|
||||
// - If the migration spans several VA blocks, otherwise skip the preunmap to
|
||||
// avoid the overhead.
|
||||
@ -335,7 +341,7 @@ static bool migration_should_do_cpu_preunmap(uvm_va_space_t *va_space,
|
||||
if (pass != UVM_MIGRATE_PASS_FIRST || is_single_block)
|
||||
return false;
|
||||
|
||||
if (uvm_processor_mask_get_gpu_count(&va_space->has_nvlink[UVM_ID_CPU_VALUE]) == 0)
|
||||
if (uvm_processor_mask_get_gpu_count(&va_space->has_fast_link[UVM_ID_CPU_VALUE]) == 0)
|
||||
return false;
|
||||
|
||||
return true;
|
||||
@ -559,7 +565,7 @@ static NV_STATUS uvm_migrate_ranges(uvm_va_space_t *va_space,
|
||||
UVM_ASSERT(first_managed_range == uvm_va_space_iter_managed_first(va_space, base, base));
|
||||
|
||||
managed_range_last = NULL;
|
||||
uvm_for_each_va_range_managed_in_contig_from(managed_range, va_space, first_managed_range, end) {
|
||||
uvm_for_each_va_range_managed_in_contig_from(managed_range, first_managed_range, end) {
|
||||
uvm_range_group_range_iter_t iter;
|
||||
uvm_va_policy_t *policy = &managed_range->policy;
|
||||
|
||||
@ -624,7 +630,8 @@ static NV_STATUS uvm_migrate(uvm_va_space_t *va_space,
|
||||
int dest_nid,
|
||||
NvU32 migrate_flags,
|
||||
uvm_va_range_managed_t *first_managed_range,
|
||||
uvm_tracker_t *out_tracker)
|
||||
uvm_tracker_t *out_tracker,
|
||||
uvm_processor_mask_t *gpus_to_check_for_nvlink_errors)
|
||||
{
|
||||
NV_STATUS status = NV_OK;
|
||||
uvm_service_block_context_t *service_context;
|
||||
@ -651,6 +658,8 @@ static NV_STATUS uvm_migrate(uvm_va_space_t *va_space,
|
||||
|
||||
service_context->block_context->make_resident.dest_nid = dest_nid;
|
||||
|
||||
uvm_processor_mask_zero(&service_context->gpus_to_check_for_nvlink_errors);
|
||||
|
||||
// We perform two passes (unless the migration only covers a single VA
|
||||
// block or UVM_MIGRATE_FLAG_SKIP_CPU_MAP is passed). This helps in the
|
||||
// following scenarios:
|
||||
@ -707,6 +716,7 @@ static NV_STATUS uvm_migrate(uvm_va_space_t *va_space,
|
||||
out_tracker);
|
||||
}
|
||||
|
||||
uvm_processor_mask_copy(gpus_to_check_for_nvlink_errors, &service_context->gpus_to_check_for_nvlink_errors);
|
||||
uvm_service_block_context_free(service_context);
|
||||
|
||||
return status;
|
||||
@ -871,6 +881,7 @@ NV_STATUS uvm_api_migrate(UVM_MIGRATE_PARAMS *params, struct file *filp)
|
||||
bool flush_events = false;
|
||||
const bool synchronous = !(params->flags & UVM_MIGRATE_FLAG_ASYNC);
|
||||
int cpu_numa_node = (int)params->cpuNumaNode;
|
||||
uvm_processor_mask_t *gpus_to_check_for_nvlink_errors = NULL;
|
||||
|
||||
// We temporarily allow 0 length in the IOCTL parameters as a signal to
|
||||
// only release the semaphore. This is because user-space is in charge of
|
||||
@ -892,6 +903,12 @@ NV_STATUS uvm_api_migrate(UVM_MIGRATE_PARAMS *params, struct file *filp)
|
||||
return NV_ERR_INVALID_ARGUMENT;
|
||||
}
|
||||
|
||||
gpus_to_check_for_nvlink_errors = uvm_processor_mask_cache_alloc();
|
||||
if (!gpus_to_check_for_nvlink_errors)
|
||||
return NV_ERR_NO_MEMORY;
|
||||
|
||||
uvm_processor_mask_zero(gpus_to_check_for_nvlink_errors);
|
||||
|
||||
// mmap_lock will be needed if we have to create CPU mappings
|
||||
mm = uvm_va_space_mm_or_current_retain_lock(va_space);
|
||||
uvm_va_space_down_read(va_space);
|
||||
@ -986,6 +1003,8 @@ NV_STATUS uvm_api_migrate(UVM_MIGRATE_PARAMS *params, struct file *filp)
|
||||
.populate_on_migrate_vma_failures = true,
|
||||
.user_space_start = ¶ms->userSpaceStart,
|
||||
.user_space_length = ¶ms->userSpaceLength,
|
||||
.gpus_to_check_for_nvlink_errors = gpus_to_check_for_nvlink_errors,
|
||||
.fail_on_unresolved_sto_errors = false,
|
||||
};
|
||||
|
||||
status = uvm_migrate_pageable(&uvm_migrate_args);
|
||||
@ -999,11 +1018,14 @@ NV_STATUS uvm_api_migrate(UVM_MIGRATE_PARAMS *params, struct file *filp)
|
||||
(UVM_ID_IS_CPU(dest_id) ? cpu_numa_node : NUMA_NO_NODE),
|
||||
params->flags,
|
||||
uvm_va_space_iter_managed_first(va_space, params->base, params->base),
|
||||
tracker_ptr);
|
||||
tracker_ptr,
|
||||
gpus_to_check_for_nvlink_errors);
|
||||
}
|
||||
}
|
||||
|
||||
done:
|
||||
uvm_global_gpu_retain(gpus_to_check_for_nvlink_errors);
|
||||
|
||||
// We only need to hold mmap_lock to create new CPU mappings, so drop it if
|
||||
// we need to wait for the tracker to finish.
|
||||
//
|
||||
@ -1042,6 +1064,13 @@ done:
|
||||
uvm_va_space_up_read(va_space);
|
||||
uvm_va_space_mm_or_current_release(va_space, mm);
|
||||
|
||||
// Check for STO errors in case there was no other error until now.
|
||||
if (status == NV_OK && !uvm_processor_mask_empty(gpus_to_check_for_nvlink_errors))
|
||||
status = uvm_global_gpu_check_nvlink_error(gpus_to_check_for_nvlink_errors);
|
||||
|
||||
uvm_global_gpu_release(gpus_to_check_for_nvlink_errors);
|
||||
uvm_processor_mask_cache_free(gpus_to_check_for_nvlink_errors);
|
||||
|
||||
// If the migration is known to be complete, eagerly dispatch the migration
|
||||
// events, instead of processing them on a later event flush. Note that an
|
||||
// asynchronous migration could be complete by now, but the flush would not
|
||||
@ -1064,6 +1093,13 @@ NV_STATUS uvm_api_migrate_range_group(UVM_MIGRATE_RANGE_GROUP_PARAMS *params, st
|
||||
uvm_tracker_t local_tracker = UVM_TRACKER_INIT();
|
||||
NvU32 migrate_flags = 0;
|
||||
uvm_gpu_t *gpu = NULL;
|
||||
uvm_processor_mask_t *gpus_to_check_for_nvlink_errors = NULL;
|
||||
|
||||
gpus_to_check_for_nvlink_errors = uvm_processor_mask_cache_alloc();
|
||||
if (!gpus_to_check_for_nvlink_errors)
|
||||
return NV_ERR_NO_MEMORY;
|
||||
|
||||
uvm_processor_mask_zero(gpus_to_check_for_nvlink_errors);
|
||||
|
||||
// mmap_lock will be needed if we have to create CPU mappings
|
||||
mm = uvm_va_space_mm_or_current_retain_lock(va_space);
|
||||
@ -1113,7 +1149,8 @@ NV_STATUS uvm_api_migrate_range_group(UVM_MIGRATE_RANGE_GROUP_PARAMS *params, st
|
||||
NUMA_NO_NODE,
|
||||
migrate_flags,
|
||||
first_managed_range,
|
||||
&local_tracker);
|
||||
&local_tracker,
|
||||
gpus_to_check_for_nvlink_errors);
|
||||
}
|
||||
|
||||
if (status != NV_OK)
|
||||
@ -1121,6 +1158,8 @@ NV_STATUS uvm_api_migrate_range_group(UVM_MIGRATE_RANGE_GROUP_PARAMS *params, st
|
||||
}
|
||||
|
||||
done:
|
||||
uvm_global_gpu_retain(gpus_to_check_for_nvlink_errors);
|
||||
|
||||
// We only need to hold mmap_lock to create new CPU mappings, so drop it if
|
||||
// we need to wait for the tracker to finish.
|
||||
//
|
||||
@ -1138,5 +1177,12 @@ done:
|
||||
// This API is synchronous, so wait for migrations to finish
|
||||
uvm_tools_flush_events();
|
||||
|
||||
// Check for STO errors in case there was no other error until now.
|
||||
if (status == NV_OK && tracker_status == NV_OK)
|
||||
status = uvm_global_gpu_check_nvlink_error(gpus_to_check_for_nvlink_errors);
|
||||
|
||||
uvm_global_gpu_release(gpus_to_check_for_nvlink_errors);
|
||||
uvm_processor_mask_cache_free(gpus_to_check_for_nvlink_errors);
|
||||
|
||||
return status == NV_OK? tracker_status : status;
|
||||
}
|
||||
|
@ -62,10 +62,9 @@ static NV_STATUS migrate_vma_page_copy_address(struct page *page,
|
||||
*gpu_addr = uvm_gpu_address_copy(owning_gpu, uvm_gpu_page_to_phys_address(owning_gpu, page));
|
||||
}
|
||||
else if (owning_gpu && can_copy_from) {
|
||||
uvm_gpu_identity_mapping_t *gpu_peer_mappings = uvm_gpu_get_peer_mapping(copying_gpu, owning_gpu->id);
|
||||
uvm_gpu_phys_address_t phys_addr = uvm_gpu_page_to_phys_address(owning_gpu, page);
|
||||
|
||||
*gpu_addr = uvm_gpu_address_virtual(gpu_peer_mappings->base + phys_addr.address);
|
||||
*gpu_addr = uvm_gpu_peer_copy_address(owning_gpu, phys_addr.address, copying_gpu);
|
||||
}
|
||||
else {
|
||||
NV_STATUS status = uvm_parent_gpu_map_cpu_page(copying_gpu->parent, page, &state->dma.addrs[page_index]);
|
||||
@ -399,6 +398,38 @@ static NV_STATUS migrate_vma_populate_anon_pages(struct vm_area_struct *vma,
|
||||
return status;
|
||||
}
|
||||
|
||||
static NV_STATUS zero_non_failed_pages_in_mask(uvm_push_t *push,
|
||||
const unsigned long *pfns,
|
||||
unsigned long *page_mask,
|
||||
unsigned long mask_size,
|
||||
migrate_vma_state_t *state)
|
||||
{
|
||||
unsigned long i;
|
||||
uvm_migrate_args_t *uvm_migrate_args = state->uvm_migrate_args;
|
||||
uvm_processor_id_t dst_id = uvm_migrate_args->dst_id;
|
||||
uvm_gpu_t *zeroing_gpu = uvm_push_get_gpu(push);
|
||||
|
||||
for_each_set_bit(i, page_mask, mask_size) {
|
||||
struct page *page;
|
||||
uvm_gpu_address_t dst_address;
|
||||
NV_STATUS status;
|
||||
|
||||
if (test_bit(i, state->allocation_failed_mask))
|
||||
continue;
|
||||
|
||||
page = migrate_pfn_to_page(pfns[i]);
|
||||
status = migrate_vma_page_copy_address(page, i, dst_id, zeroing_gpu, state, &dst_address);
|
||||
if (status != NV_OK)
|
||||
return status;
|
||||
|
||||
uvm_push_set_flag(push, UVM_PUSH_FLAG_CE_NEXT_PIPELINED);
|
||||
uvm_push_set_flag(push, UVM_PUSH_FLAG_NEXT_MEMBAR_NONE);
|
||||
zeroing_gpu->parent->ce_hal->memset_8(push, dst_address, 0, PAGE_SIZE);
|
||||
}
|
||||
|
||||
return NV_OK;
|
||||
}
|
||||
|
||||
static NV_STATUS migrate_vma_copy_pages_from(struct vm_area_struct *vma,
|
||||
const unsigned long *src,
|
||||
unsigned long *dst,
|
||||
@ -411,36 +442,82 @@ static NV_STATUS migrate_vma_copy_pages_from(struct vm_area_struct *vma,
|
||||
uvm_push_t push;
|
||||
unsigned long i;
|
||||
uvm_gpu_t *copying_gpu = NULL;
|
||||
uvm_gpu_t *src_gpu = UVM_ID_IS_GPU(src_id) ? uvm_gpu_get(src_id) : NULL;
|
||||
uvm_migrate_args_t *uvm_migrate_args = state->uvm_migrate_args;
|
||||
uvm_processor_id_t dst_id = uvm_migrate_args->dst_id;
|
||||
unsigned long *page_mask = state->processors[uvm_id_value(src_id)].page_mask;
|
||||
uvm_va_space_t *va_space = uvm_migrate_args->va_space;
|
||||
uvm_tracker_t zero_tracker = UVM_TRACKER_INIT();
|
||||
|
||||
UVM_ASSERT(!bitmap_empty(page_mask, state->num_pages));
|
||||
|
||||
// Pre-allocate the dst pages and mark the ones that failed
|
||||
for_each_set_bit(i, page_mask, state->num_pages) {
|
||||
uvm_gpu_address_t src_address;
|
||||
uvm_gpu_address_t dst_address;
|
||||
struct page *src_page = migrate_pfn_to_page(src[i]);
|
||||
struct page *dst_page;
|
||||
|
||||
UVM_ASSERT(src[i] & MIGRATE_PFN_VALID);
|
||||
UVM_ASSERT(src_page);
|
||||
|
||||
dst_page = migrate_vma_alloc_page(state);
|
||||
struct page *dst_page = migrate_vma_alloc_page(state);
|
||||
if (!dst_page) {
|
||||
__set_bit(i, state->allocation_failed_mask);
|
||||
continue;
|
||||
}
|
||||
|
||||
lock_page(dst_page);
|
||||
dst[i] = migrate_pfn(page_to_pfn(dst_page));
|
||||
}
|
||||
|
||||
// Zero destination pages in case of NVLINK copy that can hit STO or XC,
|
||||
// or in case of injected unresolved NVLINK error.
|
||||
// TODO: Bug 4922701: [uvm] Re-evaluate STO handling for ATS migrations
|
||||
// This can be removed if the false-positive rate of STO
|
||||
// fast-path is low enough to prefer failing the copy when an STO
|
||||
// fast-path error is detected.
|
||||
if (UVM_ID_IS_GPU(src_id) &&
|
||||
UVM_ID_IS_GPU(dst_id) &&
|
||||
((src_gpu->nvlink_status.enabled &&
|
||||
(uvm_parent_gpu_peer_link_type(src_gpu->parent, uvm_gpu_get(dst_id)->parent) >= UVM_GPU_LINK_NVLINK_5)) ||
|
||||
uvm_gpu_get_injected_nvlink_error(src_gpu) == NV_WARN_MORE_PROCESSING_REQUIRED)) {
|
||||
uvm_gpu_t *dst_gpu = uvm_gpu_get(dst_id);
|
||||
uvm_push_t zero_push;
|
||||
|
||||
status = migrate_vma_zero_begin_push(va_space, dst_id, dst_gpu, start, outer - 1, &zero_push);
|
||||
if (status != NV_OK)
|
||||
return status;
|
||||
|
||||
status = zero_non_failed_pages_in_mask(&zero_push, dst, page_mask, state->num_pages, state);
|
||||
|
||||
uvm_push_end(&zero_push);
|
||||
|
||||
if (status == NV_OK)
|
||||
status = uvm_tracker_add_push_safe(&zero_tracker, &zero_push);
|
||||
|
||||
if (status != NV_OK)
|
||||
return status;
|
||||
}
|
||||
|
||||
for_each_set_bit(i, page_mask, state->num_pages) {
|
||||
uvm_gpu_address_t src_address;
|
||||
uvm_gpu_address_t dst_address;
|
||||
struct page *src_page = migrate_pfn_to_page(src[i]);
|
||||
struct page *dst_page = migrate_pfn_to_page(dst[i]);
|
||||
|
||||
if (test_bit(i, state->allocation_failed_mask))
|
||||
continue;
|
||||
|
||||
UVM_ASSERT(src[i] & MIGRATE_PFN_VALID);
|
||||
UVM_ASSERT(src_page);
|
||||
UVM_ASSERT(dst[i] & MIGRATE_PFN_VALID);
|
||||
UVM_ASSERT(dst_page);
|
||||
|
||||
if (!copying_gpu) {
|
||||
status = migrate_vma_copy_begin_push(va_space, dst_id, src_id, start, outer - 1, &push);
|
||||
if (status != NV_OK) {
|
||||
__free_page(dst_page);
|
||||
return status;
|
||||
}
|
||||
if (status != NV_OK)
|
||||
break;
|
||||
|
||||
copying_gpu = uvm_push_get_gpu(&push);
|
||||
if (src_gpu)
|
||||
UVM_ASSERT(src_gpu == copying_gpu);
|
||||
|
||||
// The zero tracker will be empty if zeroing is not necessary
|
||||
uvm_push_acquire_tracker(&push, &zero_tracker);
|
||||
uvm_tracker_deinit(&zero_tracker);
|
||||
}
|
||||
else {
|
||||
uvm_push_set_flag(&push, UVM_PUSH_FLAG_CE_NEXT_PIPELINED);
|
||||
@ -452,18 +529,12 @@ static NV_STATUS migrate_vma_copy_pages_from(struct vm_area_struct *vma,
|
||||
if (status == NV_OK)
|
||||
status = migrate_vma_page_copy_address(dst_page, i, dst_id, copying_gpu, state, &dst_address);
|
||||
|
||||
if (status != NV_OK) {
|
||||
__free_page(dst_page);
|
||||
if (status != NV_OK)
|
||||
break;
|
||||
}
|
||||
|
||||
lock_page(dst_page);
|
||||
|
||||
// We'll push one membar later for all copies in this loop
|
||||
uvm_push_set_flag(&push, UVM_PUSH_FLAG_NEXT_MEMBAR_NONE);
|
||||
copying_gpu->parent->ce_hal->memcopy(&push, dst_address, src_address, PAGE_SIZE);
|
||||
|
||||
dst[i] = migrate_pfn(page_to_pfn(dst_page));
|
||||
}
|
||||
|
||||
// TODO: Bug 1766424: If the destination is a GPU and the copy was done by
|
||||
@ -523,6 +594,7 @@ static void migrate_vma_alloc_and_copy(struct migrate_vma *args, migrate_vma_sta
|
||||
unsigned long start = args->start;
|
||||
unsigned long outer = args->end;
|
||||
NV_STATUS tracker_status;
|
||||
uvm_migrate_args_t *uvm_migrate_args = state->uvm_migrate_args;
|
||||
|
||||
uvm_tracker_init(&state->tracker);
|
||||
|
||||
@ -542,6 +614,40 @@ static void migrate_vma_alloc_and_copy(struct migrate_vma *args, migrate_vma_sta
|
||||
if (state->status == NV_OK)
|
||||
state->status = tracker_status;
|
||||
|
||||
// Check if the copy might have been impacted by NVLINK errors.
|
||||
if (state->status == NV_OK) {
|
||||
uvm_processor_id_t src_id;
|
||||
|
||||
for_each_id_in_mask(src_id, &state->src_processors) {
|
||||
NV_STATUS status;
|
||||
|
||||
// Skip CPU source, even if for some reason the operation went over
|
||||
// NVLINK, it'd be a read and hit poison.
|
||||
if (UVM_ID_IS_CPU(src_id))
|
||||
continue;
|
||||
|
||||
UVM_ASSERT(UVM_ID_IS_GPU(src_id));
|
||||
status = uvm_gpu_check_nvlink_error_no_rm(uvm_gpu_get(src_id));
|
||||
|
||||
// Set state->status to the first error if there's an NVLINK error.
|
||||
// Do not report NV_WARN_MORE_PROCESSING_REQUIRED. The call to the
|
||||
// uvm_migrate_vma_copy_pages above zeroed the destination.
|
||||
// Thus in case of real STO error zeroed pages will be mapped.
|
||||
if (state->status == NV_OK && status != NV_WARN_MORE_PROCESSING_REQUIRED)
|
||||
state->status = status;
|
||||
|
||||
// Record unresolved GPU errors if the caller can use the information
|
||||
if (status == NV_WARN_MORE_PROCESSING_REQUIRED) {
|
||||
if (uvm_migrate_args->gpus_to_check_for_nvlink_errors)
|
||||
uvm_processor_mask_set(uvm_migrate_args->gpus_to_check_for_nvlink_errors, src_id);
|
||||
|
||||
// fail the copy if requested by the caller
|
||||
if (uvm_migrate_args->fail_on_unresolved_sto_errors && state->status == NV_OK)
|
||||
state->status = NV_ERR_BUSY_RETRY;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Mark all pages as not migrating if we're failing
|
||||
if (state->status != NV_OK)
|
||||
migrate_vma_cleanup_pages(args->dst, state->num_pages);
|
||||
@ -870,6 +976,14 @@ static NV_STATUS migrate_pageable_vma(struct vm_area_struct *vma,
|
||||
if (va_space->test.skip_migrate_vma)
|
||||
return NV_WARN_NOTHING_TO_DO;
|
||||
|
||||
// This isn't the right path for a UVM-owned vma. In most cases the callers
|
||||
// will take the correct (managed) path, but we can get here if invoked on a
|
||||
// disabled vma (see uvm_disable_vma()) that has no VA range but still has a
|
||||
// vma. This could cause locking issues if the caller has the VA space
|
||||
// locked and we invoke a UVM fault handler, so avoid it entirely.
|
||||
if (uvm_file_is_nvidia_uvm(vma->vm_file))
|
||||
return NV_ERR_INVALID_ADDRESS;
|
||||
|
||||
// TODO: Bug 2419180: support file-backed pages in migrate_vma, when
|
||||
// support for it is added to the Linux kernel
|
||||
if (!vma_is_anonymous(vma))
|
||||
@ -1002,9 +1116,12 @@ NV_STATUS uvm_migrate_pageable(uvm_migrate_args_t *uvm_migrate_args)
|
||||
return NV_ERR_INVALID_ARGUMENT;
|
||||
}
|
||||
else {
|
||||
uvm_gpu_t *gpu = uvm_gpu_get(dst_id);
|
||||
|
||||
// Incoming dst_node_id is only valid if dst_id belongs to the CPU. Use
|
||||
// dst_node_id as the GPU node id if dst_id doesn't belong to the CPU.
|
||||
uvm_migrate_args->dst_node_id = uvm_gpu_numa_node(uvm_gpu_get(dst_id));
|
||||
UVM_ASSERT(gpu->mem_info.numa.enabled);
|
||||
uvm_migrate_args->dst_node_id = uvm_gpu_numa_node(gpu);
|
||||
}
|
||||
|
||||
state = kmem_cache_alloc(g_uvm_migrate_vma_state_cache, NV_UVM_GFP_FLAGS);
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*******************************************************************************
|
||||
Copyright (c) 2018 NVIDIA Corporation
|
||||
Copyright (c) 2018-2024 NVIDIA Corporation
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to
|
||||
@ -47,6 +47,9 @@ typedef struct
|
||||
bool populate_on_migrate_vma_failures : 1;
|
||||
NvU64 *user_space_start;
|
||||
NvU64 *user_space_length;
|
||||
|
||||
uvm_processor_mask_t *gpus_to_check_for_nvlink_errors;
|
||||
bool fail_on_unresolved_sto_errors;
|
||||
} uvm_migrate_args_t;
|
||||
|
||||
#if defined(CONFIG_MIGRATE_VMA_HELPER)
|
||||
@ -219,7 +222,8 @@ static NV_STATUS uvm_migrate_pageable(uvm_migrate_args_t *uvm_migrate_args)
|
||||
uvm_migrate_args->length,
|
||||
0,
|
||||
uvm_migrate_args->touch,
|
||||
uvm_migrate_args->populate_permissions);
|
||||
uvm_migrate_args->populate_permissions,
|
||||
0);
|
||||
if (status != NV_OK)
|
||||
return status;
|
||||
|
||||
@ -1029,7 +1029,6 @@
// the pages can be in more than one location: vidmem is given priority, but if
// the allocation fails it will fallback to sysmem.
//
// Behavior outside of SR-IOV heavy (bare metal, SR-IOV standard, etc):
//    Inputs                                   Outputs
// init location | uvm_page_table_location || tree->location | tree->location_sys_fallback
// --------------|-------------------------||----------------|----------------
@ -1039,12 +1038,17 @@
// default       | vidmem                  || vidmem         | false
// default       | sysmem                  || sysmem         | false
//
// Some configurations impose more strict restrictions on the tree location,
// and do not obey the table above:
//
// - in SR-IOV heavy the page tree must be in vidmem, to prevent guest drivers
//   from updating GPU page tables without hypervisor knowledge.
//
// - in Confidential Computing, all kernel allocations must be in vidmem.
//   This is a hardware security constraint.
//
// In these setups, the location table is:
//
// In SR-IOV heavy the the page tree must be in vidmem, to prevent guest drivers
// from updating GPU page tables without hypervisor knowledge.
// When the Confidential Computing feature is enabled, all kernel
// allocations must be made in the CPR of vidmem. This is a hardware security
// constraint.
//    Inputs                                   Outputs
// init location | uvm_page_table_location || tree->location | tree->location_sys_fallback
// -------------|-------------------------||----------------|----------------
@ -1065,9 +1069,7 @@ static void page_tree_set_location(uvm_page_tree_t *tree, uvm_aperture_t locatio
    // be identified by having no channel manager.
    if (tree->gpu->channel_manager != NULL) {

        if (uvm_parent_gpu_is_virt_mode_sriov_heavy(tree->gpu->parent))
            UVM_ASSERT(location == UVM_APERTURE_VID);
        else if (g_uvm_global.conf_computing_enabled)
        if (uvm_parent_gpu_is_virt_mode_sriov_heavy(tree->gpu->parent) || g_uvm_global.conf_computing_enabled)
            UVM_ASSERT(location == UVM_APERTURE_VID);
    }

@ -2421,8 +2423,7 @@ void uvm_mmu_init_gpu_peer_addresses(uvm_gpu_t *gpu)
    uvm_gpu_id_t gpu_id;

    for_each_gpu_id(gpu_id) {
        uvm_gpu_get_peer_mapping(gpu, gpu_id)->base = gpu->parent->rm_va_base +
                                                      gpu->parent->rm_va_size +
        uvm_gpu_get_peer_mapping(gpu, gpu_id)->base = gpu->parent->peer_va_base +
                                                      UVM_PEER_IDENTITY_VA_SIZE * uvm_id_gpu_index(gpu_id);
    }
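With peer_va_base introduced, each peer GPU's identity mapping is just an offset into a dedicated VA window, so a peer physical address can be turned into a copy-ready virtual address with plain arithmetic. An illustrative sketch follows; the helper name is hypothetical, and uvm_gpu_peer_copy_address is the real entry point used elsewhere in this change.

// Illustrative sketch: translating a peer GPU physical address into the
// local GPU's peer identity mapping, as implied by the initialization above.
static NvU64 example_peer_identity_va(uvm_gpu_t *local_gpu, uvm_gpu_id_t peer_id, NvU64 peer_phys_addr)
{
    NvU64 base = local_gpu->parent->peer_va_base +
                 UVM_PEER_IDENTITY_VA_SIZE * uvm_id_gpu_index(peer_id);

    return base + peer_phys_addr;
}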
|
@ -56,8 +56,8 @@
|
||||
// | (up to 1TB) |
|
||||
// ------------------ 64PB + 32TB (flat_vidmem_va_base)
|
||||
// |peer ident. maps|
|
||||
// |32 * 1TB = 32TB | ==> NV_MAX_DEVICES * UVM_PEER_IDENTITY_VA_SIZE
|
||||
// ------------------ 64PB
|
||||
// |32 * 1TB = 32TB | ==> NV_MAX_DEVICES * UVM_PEER_IDENTITY_VA_SIZE (peer_va_size)
|
||||
// ------------------ 64PB (peer_va_base)
|
||||
// | |
|
||||
// | rm_mem(64PB) | (rm_va_size)
|
||||
// | |
|
||||
@ -84,8 +84,8 @@
|
||||
// | (up to 1TB) |
|
||||
// ------------------ 160TB (flat_vidmem_va_base)
|
||||
// |peer ident. maps|
|
||||
// |32 * 1TB = 32TB | ==> NV_MAX_DEVICES * UVM_PEER_IDENTITY_VA_SIZE
|
||||
// ------------------ 128TB
|
||||
// |32 * 1TB = 32TB | ==> NV_MAX_DEVICES * UVM_PEER_IDENTITY_VA_SIZE (peer_va_size)
|
||||
// ------------------ 128TB (peer_va_base)
|
||||
// | |
|
||||
// | rm_mem(128TB) | (rm_va_size)
|
||||
// | |
|
||||
|
@ -29,26 +29,33 @@
|
||||
#include "uvm_tlb_batch.h"
|
||||
#include "uvm_mmu.h"
|
||||
#include "uvm_kvmalloc.h"
|
||||
|
||||
// MAXWELL_*
|
||||
#include "cla16f.h"
|
||||
#include "clb0b5.h"
|
||||
|
||||
// PASCAL_*
|
||||
#include "clb069.h" // MAXWELL_FAULT_BUFFER_A
|
||||
#include "clc0b5.h"
|
||||
#include "clc06f.h"
|
||||
#include "clc0b5.h"
|
||||
|
||||
// VOLTA_*
|
||||
#include "clc369.h" // MMU_FAULT_BUFFER
|
||||
#include "clc3b5.h"
|
||||
#include "clc36f.h"
|
||||
#include "clc3b5.h"
|
||||
|
||||
// AMPERE_*
|
||||
#include "clc56f.h"
|
||||
#include "clc6b5.h"
|
||||
|
||||
// HOPPER_*
|
||||
#include "clc8b5.h"
|
||||
#include "clc86f.h"
|
||||
#include "clc8b5.h"
|
||||
|
||||
// BLACKWELL_*
|
||||
#include "clc96f.h"
|
||||
#include "clc9b5.h"
|
||||
|
||||
// ARCHITECTURE_*
|
||||
#include "ctrl2080mc.h"
|
||||
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*******************************************************************************
|
||||
Copyright (c) 2016-2023 NVIDIA Corporation
|
||||
Copyright (c) 2016-2024 NVIDIA Corporation
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to
|
||||
@ -51,6 +51,9 @@ void uvm_hal_pascal_arch_init_properties(uvm_parent_gpu_t *parent_gpu)
|
||||
parent_gpu->rm_va_base = 0;
|
||||
parent_gpu->rm_va_size = 128 * UVM_SIZE_1TB;
|
||||
|
||||
parent_gpu->peer_va_base = parent_gpu->rm_va_base + parent_gpu->rm_va_size;
|
||||
parent_gpu->peer_va_size = NV_MAX_DEVICES * UVM_PEER_IDENTITY_VA_SIZE;
|
||||
|
||||
parent_gpu->uvm_mem_va_base = 384 * UVM_SIZE_1TB;
|
||||
parent_gpu->uvm_mem_va_size = UVM_MEM_VA_SIZE;
|
||||
|
||||
@ -104,4 +107,6 @@ void uvm_hal_pascal_arch_init_properties(uvm_parent_gpu_t *parent_gpu)
|
||||
parent_gpu->plc_supported = false;
|
||||
|
||||
parent_gpu->no_ats_range_required = false;
|
||||
|
||||
parent_gpu->conf_computing.per_channel_key_rotation = false;
|
||||
}
|
||||
|
@ -46,6 +46,11 @@ void uvm_hal_pascal_ce_semaphore_release(uvm_push_t *push, NvU64 gpu_va, NvU32 p
|
||||
NvU32 launch_dma_plc_mode;
|
||||
bool use_flush;
|
||||
|
||||
UVM_ASSERT_MSG(gpu->parent->ce_hal->semaphore_target_is_valid(push, gpu_va),
|
||||
"Semaphore target validation failed in channel %s, GPU %s.\n",
|
||||
push->channel->name,
|
||||
uvm_gpu_name(gpu));
|
||||
|
||||
use_flush = uvm_hal_membar_before_semaphore(push);
|
||||
|
||||
if (use_flush)
|
||||
@ -72,6 +77,11 @@ void uvm_hal_pascal_ce_semaphore_reduction_inc(uvm_push_t *push, NvU64 gpu_va, N
|
||||
NvU32 launch_dma_plc_mode;
|
||||
bool use_flush;
|
||||
|
||||
UVM_ASSERT_MSG(gpu->parent->ce_hal->semaphore_target_is_valid(push, gpu_va),
|
||||
"Semaphore target validation failed in channel %s, GPU %s.\n",
|
||||
push->channel->name,
|
||||
uvm_gpu_name(gpu));
|
||||
|
||||
use_flush = uvm_hal_membar_before_semaphore(push);
|
||||
|
||||
if (use_flush)
|
||||
@ -96,11 +106,16 @@ void uvm_hal_pascal_ce_semaphore_reduction_inc(uvm_push_t *push, NvU64 gpu_va, N
void uvm_hal_pascal_ce_semaphore_timestamp(uvm_push_t *push, NvU64 gpu_va)
{
uvm_gpu_t *gpu;
uvm_gpu_t *gpu = uvm_push_get_gpu(push);
NvU32 flush_value;
NvU32 launch_dma_plc_mode;
bool use_flush;

UVM_ASSERT_MSG(gpu->parent->ce_hal->semaphore_target_is_valid(push, gpu_va),
"Semaphore target validation failed in channel %s, GPU %s.\n",
push->channel->name,
uvm_gpu_name(gpu));

use_flush = uvm_hal_membar_before_semaphore(push);

if (use_flush)
@ -112,7 +127,6 @@ void uvm_hal_pascal_ce_semaphore_timestamp(uvm_push_t *push, NvU64 gpu_va)
SET_SEMAPHORE_B, HWVALUE(C0B5, SET_SEMAPHORE_B, LOWER, NvOffset_LO32(gpu_va)),
SET_SEMAPHORE_PAYLOAD, 0xdeadbeef);

gpu = uvm_push_get_gpu(push);
launch_dma_plc_mode = gpu->parent->ce_hal->plc_mode();

NV_PUSH_1U(C0B5, LAUNCH_DMA, flush_value |
@ -286,8 +286,6 @@ NV_STATUS uvm_hal_pascal_fault_buffer_parse_replayable_entry(uvm_parent_gpu_t *p
bool uvm_hal_pascal_fault_buffer_entry_is_valid(uvm_parent_gpu_t *parent_gpu, NvU32 index)
{
NvU32 *fault_entry;

if (g_uvm_global.conf_computing_enabled) {
// Use the valid bit present in the encryption metadata, which is
// unencrypted, instead of the valid bit present in the (encrypted)
@ -296,15 +294,15 @@ bool uvm_hal_pascal_fault_buffer_entry_is_valid(uvm_parent_gpu_t *parent_gpu, Nv
return fault_entry_metadata->valid;
}
else {
NvU32 *fault_entry = get_fault_buffer_entry(parent_gpu, index);

fault_entry = get_fault_buffer_entry(parent_gpu, index);
return READ_HWVALUE_MW(fault_entry, B069, FAULT_BUF_ENTRY, VALID);
return READ_HWVALUE_MW(fault_entry, B069, FAULT_BUF_ENTRY, VALID);
}
}

void uvm_hal_pascal_fault_buffer_entry_clear_valid(uvm_parent_gpu_t *parent_gpu, NvU32 index)
{
NvU32 *fault_entry;

if (g_uvm_global.conf_computing_enabled) {
// Use the valid bit present in the encryption metadata, which is
// unencrypted, instead of the valid bit present in the (encrypted)
@ -312,11 +310,12 @@ void uvm_hal_pascal_fault_buffer_entry_clear_valid(uvm_parent_gpu_t *parent_gpu,
UvmFaultMetadataPacket *fault_entry_metadata = get_fault_buffer_entry_metadata(parent_gpu, index);

fault_entry_metadata->valid = false;
return;
}
else {
NvU32 *fault_entry = get_fault_buffer_entry(parent_gpu, index);

fault_entry = get_fault_buffer_entry(parent_gpu, index);
WRITE_HWCONST_MW(fault_entry, B069, FAULT_BUF_ENTRY, VALID, FALSE);
WRITE_HWCONST_MW(fault_entry, B069, FAULT_BUF_ENTRY, VALID, FALSE);
}
}

NvU32 uvm_hal_pascal_fault_buffer_entry_size(uvm_parent_gpu_t *parent_gpu)
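The two fault-buffer functions above share one idea: under Confidential Computing the replayable fault entries are encrypted, so the valid flag lives in a separate unencrypted metadata packet rather than in the hardware entry itself. A simplified sketch of that pattern (names and types are illustrative, not the driver's API):

#include <stdbool.h>
#include <stdint.h>

/* Stand-in for the unencrypted side-band metadata kept per fault entry. */
struct fault_metadata { bool valid; };

/* Returns whether a fault entry is valid, mirroring the branch above. */
bool entry_is_valid(bool conf_computing_enabled,
                    const struct fault_metadata *metadata,
                    const uint32_t *hw_entry,
                    uint32_t valid_mask)
{
    if (conf_computing_enabled)
        return metadata->valid;              /* unencrypted metadata bit */

    return (hw_entry[0] & valid_mask) != 0;  /* decoded from the HW entry */
}

/* Clears the valid flag, following the same split. */
void entry_clear_valid(bool conf_computing_enabled,
                       struct fault_metadata *metadata,
                       uint32_t *hw_entry,
                       uint32_t valid_mask)
{
    if (conf_computing_enabled)
        metadata->valid = false;
    else
        hw_entry[0] &= ~valid_mask;
}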
@ -387,6 +387,7 @@ static void mmu_set_prefetch_faults(uvm_parent_gpu_t *parent_gpu, bool enable)
// Access to the register is currently blocked only in Confidential
// Computing.
UVM_ASSERT(g_uvm_global.conf_computing_enabled);

status = nvUvmInterfaceTogglePrefetchFaults(&parent_gpu->fault_buffer_info.rm_info, (NvBool)enable);

UVM_ASSERT(status == NV_OK);
@ -1400,10 +1400,10 @@ static bool thrashing_processors_have_fast_access_to(uvm_va_space_t *va_space,
if (UVM_ID_IS_INVALID(to))
return false;

// Combine NVLINK and native atomics mask since we could have PCIe
// Combine NVLINK/C2C and native atomics mask since we could have PCIe
// atomics in the future
uvm_processor_mask_and(fast_to,
&va_space->has_nvlink[uvm_id_value(to)],
&va_space->has_fast_link[uvm_id_value(to)],
&va_space->has_native_atomics[uvm_id_value(to)]);
if (UVM_ID_IS_CPU(to)) {
uvm_processor_mask_set(fast_to, to);
@ -244,13 +244,11 @@ const char *uvm_pmm_gpu_memory_type_string(uvm_pmm_gpu_memory_type_t type)
{
switch (type) {
UVM_ENUM_STRING_CASE(UVM_PMM_GPU_MEMORY_TYPE_USER);
UVM_ENUM_STRING_CASE(UVM_PMM_GPU_MEMORY_TYPE_USER_UNPROTECTED);
UVM_ENUM_STRING_CASE(UVM_PMM_GPU_MEMORY_TYPE_KERNEL);
UVM_ENUM_STRING_CASE(UVM_PMM_GPU_MEMORY_TYPE_KERNEL_UNPROTECTED);
UVM_ENUM_STRING_DEFAULT();
}

BUILD_BUG_ON(UVM_PMM_GPU_MEMORY_TYPE_COUNT != 4);
BUILD_BUG_ON(UVM_PMM_GPU_MEMORY_TYPE_COUNT != 2);
}

const char *uvm_pmm_gpu_chunk_state_string(uvm_pmm_gpu_chunk_state_t state)
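The switch to BUILD_BUG_ON(UVM_PMM_GPU_MEMORY_TYPE_COUNT != 2) reflects the *_UNPROTECTED memory types being dropped; assuming the enum in uvm_pmm_gpu.h now reduces to roughly the following (illustrative reconstruction, not quoted from the header):

typedef enum
{
    // Memory backing user allocations (managed through VA blocks).
    UVM_PMM_GPU_MEMORY_TYPE_USER,

    // Memory used internally by UVM (page tables and other kernel allocations).
    UVM_PMM_GPU_MEMORY_TYPE_KERNEL,

    UVM_PMM_GPU_MEMORY_TYPE_COUNT,
} uvm_pmm_gpu_memory_type_t;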
@ -453,30 +451,6 @@ static void chunk_unpin(uvm_pmm_gpu_t *pmm, uvm_gpu_chunk_t *chunk, uvm_pmm_gpu_
chunk->suballoc->pinned_leaf_chunks--;
}

bool uvm_pmm_gpu_memory_type_is_user(uvm_pmm_gpu_memory_type_t type)
{
UVM_ASSERT(type < UVM_PMM_GPU_MEMORY_TYPE_COUNT);

switch (type) {
case UVM_PMM_GPU_MEMORY_TYPE_USER: // Alias UVM_PMM_GPU_MEMORY_TYPE_USER_PROTECTED
case UVM_PMM_GPU_MEMORY_TYPE_USER_UNPROTECTED:
return true;
default:
return false;
}
}

bool uvm_pmm_gpu_memory_type_is_protected(uvm_pmm_gpu_memory_type_t type)
{
switch (type) {
case UVM_PMM_GPU_MEMORY_TYPE_USER: // Alias UVM_PMM_GPU_MEMORY_TYPE_USER_PROTECTED
case UVM_PMM_GPU_MEMORY_TYPE_KERNEL: // Alias UVM_PMM_GPU_MEMORY_TYPE_KERNEL_PROTECTED:
return true;
default:
return false;
}
}

static void uvm_gpu_chunk_set_in_eviction(uvm_gpu_chunk_t *chunk, bool in_eviction)
{
UVM_ASSERT(uvm_gpu_chunk_is_user(chunk));
@ -535,20 +509,6 @@ void uvm_pmm_gpu_sync(uvm_pmm_gpu_t *pmm)
}
}

static uvm_pmm_gpu_memory_type_t pmm_squash_memory_type(uvm_pmm_gpu_memory_type_t type)
{
if (g_uvm_global.conf_computing_enabled)
return type;

// Enforce the contract that when the Confidential Computing feature is
// disabled, all user types are alike, as well as all kernel types,
// respectively. See uvm_pmm_gpu_memory_type_t.
if (uvm_pmm_gpu_memory_type_is_user(type))
return UVM_PMM_GPU_MEMORY_TYPE_USER;

return UVM_PMM_GPU_MEMORY_TYPE_KERNEL;
}

static NV_STATUS pmm_gpu_alloc(uvm_pmm_gpu_t *pmm,
size_t num_chunks,
uvm_chunk_size_t chunk_size,
@ -573,7 +533,6 @@ static NV_STATUS pmm_gpu_alloc(uvm_pmm_gpu_t *pmm,
uvm_assert_lockable_order(UVM_LOCK_ORDER_VA_BLOCK);
}

mem_type = pmm_squash_memory_type(mem_type);
for (i = 0; i < num_chunks; i++) {
uvm_gpu_root_chunk_t *root_chunk;
@ -1199,38 +1158,6 @@ void uvm_pmm_gpu_merge_chunk(uvm_pmm_gpu_t *pmm, uvm_gpu_chunk_t *chunk)
uvm_mutex_unlock(&pmm->lock);
}

uvm_gpu_phys_address_t uvm_pmm_gpu_peer_phys_address(uvm_pmm_gpu_t *pmm,
uvm_gpu_chunk_t *chunk,
uvm_gpu_t *accessing_gpu)
{
uvm_gpu_t *gpu = uvm_pmm_to_gpu(pmm);
uvm_aperture_t aperture = uvm_gpu_peer_aperture(accessing_gpu, gpu);
NvU64 addr;

if (uvm_parent_gpus_are_nvswitch_connected(accessing_gpu->parent, gpu->parent))
addr = chunk->address + gpu->parent->nvswitch_info.fabric_memory_window_start;
else
addr = chunk->address;

return uvm_gpu_phys_address(aperture, addr);
}

uvm_gpu_address_t uvm_pmm_gpu_peer_copy_address(uvm_pmm_gpu_t *pmm,
uvm_gpu_chunk_t *chunk,
uvm_gpu_t *accessing_gpu)
{
uvm_gpu_t *gpu = uvm_pmm_to_gpu(pmm);
uvm_gpu_identity_mapping_t *gpu_peer_mapping;

if (accessing_gpu->parent->peer_copy_mode == UVM_GPU_PEER_COPY_MODE_PHYSICAL)
return uvm_gpu_address_from_phys(uvm_pmm_gpu_peer_phys_address(pmm, chunk, accessing_gpu));

UVM_ASSERT(accessing_gpu->parent->peer_copy_mode == UVM_GPU_PEER_COPY_MODE_VIRTUAL);
gpu_peer_mapping = uvm_gpu_get_peer_mapping(accessing_gpu, gpu->id);

return uvm_gpu_address_virtual(gpu_peer_mapping->base + chunk->address);
}

static NV_STATUS evict_root_chunk_from_va_block(uvm_pmm_gpu_t *pmm,
uvm_gpu_root_chunk_t *root_chunk,
uvm_va_block_t *va_block)
@ -1586,7 +1513,7 @@ static NV_STATUS pick_and_evict_root_chunk(uvm_pmm_gpu_t *pmm,
chunk = &root_chunk->chunk;

if (uvm_pmm_gpu_memory_type_is_kernel(type)) {
if (type == UVM_PMM_GPU_MEMORY_TYPE_KERNEL) {
NvU32 flags = 0;
if (pmm_context == PMM_CONTEXT_PMA_EVICTION)
flags |= UVM_PMA_CALLED_FROM_PMA_EVICTION;
@ -1973,11 +1900,12 @@ NV_STATUS alloc_root_chunk(uvm_pmm_gpu_t *pmm,
// Also, user pages that are about to be overwritten, don't need to be
// zeroed, either. Add a flag to uvm_pmm_gpu_alloc_* to skip scrubbing.
const bool skip_pma_scrubbing = gpu->mem_info.numa.enabled;
UVM_ASSERT(uvm_pmm_gpu_memory_type_is_user(type) || uvm_pmm_gpu_memory_type_is_kernel(type));

UVM_ASSERT(type < UVM_PMM_GPU_MEMORY_TYPE_COUNT);

options.flags = UVM_PMA_ALLOCATE_DONT_EVICT;

if (uvm_pmm_gpu_memory_type_is_kernel(type) || !gpu_supports_pma_eviction(gpu))
if ((type == UVM_PMM_GPU_MEMORY_TYPE_KERNEL) || !gpu_supports_pma_eviction(gpu))
options.flags |= UVM_PMA_ALLOCATE_PINNED;

if (skip_pma_scrubbing)
@ -1988,9 +1916,10 @@ NV_STATUS alloc_root_chunk(uvm_pmm_gpu_t *pmm,
if (gpu->mem_info.numa.enabled)
flags |= UVM_PMM_ALLOC_FLAGS_DONT_BATCH;

// When the Confidential Computing feature is enabled, allocate GPU memory
// in the protected region, unless specified otherwise.
if (g_uvm_global.conf_computing_enabled && uvm_pmm_gpu_memory_type_is_protected(type))
// In Confidential Computing, the PMA allocator exposes not only regular
// ("protected") vidmem, but also another type called "unprotected" vidmem.
// UVM has no use for the latter type.
if (g_uvm_global.conf_computing_enabled)
options.flags |= UVM_PMA_ALLOCATE_PROTECTED_REGION;

if (!gpu->parent->rm_info.isSimulated &&
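Putting the alloc_root_chunk() changes above together, the PMA option flags now depend only on the squashed two-value memory type and on whether Confidential Computing is enabled. A hedged sketch of that flag assembly, using stand-in flag values rather than the real UVM_PMA_ALLOCATE_* definitions:

#include <stdbool.h>
#include <stdint.h>

/* Stand-in flag values; the real UVM_PMA_ALLOCATE_* definitions live in the
 * PMA headers and are not reproduced here. */
#define ALLOC_DONT_EVICT        (1u << 0)
#define ALLOC_PINNED            (1u << 1)
#define ALLOC_NO_ZERO           (1u << 2)
#define ALLOC_PROTECTED_REGION  (1u << 3)

uint32_t build_root_chunk_alloc_flags(bool is_kernel_type,
                                      bool supports_pma_eviction,
                                      bool skip_pma_scrubbing,
                                      bool conf_computing_enabled)
{
    uint32_t flags = ALLOC_DONT_EVICT;

    /* Kernel chunks, and GPUs without PMA eviction support, are pinned. */
    if (is_kernel_type || !supports_pma_eviction)
        flags |= ALLOC_PINNED;

    /* NUMA-enabled GPUs skip PMA scrubbing (pages are zeroed elsewhere). */
    if (skip_pma_scrubbing)
        flags |= ALLOC_NO_ZERO;

    /* Under Confidential Computing, UVM only allocates protected vidmem. */
    if (conf_computing_enabled)
        flags |= ALLOC_PROTECTED_REGION;

    return flags;
}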
@ -2245,11 +2174,7 @@ static bool check_chunk(uvm_pmm_gpu_t *pmm, uvm_gpu_chunk_t *chunk)
UVM_ASSERT(chunk_size & chunk_sizes);
UVM_ASSERT(IS_ALIGNED(chunk->address, chunk_size));
UVM_ASSERT(uvm_id_equal(uvm_gpu_id_from_index(chunk->gpu_index), gpu->id));

// See pmm_squash_memory_type().
if (!g_uvm_global.conf_computing_enabled)
UVM_ASSERT((chunk->type == UVM_PMM_GPU_MEMORY_TYPE_USER) || (chunk->type == UVM_PMM_GPU_MEMORY_TYPE_KERNEL));
UVM_ASSERT(chunk->type < UVM_PMM_GPU_MEMORY_TYPE_COUNT);

if (chunk->state == UVM_PMM_GPU_CHUNK_STATE_IS_SPLIT)
UVM_ASSERT(chunk_size > uvm_chunk_find_first_size(chunk_sizes));
@ -2559,8 +2484,12 @@ static NV_STATUS uvm_pmm_gpu_pma_evict_pages(void *void_pmm,
UVM_ASSERT(IS_ALIGNED(UVM_CHUNK_SIZE_MAX, page_size));
UVM_ASSERT(UVM_CHUNK_SIZE_MAX >= page_size);

// Currently, when the Confidential Computing feature is enabled, the
// entirety of vidmem is protected.
// In Confidential Computing, RM should only request eviction of protected
// vidmem.
//
// TODO: Bug 4287430: there shouldn't be any memory type argument, because
// the callback can only relate to protected vidmem. This check can be
// removed alongside the parameter.
if (g_uvm_global.conf_computing_enabled && (mem_type != UVM_PMA_GPU_MEMORY_TYPE_PROTECTED))
return NV_ERR_INVALID_ARGUMENT;
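The guard above (repeated in uvm_pmm_gpu_pma_evict_range() in the next hunk) simply rejects eviction callbacks for anything other than protected vidmem while Confidential Computing is enabled. A minimal sketch with illustrative stand-in types:

#include <stdbool.h>

typedef enum { MEMORY_TYPE_UNPROTECTED, MEMORY_TYPE_PROTECTED } pma_memory_type_t;
typedef enum { STATUS_OK, STATUS_INVALID_ARGUMENT } status_t;

/* Mirrors the check above: only protected vidmem may be evicted when
 * Confidential Computing is on; any other request is rejected. */
status_t validate_eviction_request(bool conf_computing_enabled, pma_memory_type_t mem_type)
{
    if (conf_computing_enabled && mem_type != MEMORY_TYPE_PROTECTED)
        return STATUS_INVALID_ARGUMENT;

    return STATUS_OK;
}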
@ -2698,6 +2627,15 @@ static NV_STATUS uvm_pmm_gpu_pma_evict_range(void *void_pmm,
phys_begin,
phys_end);

// In Confidential Computing, RM should only request eviction of protected
// vidmem.
//
// TODO: Bug 4287430: there shouldn't be any memory type argument, because
// the callback can only relate to protected vidmem. This check can be
// removed alongside the parameter.
if (g_uvm_global.conf_computing_enabled && (mem_type != UVM_PMA_GPU_MEMORY_TYPE_PROTECTED))
return NV_ERR_INVALID_ARGUMENT;

// Make sure that all pending allocations, that could have started before
// the eviction callback was called, are done. This is required to guarantee
// that any address that, PMA thinks, is owned by UVM has been indeed
@ -2990,7 +2928,7 @@ static NV_STATUS get_chunk_mappings_in_range(uvm_pmm_gpu_t *pmm, uvm_gpu_chunk_t
uvm_assert_mutex_locked(&pmm->lock);

// Kernel chunks do not have assigned VA blocks so we can just skip them
if (uvm_pmm_gpu_memory_type_is_kernel(chunk->type))
if (chunk->type == UVM_PMM_GPU_MEMORY_TYPE_KERNEL)
return NV_WARN_NOTHING_TO_DO;

// This chunk is located before the requested physical range. Skip its
@ -3434,11 +3372,8 @@ NV_STATUS uvm_pmm_gpu_init(uvm_pmm_gpu_t *pmm)
uvm_gpu_t *gpu = uvm_pmm_to_gpu(pmm);
const uvm_chunk_sizes_mask_t chunk_size_init[][UVM_PMM_GPU_MEMORY_TYPE_COUNT] =
{
{ gpu->parent->mmu_user_chunk_sizes,
gpu->parent->mmu_user_chunk_sizes,
gpu->parent->mmu_kernel_chunk_sizes,
gpu->parent->mmu_kernel_chunk_sizes },
{ 0, 0, uvm_mem_kernel_chunk_sizes(gpu), uvm_mem_kernel_chunk_sizes(gpu)},
{ gpu->parent->mmu_user_chunk_sizes, gpu->parent->mmu_kernel_chunk_sizes },
{ 0, uvm_mem_kernel_chunk_sizes(gpu)},
};
NV_STATUS status = NV_OK;
size_t i, j, k;
Some files were not shown because too many files have changed in this diff.