mirror of
https://github.com/NVIDIA/open-gpu-kernel-modules.git
synced 2025-02-20 06:54:20 +01:00
555.42.02
This commit is contained in:
parent
083cd9cf17
commit
5a1c474040
@ -1,5 +1,9 @@
|
||||
# Changelog
|
||||
|
||||
## Release 555 Entries
|
||||
|
||||
### [555.42.02] 2024-05-21
|
||||
|
||||
## Release 550 Entries
|
||||
|
||||
### [550.78] 2024-04-25
|
||||
|
@ -1,7 +1,7 @@
|
||||
# NVIDIA Linux Open GPU Kernel Module Source
|
||||
|
||||
This is the source release of the NVIDIA Linux open GPU kernel modules,
|
||||
version 550.78.
|
||||
version 555.42.02.
|
||||
|
||||
|
||||
## How to Build
|
||||
@ -17,7 +17,7 @@ as root:
|
||||
|
||||
Note that the kernel modules built here must be used with GSP
|
||||
firmware and user-space NVIDIA GPU driver components from a corresponding
|
||||
550.78 driver release. This can be achieved by installing
|
||||
555.42.02 driver release. This can be achieved by installing
|
||||
the NVIDIA GPU driver from the .run file using the `--no-kernel-modules`
|
||||
option. E.g.,
|
||||
|
||||
@ -74,7 +74,7 @@ kernel.
|
||||
|
||||
The NVIDIA open kernel modules support the same range of Linux kernel
|
||||
versions that are supported with the proprietary NVIDIA kernel modules.
|
||||
This is currently Linux kernel 3.10 or newer.
|
||||
This is currently Linux kernel 4.15 or newer.
|
||||
|
||||
|
||||
## How to Contribute
|
||||
@ -188,7 +188,7 @@ encountered specific to them.
|
||||
For details on feature support and limitations, see the NVIDIA GPU driver
|
||||
end user README here:
|
||||
|
||||
https://us.download.nvidia.com/XFree86/Linux-x86_64/550.78/README/kernel_open.html
|
||||
https://us.download.nvidia.com/XFree86/Linux-x86_64/555.42.02/README/kernel_open.html
|
||||
|
||||
For vGPU support, please refer to the README.vgpu packaged in the vGPU Host
|
||||
Package for more details.
|
||||
@ -856,6 +856,7 @@ Subsystem Device ID.
|
||||
| NVIDIA RTX A500 Embedded GPU | 25FB |
|
||||
| NVIDIA GeForce RTX 4090 | 2684 |
|
||||
| NVIDIA GeForce RTX 4090 D | 2685 |
|
||||
| NVIDIA GeForce RTX 4070 Ti SUPER | 2689 |
|
||||
| NVIDIA RTX 6000 Ada Generation | 26B1 1028 16A1 |
|
||||
| NVIDIA RTX 6000 Ada Generation | 26B1 103C 16A1 |
|
||||
| NVIDIA RTX 6000 Ada Generation | 26B1 10DE 16A1 |
|
||||
|
@ -72,7 +72,7 @@ EXTRA_CFLAGS += -I$(src)/common/inc
|
||||
EXTRA_CFLAGS += -I$(src)
|
||||
EXTRA_CFLAGS += -Wall $(DEFINES) $(INCLUDES) -Wno-cast-qual -Wno-format-extra-args
|
||||
EXTRA_CFLAGS += -D__KERNEL__ -DMODULE -DNVRM
|
||||
EXTRA_CFLAGS += -DNV_VERSION_STRING=\"550.78\"
|
||||
EXTRA_CFLAGS += -DNV_VERSION_STRING=\"555.42.02\"
|
||||
|
||||
ifneq ($(SYSSRCHOST1X),)
|
||||
EXTRA_CFLAGS += -I$(SYSSRCHOST1X)
|
||||
@ -118,7 +118,7 @@ ifeq ($(ARCH),x86_64)
|
||||
endif
|
||||
|
||||
ifeq ($(ARCH),powerpc)
|
||||
EXTRA_CFLAGS += -mlittle-endian -mno-strict-align -mno-altivec
|
||||
EXTRA_CFLAGS += -mlittle-endian -mno-strict-align
|
||||
endif
|
||||
|
||||
EXTRA_CFLAGS += -DNV_UVM_ENABLE
|
||||
@ -172,6 +172,7 @@ NV_CFLAGS_FROM_CONFTEST := $(shell $(NV_CONFTEST_CMD) build_cflags)
|
||||
NV_CONFTEST_CFLAGS = $(NV_CFLAGS_FROM_CONFTEST) $(EXTRA_CFLAGS) -fno-pie
|
||||
NV_CONFTEST_CFLAGS += $(call cc-disable-warning,pointer-sign)
|
||||
NV_CONFTEST_CFLAGS += $(call cc-option,-fshort-wchar,)
|
||||
NV_CONFTEST_CFLAGS += $(call cc-option,-Werror=incompatible-pointer-types,)
|
||||
NV_CONFTEST_CFLAGS += -Wno-error
|
||||
|
||||
NV_CONFTEST_COMPILE_TEST_HEADERS := $(obj)/conftest/macros.h
|
||||
|
@ -28,7 +28,7 @@ else
|
||||
else
|
||||
KERNEL_UNAME ?= $(shell uname -r)
|
||||
KERNEL_MODLIB := /lib/modules/$(KERNEL_UNAME)
|
||||
KERNEL_SOURCES := $(shell test -d $(KERNEL_MODLIB)/source && echo $(KERNEL_MODLIB)/source || echo $(KERNEL_MODLIB)/build)
|
||||
KERNEL_SOURCES := $(shell ((test -d $(KERNEL_MODLIB)/source && echo $(KERNEL_MODLIB)/source) || (test -d $(KERNEL_MODLIB)/build/source && echo $(KERNEL_MODLIB)/build/source)) || echo $(KERNEL_MODLIB)/build)
|
||||
endif
|
||||
|
||||
KERNEL_OUTPUT := $(KERNEL_SOURCES)
|
||||
@ -42,7 +42,11 @@ else
|
||||
else
|
||||
KERNEL_UNAME ?= $(shell uname -r)
|
||||
KERNEL_MODLIB := /lib/modules/$(KERNEL_UNAME)
|
||||
ifeq ($(KERNEL_SOURCES), $(KERNEL_MODLIB)/source)
|
||||
# $(filter patter...,text) - Returns all whitespace-separated words in text that
|
||||
# do match any of the pattern words, removing any words that do not match.
|
||||
# Set the KERNEL_OUTPUT only if either $(KERNEL_MODLIB)/source or
|
||||
# $(KERNEL_MODLIB)/build/source path matches the KERNEL_SOURCES.
|
||||
ifneq ($(filter $(KERNEL_SOURCES),$(KERNEL_MODLIB)/source $(KERNEL_MODLIB)/build/source),)
|
||||
KERNEL_OUTPUT := $(KERNEL_MODLIB)/build
|
||||
KBUILD_PARAMS := KBUILD_OUTPUT=$(KERNEL_OUTPUT)
|
||||
endif
|
||||
|
@ -37,13 +37,11 @@ typedef enum _HYPERVISOR_TYPE
|
||||
OS_HYPERVISOR_UNKNOWN
|
||||
} HYPERVISOR_TYPE;
|
||||
|
||||
#define CMD_VGPU_VFIO_WAKE_WAIT_QUEUE 0
|
||||
#define CMD_VGPU_VFIO_INJECT_INTERRUPT 1
|
||||
#define CMD_VGPU_VFIO_REGISTER_MDEV 2
|
||||
#define CMD_VGPU_VFIO_PRESENT 3
|
||||
#define CMD_VFIO_PCI_CORE_PRESENT 4
|
||||
#define CMD_VFIO_WAKE_REMOVE_GPU 1
|
||||
#define CMD_VGPU_VFIO_PRESENT 2
|
||||
#define CMD_VFIO_PCI_CORE_PRESENT 3
|
||||
|
||||
#define MAX_VF_COUNT_PER_GPU 64
|
||||
#define MAX_VF_COUNT_PER_GPU 64
|
||||
|
||||
typedef enum _VGPU_TYPE_INFO
|
||||
{
|
||||
@ -54,17 +52,11 @@ typedef enum _VGPU_TYPE_INFO
|
||||
|
||||
typedef struct
|
||||
{
|
||||
void *vgpuVfioRef;
|
||||
void *waitQueue;
|
||||
void *nv;
|
||||
NvU32 *vgpuTypeIds;
|
||||
NvU8 **vgpuNames;
|
||||
NvU32 numVgpuTypes;
|
||||
NvU32 domain;
|
||||
NvU8 bus;
|
||||
NvU8 slot;
|
||||
NvU8 function;
|
||||
NvBool is_virtfn;
|
||||
NvU32 domain;
|
||||
NvU32 bus;
|
||||
NvU32 device;
|
||||
NvU32 return_status;
|
||||
} vgpu_vfio_info;
|
||||
|
||||
typedef struct
|
||||
|
@ -58,14 +58,10 @@
|
||||
#include <linux/version.h>
|
||||
#include <linux/utsname.h>
|
||||
|
||||
#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 32)
|
||||
#error "This driver does not support kernels older than 2.6.32!"
|
||||
#elif LINUX_VERSION_CODE < KERNEL_VERSION(2, 7, 0)
|
||||
# define KERNEL_2_6
|
||||
#elif LINUX_VERSION_CODE >= KERNEL_VERSION(3, 0, 0)
|
||||
# define KERNEL_3
|
||||
#else
|
||||
#error "This driver does not support development kernels!"
|
||||
#if LINUX_VERSION_CODE == KERNEL_VERSION(4, 4, 0)
|
||||
// Version 4.4 is allowed, temporarily, although not officially supported.
|
||||
#elif LINUX_VERSION_CODE < KERNEL_VERSION(4, 15, 0)
|
||||
#error "This driver does not support kernels older than Linux 4.15!"
|
||||
#endif
|
||||
|
||||
#if defined (CONFIG_SMP) && !defined (__SMP__)
|
||||
@ -836,16 +832,16 @@ static inline dma_addr_t nv_phys_to_dma(struct device *dev, NvU64 pa)
|
||||
#define NV_PRINT_AT(nv_debug_level,at) \
|
||||
{ \
|
||||
nv_printf(nv_debug_level, \
|
||||
"NVRM: VM: %s:%d: 0x%p, %d page(s), count = %d, flags = 0x%08x, " \
|
||||
"NVRM: VM: %s:%d: 0x%p, %d page(s), count = %d, " \
|
||||
"page_table = 0x%p\n", __FUNCTION__, __LINE__, at, \
|
||||
at->num_pages, NV_ATOMIC_READ(at->usage_count), \
|
||||
at->flags, at->page_table); \
|
||||
at->page_table); \
|
||||
}
|
||||
|
||||
#define NV_PRINT_VMA(nv_debug_level,vma) \
|
||||
{ \
|
||||
nv_printf(nv_debug_level, \
|
||||
"NVRM: VM: %s:%d: 0x%lx - 0x%lx, 0x%08x bytes @ 0x%016llx, 0x%p, 0x%p\n", \
|
||||
"NVRM: VM: %s:%d: 0x%lx - 0x%lx, 0x%08lx bytes @ 0x%016llx, 0x%p, 0x%p\n", \
|
||||
__FUNCTION__, __LINE__, vma->vm_start, vma->vm_end, NV_VMA_SIZE(vma), \
|
||||
NV_VMA_OFFSET(vma), NV_VMA_PRIVATE(vma), NV_VMA_FILE(vma)); \
|
||||
}
|
||||
@ -1078,6 +1074,8 @@ static inline void nv_kmem_ctor_dummy(void *arg)
|
||||
kmem_cache_destroy(kmem_cache); \
|
||||
}
|
||||
|
||||
#define NV_KMEM_CACHE_ALLOC_ATOMIC(kmem_cache) \
|
||||
kmem_cache_alloc(kmem_cache, GFP_ATOMIC)
|
||||
#define NV_KMEM_CACHE_ALLOC(kmem_cache) \
|
||||
kmem_cache_alloc(kmem_cache, GFP_KERNEL)
|
||||
#define NV_KMEM_CACHE_FREE(ptr, kmem_cache) \
|
||||
@ -1104,6 +1102,23 @@ static inline void *nv_kmem_cache_zalloc(struct kmem_cache *k, gfp_t flags)
|
||||
#endif
|
||||
}
|
||||
|
||||
static inline int nv_kmem_cache_alloc_stack_atomic(nvidia_stack_t **stack)
|
||||
{
|
||||
nvidia_stack_t *sp = NULL;
|
||||
#if defined(NVCPU_X86_64)
|
||||
if (rm_is_altstack_in_use())
|
||||
{
|
||||
sp = NV_KMEM_CACHE_ALLOC_ATOMIC(nvidia_stack_t_cache);
|
||||
if (sp == NULL)
|
||||
return -ENOMEM;
|
||||
sp->size = sizeof(sp->stack);
|
||||
sp->top = sp->stack + sp->size;
|
||||
}
|
||||
#endif
|
||||
*stack = sp;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static inline int nv_kmem_cache_alloc_stack(nvidia_stack_t **stack)
|
||||
{
|
||||
nvidia_stack_t *sp = NULL;
|
||||
@ -1614,6 +1629,10 @@ typedef struct nv_linux_state_s {
|
||||
nv_kthread_q_t open_q;
|
||||
NvBool is_accepting_opens;
|
||||
struct semaphore open_q_lock;
|
||||
#if defined(NV_VGPU_KVM_BUILD)
|
||||
wait_queue_head_t wait;
|
||||
NvS32 return_status;
|
||||
#endif
|
||||
} nv_linux_state_t;
|
||||
|
||||
extern nv_linux_state_t *nv_linux_devices;
|
||||
|
@ -29,17 +29,17 @@
|
||||
typedef int vm_fault_t;
|
||||
#endif
|
||||
|
||||
/* pin_user_pages
|
||||
/*
|
||||
* pin_user_pages()
|
||||
*
|
||||
* Presence of pin_user_pages() also implies the presence of unpin-user_page().
|
||||
* Both were added in the v5.6-rc1
|
||||
* Both were added in the v5.6.
|
||||
*
|
||||
* pin_user_pages() was added by commit eddb1c228f7951d399240
|
||||
* ("mm/gup: introduce pin_user_pages*() and FOLL_PIN") in v5.6-rc1 (2020-01-30)
|
||||
*
|
||||
* Removed vmas parameter from pin_user_pages() by commit 40896a02751
|
||||
* ("mm/gup: remove vmas parameter from pin_user_pages()")
|
||||
* in linux-next, expected in v6.5-rc1 (2023-05-17)
|
||||
* pin_user_pages() was added by commit eddb1c228f79
|
||||
* ("mm/gup: introduce pin_user_pages*() and FOLL_PIN") in v5.6.
|
||||
*
|
||||
* Removed vmas parameter from pin_user_pages() by commit 4c630f307455
|
||||
* ("mm/gup: remove vmas parameter from pin_user_pages()") in v6.5.
|
||||
*/
|
||||
|
||||
#include <linux/mm.h>
|
||||
@ -63,25 +63,28 @@ typedef int vm_fault_t;
|
||||
#define NV_UNPIN_USER_PAGE put_page
|
||||
#endif // NV_PIN_USER_PAGES_PRESENT
|
||||
|
||||
/* get_user_pages
|
||||
/*
|
||||
* get_user_pages()
|
||||
*
|
||||
* The 8-argument version of get_user_pages was deprecated by commit
|
||||
* (2016 Feb 12: cde70140fed8429acf7a14e2e2cbd3e329036653)for the non-remote case
|
||||
* The 8-argument version of get_user_pages() was deprecated by commit
|
||||
* cde70140fed8 ("mm/gup: Overload get_user_pages() functions") in v4.6-rc1.
|
||||
* (calling get_user_pages with current and current->mm).
|
||||
*
|
||||
* Completely moved to the 6 argument version of get_user_pages -
|
||||
* 2016 Apr 4: c12d2da56d0e07d230968ee2305aaa86b93a6832
|
||||
* Completely moved to the 6 argument version of get_user_pages() by
|
||||
* commit c12d2da56d0e ("mm/gup: Remove the macro overload API migration
|
||||
* helpers from the get_user*() APIs") in v4.6-rc4.
|
||||
*
|
||||
* write and force parameters were replaced with gup_flags by -
|
||||
* 2016 Oct 12: 768ae309a96103ed02eb1e111e838c87854d8b51
|
||||
* write and force parameters were replaced with gup_flags by
|
||||
* commit 768ae309a961 ("mm: replace get_user_pages() write/force parameters
|
||||
* with gup_flags") in v4.9.
|
||||
*
|
||||
* A 7-argument version of get_user_pages was introduced into linux-4.4.y by
|
||||
* commit 8e50b8b07f462ab4b91bc1491b1c91bd75e4ad40 which cherry-picked the
|
||||
* replacement of the write and force parameters with gup_flags
|
||||
* commit 8e50b8b07f462 ("mm: replace get_user_pages() write/force parameters
|
||||
* with gup_flags") which cherry-picked the replacement of the write and
|
||||
* force parameters with gup_flags.
|
||||
*
|
||||
* Removed vmas parameter from get_user_pages() by commit 7bbf9c8c99
|
||||
* ("mm/gup: remove unused vmas parameter from get_user_pages()")
|
||||
* in linux-next, expected in v6.5-rc1 (2023-05-17)
|
||||
* Removed vmas parameter from get_user_pages() by commit 54d020692b34
|
||||
* ("mm/gup: remove unused vmas parameter from get_user_pages()") in v6.5.
|
||||
*
|
||||
*/
|
||||
|
||||
@ -112,18 +115,19 @@ typedef int vm_fault_t;
|
||||
}
|
||||
#endif // NV_GET_USER_PAGES_HAS_ARGS_FLAGS
|
||||
|
||||
/* pin_user_pages_remote
|
||||
/*
|
||||
* pin_user_pages_remote()
|
||||
*
|
||||
* pin_user_pages_remote() was added by commit eddb1c228f7951d399240
|
||||
* ("mm/gup: introduce pin_user_pages*() and FOLL_PIN") in v5.6 (2020-01-30)
|
||||
* pin_user_pages_remote() was added by commit eddb1c228f79
|
||||
* ("mm/gup: introduce pin_user_pages*() and FOLL_PIN") in v5.6.
|
||||
*
|
||||
* pin_user_pages_remote() removed 'tsk' parameter by commit
|
||||
* 64019a2e467a ("mm/gup: remove task_struct pointer for all gup code")
|
||||
* in v5.9-rc1 (2020-08-11). *
|
||||
* 64019a2e467a ("mm/gup: remove task_struct pointer for all gup code")
|
||||
* in v5.9.
|
||||
*
|
||||
* Removed unused vmas parameter from pin_user_pages_remote() by commit
|
||||
* 83bcc2e132("mm/gup: remove unused vmas parameter from pin_user_pages_remote()")
|
||||
* in linux-next, expected in v6.5-rc1 (2023-05-14)
|
||||
* 0b295316b3a9 ("mm/gup: remove unused vmas parameter from
|
||||
* pin_user_pages_remote()") in v6.5.
|
||||
*
|
||||
*/
|
||||
|
||||
@ -143,7 +147,7 @@ typedef int vm_fault_t;
|
||||
|
||||
/*
|
||||
* get_user_pages_remote() was added by commit 1e9877902dc7
|
||||
* ("mm/gup: Introduce get_user_pages_remote()") in v4.6 (2016-02-12).
|
||||
* ("mm/gup: Introduce get_user_pages_remote()") in v4.6.
|
||||
*
|
||||
* Note that get_user_pages_remote() requires the caller to hold a reference on
|
||||
* the task_struct (if non-NULL and if this API has tsk argument) and the mm_struct.
|
||||
@ -153,19 +157,17 @@ typedef int vm_fault_t;
|
||||
*
|
||||
* get_user_pages_remote() write/force parameters were replaced
|
||||
* with gup_flags by commit 9beae1ea8930 ("mm: replace get_user_pages_remote()
|
||||
* write/force parameters with gup_flags") in v4.9 (2016-10-13).
|
||||
* write/force parameters with gup_flags") in v4.9.
|
||||
*
|
||||
* get_user_pages_remote() added 'locked' parameter by commit 5b56d49fc31d
|
||||
* ("mm: add locked parameter to get_user_pages_remote()") in
|
||||
* v4.10 (2016-12-14).
|
||||
* ("mm: add locked parameter to get_user_pages_remote()") in v4.10.
|
||||
*
|
||||
* get_user_pages_remote() removed 'tsk' parameter by
|
||||
* commit 64019a2e467a ("mm/gup: remove task_struct pointer for
|
||||
* all gup code") in v5.9-rc1 (2020-08-11).
|
||||
* all gup code") in v5.9.
|
||||
*
|
||||
* Removed vmas parameter from get_user_pages_remote() by commit a4bde14d549
|
||||
* ("mm/gup: remove vmas parameter from get_user_pages_remote()")
|
||||
* in linux-next, expected in v6.5-rc1 (2023-05-14)
|
||||
* Removed vmas parameter from get_user_pages_remote() by commit ca5e863233e8
|
||||
* ("mm/gup: remove vmas parameter from get_user_pages_remote()") in v6.5.
|
||||
*
|
||||
*/
|
||||
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* SPDX-FileCopyrightText: Copyright (c) 1999-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* SPDX-FileCopyrightText: Copyright (c) 1999-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
@ -609,6 +609,15 @@ typedef enum
|
||||
NV_POWER_STATE_RUNNING
|
||||
} nv_power_state_t;
|
||||
|
||||
typedef struct
|
||||
{
|
||||
const char *vidmem_power_status;
|
||||
const char *dynamic_power_status;
|
||||
const char *gc6_support;
|
||||
const char *gcoff_support;
|
||||
const char *s0ix_status;
|
||||
} nv_power_info_t;
|
||||
|
||||
#define NV_PRIMARY_VGA(nv) ((nv)->primary_vga)
|
||||
|
||||
#define NV_IS_CTL_DEVICE(nv) ((nv)->flags & NV_FLAG_CONTROL)
|
||||
@ -778,7 +787,7 @@ nv_state_t* NV_API_CALL nv_get_ctl_state (void);
|
||||
|
||||
void NV_API_CALL nv_set_dma_address_size (nv_state_t *, NvU32 );
|
||||
|
||||
NV_STATUS NV_API_CALL nv_alias_pages (nv_state_t *, NvU32, NvU32, NvU32, NvU64, NvU64 *, void **);
|
||||
NV_STATUS NV_API_CALL nv_alias_pages (nv_state_t *, NvU32, NvU64, NvU32, NvU32, NvU64, NvU64 *, void **);
|
||||
NV_STATUS NV_API_CALL nv_alloc_pages (nv_state_t *, NvU32, NvU64, NvBool, NvU32, NvBool, NvBool, NvS32, NvU64 *, void **);
|
||||
NV_STATUS NV_API_CALL nv_free_pages (nv_state_t *, NvU32, NvBool, NvU32, void *);
|
||||
|
||||
@ -822,6 +831,7 @@ void NV_API_CALL nv_acpi_methods_init (NvU32 *);
|
||||
void NV_API_CALL nv_acpi_methods_uninit (void);
|
||||
|
||||
NV_STATUS NV_API_CALL nv_acpi_method (NvU32, NvU32, NvU32, void *, NvU16, NvU32 *, void *, NvU16 *);
|
||||
NV_STATUS NV_API_CALL nv_acpi_d3cold_dsm_for_upstream_port (nv_state_t *, NvU8 *, NvU32, NvU32, NvU32 *);
|
||||
NV_STATUS NV_API_CALL nv_acpi_dsm_method (nv_state_t *, NvU8 *, NvU32, NvBool, NvU32, void *, NvU16, NvU32 *, void *, NvU16 *);
|
||||
NV_STATUS NV_API_CALL nv_acpi_ddc_method (nv_state_t *, void *, NvU32 *, NvBool);
|
||||
NV_STATUS NV_API_CALL nv_acpi_dod_method (nv_state_t *, NvU32 *, NvU32 *);
|
||||
@ -990,10 +1000,10 @@ NV_STATUS NV_API_CALL rm_p2p_init_mapping (nvidia_stack_t *, NvU64, NvU6
|
||||
NV_STATUS NV_API_CALL rm_p2p_destroy_mapping (nvidia_stack_t *, NvU64);
|
||||
NV_STATUS NV_API_CALL rm_p2p_get_pages (nvidia_stack_t *, NvU64, NvU32, NvU64, NvU64, NvU64 *, NvU32 *, NvU32 *, NvU32 *, NvU8 **, void *);
|
||||
NV_STATUS NV_API_CALL rm_p2p_get_gpu_info (nvidia_stack_t *, NvU64, NvU64, NvU8 **, void **);
|
||||
NV_STATUS NV_API_CALL rm_p2p_get_pages_persistent (nvidia_stack_t *, NvU64, NvU64, void **, NvU64 *, NvU32 *, void *, void *);
|
||||
NV_STATUS NV_API_CALL rm_p2p_get_pages_persistent (nvidia_stack_t *, NvU64, NvU64, void **, NvU64 *, NvU32 *, void *, void *, void **);
|
||||
NV_STATUS NV_API_CALL rm_p2p_register_callback (nvidia_stack_t *, NvU64, NvU64, NvU64, void *, void (*)(void *), void *);
|
||||
NV_STATUS NV_API_CALL rm_p2p_put_pages (nvidia_stack_t *, NvU64, NvU32, NvU64, void *);
|
||||
NV_STATUS NV_API_CALL rm_p2p_put_pages_persistent(nvidia_stack_t *, void *, void *);
|
||||
NV_STATUS NV_API_CALL rm_p2p_put_pages_persistent(nvidia_stack_t *, void *, void *, void *);
|
||||
NV_STATUS NV_API_CALL rm_p2p_dma_map_pages (nvidia_stack_t *, nv_dma_device_t *, NvU8 *, NvU64, NvU32, NvU64 *, void **);
|
||||
NV_STATUS NV_API_CALL rm_dma_buf_dup_mem_handle (nvidia_stack_t *, nv_state_t *, NvHandle, NvHandle, NvHandle, NvHandle, void *, NvHandle, NvU64, NvU64, NvHandle *, void **);
|
||||
void NV_API_CALL rm_dma_buf_undup_mem_handle(nvidia_stack_t *, nv_state_t *, NvHandle, NvHandle);
|
||||
@ -1027,9 +1037,7 @@ void NV_API_CALL rm_enable_dynamic_power_management(nvidia_stack_t *, nv_s
|
||||
NV_STATUS NV_API_CALL rm_ref_dynamic_power(nvidia_stack_t *, nv_state_t *, nv_dynamic_power_mode_t);
|
||||
void NV_API_CALL rm_unref_dynamic_power(nvidia_stack_t *, nv_state_t *, nv_dynamic_power_mode_t);
|
||||
NV_STATUS NV_API_CALL rm_transition_dynamic_power(nvidia_stack_t *, nv_state_t *, NvBool, NvBool *);
|
||||
const char* NV_API_CALL rm_get_vidmem_power_status(nvidia_stack_t *, nv_state_t *);
|
||||
const char* NV_API_CALL rm_get_dynamic_power_management_status(nvidia_stack_t *, nv_state_t *);
|
||||
const char* NV_API_CALL rm_get_gpu_gcx_support(nvidia_stack_t *, nv_state_t *, NvBool);
|
||||
void NV_API_CALL rm_get_power_info(nvidia_stack_t *, nv_state_t *, nv_power_info_t *);
|
||||
|
||||
void NV_API_CALL rm_acpi_notify(nvidia_stack_t *, nv_state_t *, NvU32);
|
||||
void NV_API_CALL rm_acpi_nvpcf_notify(nvidia_stack_t *);
|
||||
@ -1041,13 +1049,12 @@ NV_STATUS NV_API_CALL nv_vgpu_create_request(nvidia_stack_t *, nv_state_t *, c
|
||||
NV_STATUS NV_API_CALL nv_vgpu_delete(nvidia_stack_t *, const NvU8 *, NvU16);
|
||||
NV_STATUS NV_API_CALL nv_vgpu_get_type_ids(nvidia_stack_t *, nv_state_t *, NvU32 *, NvU32 *, NvBool, NvU8, NvBool);
|
||||
NV_STATUS NV_API_CALL nv_vgpu_get_type_info(nvidia_stack_t *, nv_state_t *, NvU32, char *, int, NvU8);
|
||||
NV_STATUS NV_API_CALL nv_vgpu_get_bar_info(nvidia_stack_t *, nv_state_t *, const NvU8 *, NvU64 *, NvU32, void *, NvBool *);
|
||||
NV_STATUS NV_API_CALL nv_vgpu_get_bar_info(nvidia_stack_t *, nv_state_t *, const NvU8 *, NvU64 *,
|
||||
NvU64 *, NvU64 *, NvU32 *, NvBool *, NvU8 *);
|
||||
NV_STATUS NV_API_CALL nv_vgpu_get_hbm_info(nvidia_stack_t *, nv_state_t *, const NvU8 *, NvU64 *, NvU64 *);
|
||||
NV_STATUS NV_API_CALL nv_vgpu_start(nvidia_stack_t *, const NvU8 *, void *, NvS32 *, NvU8 *, NvU32);
|
||||
NV_STATUS NV_API_CALL nv_vgpu_get_sparse_mmap(nvidia_stack_t *, nv_state_t *, const NvU8 *, NvU64 **, NvU64 **, NvU32 *);
|
||||
NV_STATUS NV_API_CALL nv_vgpu_process_vf_info(nvidia_stack_t *, nv_state_t *, NvU8, NvU32, NvU8, NvU8, NvU8, NvBool, void *);
|
||||
NV_STATUS NV_API_CALL nv_vgpu_update_request(nvidia_stack_t *, const NvU8 *, NvU32, NvU64 *, NvU64 *, const char *);
|
||||
NV_STATUS NV_API_CALL nv_gpu_bind_event(nvidia_stack_t *);
|
||||
NV_STATUS NV_API_CALL nv_gpu_unbind_event(nvidia_stack_t *, NvU32, NvBool *);
|
||||
|
||||
NV_STATUS NV_API_CALL nv_get_usermap_access_params(nv_state_t*, nv_usermap_access_params_t*);
|
||||
nv_soc_irq_type_t NV_API_CALL nv_get_current_irq_type(nv_state_t*);
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* SPDX-FileCopyrightText: Copyright (c) 2013-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* SPDX-FileCopyrightText: Copyright (c) 2013-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
@ -1462,6 +1462,29 @@ NV_STATUS nvUvmInterfacePagingChannelPushStream(UvmGpuPagingChannelHandle channe
|
||||
char *methodStream,
|
||||
NvU32 methodStreamSize);
|
||||
|
||||
/*******************************************************************************
|
||||
nvUvmInterfaceKeyRotationChannelDisable
|
||||
|
||||
This function notifies RM that the given channels are idle.
|
||||
|
||||
This function is called after RM has notified UVM that keys need to be rotated.
|
||||
When called RM will disable the channels, rotate their keys, and then re-enable
|
||||
the channels.
|
||||
|
||||
Locking: This function acquires an API and GPU lock.
|
||||
Memory : This function dynamically allocates memory.
|
||||
|
||||
Arguments:
|
||||
channelList[IN] - An array of channel handles whose channels are idle.
|
||||
channelListCount[IN] - Number of channels in channelList. Its value must be
|
||||
greater than 0.
|
||||
|
||||
Error codes:
|
||||
NV_ERR_INVALID_ARGUMENT - channelList is NULL or channeListCount is 0.
|
||||
*/
|
||||
NV_STATUS nvUvmInterfaceKeyRotationChannelDisable(uvmGpuChannelHandle channelList[],
|
||||
NvU32 channeListCount);
|
||||
|
||||
/*******************************************************************************
|
||||
Cryptography Services Library (CSL) Interface
|
||||
*/
|
||||
@ -1507,7 +1530,7 @@ void nvUvmInterfaceDeinitCslContext(UvmCslContext *uvmCslContext);
|
||||
/*******************************************************************************
|
||||
nvUvmInterfaceCslUpdateContext
|
||||
|
||||
Updates a context after a key rotation event and can only be called once per
|
||||
Updates contexts after a key rotation event and can only be called once per
|
||||
key rotation event. Following a key rotation event, and before
|
||||
nvUvmInterfaceCslUpdateContext is called, data encrypted by the GPU with the
|
||||
previous key can be decrypted with nvUvmInterfaceCslDecrypt.
|
||||
@ -1516,12 +1539,14 @@ void nvUvmInterfaceDeinitCslContext(UvmCslContext *uvmCslContext);
|
||||
Memory : This function does not dynamically allocate memory.
|
||||
|
||||
Arguments:
|
||||
uvmCslContext[IN] - The CSL context associated with a channel.
|
||||
|
||||
contextList[IN/OUT] - An array of pointers to CSL contexts.
|
||||
contextListCount[IN] - Number of CSL contexts in contextList. Its value
|
||||
must be greater than 0.
|
||||
Error codes:
|
||||
NV_ERR_INVALID_ARGUMENT - The CSL context is not associated with a channel.
|
||||
NV_ERR_INVALID_ARGUMENT - contextList is NULL or contextListCount is 0.
|
||||
*/
|
||||
NV_STATUS nvUvmInterfaceCslUpdateContext(UvmCslContext *uvmCslContext);
|
||||
NV_STATUS nvUvmInterfaceCslUpdateContext(UvmCslContext *contextList[],
|
||||
NvU32 contextListCount);
|
||||
|
||||
/*******************************************************************************
|
||||
nvUvmInterfaceCslRotateIv
|
||||
@ -1739,7 +1764,14 @@ NV_STATUS nvUvmInterfaceCslIncrementIv(UvmCslContext *uvmCslContext,
|
||||
Checks and logs information about non-CSL encryptions, such as those that
|
||||
originate from the GPU.
|
||||
|
||||
This function does not modify elements of the UvmCslContext.
|
||||
For contexts associated with channels, this function does not modify elements of
|
||||
the UvmCslContext and must be called for each external encryption invocation.
|
||||
|
||||
For the context associated with fault buffers, bufferSize can encompass multiple
|
||||
encryption invocations, and the UvmCslContext will be updated following a key
|
||||
rotation event.
|
||||
|
||||
In either case the IV remains unmodified after this function is called.
|
||||
|
||||
Locking: This function does not acquire an API or GPU lock.
|
||||
Memory : This function does not dynamically allocate memory.
|
||||
@ -1748,7 +1780,7 @@ NV_STATUS nvUvmInterfaceCslIncrementIv(UvmCslContext *uvmCslContext,
|
||||
|
||||
Arguments:
|
||||
uvmCslContext[IN/OUT] - The CSL context.
|
||||
bufferSize[OUT] - The size of the buffer encrypted by the
|
||||
bufferSize[OUT] - The size of the buffer(s) encrypted by the
|
||||
external entity in units of bytes.
|
||||
|
||||
Error codes:
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* SPDX-FileCopyrightText: Copyright (c) 2014-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* SPDX-FileCopyrightText: Copyright (c) 2014-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
@ -39,12 +39,12 @@
|
||||
// are multiple BIG page sizes in RM. These defines are used as flags to "0"
|
||||
// should be OK when user is not sure which pagesize allocation it wants
|
||||
//
|
||||
#define UVM_PAGE_SIZE_DEFAULT 0x0
|
||||
#define UVM_PAGE_SIZE_4K 0x1000
|
||||
#define UVM_PAGE_SIZE_64K 0x10000
|
||||
#define UVM_PAGE_SIZE_128K 0x20000
|
||||
#define UVM_PAGE_SIZE_2M 0x200000
|
||||
#define UVM_PAGE_SIZE_512M 0x20000000
|
||||
#define UVM_PAGE_SIZE_DEFAULT 0x0ULL
|
||||
#define UVM_PAGE_SIZE_4K 0x1000ULL
|
||||
#define UVM_PAGE_SIZE_64K 0x10000ULL
|
||||
#define UVM_PAGE_SIZE_128K 0x20000ULL
|
||||
#define UVM_PAGE_SIZE_2M 0x200000ULL
|
||||
#define UVM_PAGE_SIZE_512M 0x20000000ULL
|
||||
|
||||
//
|
||||
// When modifying flags, make sure they are compatible with the mirrored
|
||||
@ -267,6 +267,7 @@ typedef struct UvmGpuChannelInfo_tag
|
||||
|
||||
// The errorNotifier is filled out when the channel hits an RC error.
|
||||
NvNotification *errorNotifier;
|
||||
NvNotification *keyRotationNotifier;
|
||||
|
||||
NvU32 hwRunlistId;
|
||||
NvU32 hwChannelId;
|
||||
@ -292,13 +293,13 @@ typedef struct UvmGpuChannelInfo_tag
|
||||
|
||||
// GPU VAs of both GPFIFO and GPPUT are needed in Confidential Computing
|
||||
// so a channel can be controlled via another channel (SEC2 or WLC/LCIC)
|
||||
NvU64 gpFifoGpuVa;
|
||||
NvU64 gpPutGpuVa;
|
||||
NvU64 gpGetGpuVa;
|
||||
NvU64 gpFifoGpuVa;
|
||||
NvU64 gpPutGpuVa;
|
||||
NvU64 gpGetGpuVa;
|
||||
// GPU VA of work submission offset is needed in Confidential Computing
|
||||
// so CE channels can ring doorbell of other channels as required for
|
||||
// WLC/LCIC work submission
|
||||
NvU64 workSubmissionOffsetGpuVa;
|
||||
NvU64 workSubmissionOffsetGpuVa;
|
||||
} UvmGpuChannelInfo;
|
||||
|
||||
typedef enum
|
||||
@ -1086,4 +1087,21 @@ typedef enum UvmCslOperation
|
||||
UVM_CSL_OPERATION_DECRYPT
|
||||
} UvmCslOperation;
|
||||
|
||||
typedef enum UVM_KEY_ROTATION_STATUS {
|
||||
// Key rotation complete/not in progress
|
||||
UVM_KEY_ROTATION_STATUS_IDLE = 0,
|
||||
// RM is waiting for clients to report their channels are idle for key rotation
|
||||
UVM_KEY_ROTATION_STATUS_PENDING = 1,
|
||||
// Key rotation is in progress
|
||||
UVM_KEY_ROTATION_STATUS_IN_PROGRESS = 2,
|
||||
// Key rotation timeout failure, RM will RC non-idle channels.
|
||||
// UVM should never see this status value.
|
||||
UVM_KEY_ROTATION_STATUS_FAILED_TIMEOUT = 3,
|
||||
// Key rotation failed because upper threshold was crossed, RM will RC non-idle channels
|
||||
UVM_KEY_ROTATION_STATUS_FAILED_THRESHOLD = 4,
|
||||
// Internal RM failure while rotating keys for a certain channel, RM will RC the channel.
|
||||
UVM_KEY_ROTATION_STATUS_FAILED_ROTATION = 5,
|
||||
UVM_KEY_ROTATION_STATUS_MAX_COUNT = 6,
|
||||
} UVM_KEY_ROTATION_STATUS;
|
||||
|
||||
#endif // _NV_UVM_TYPES_H_
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* SPDX-FileCopyrightText: Copyright (c) 1993-2020 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
@ -494,6 +494,23 @@ do \
|
||||
//
|
||||
#define NV_TWO_N_MINUS_ONE(n) (((1ULL<<(n/2))<<((n+1)/2))-1)
|
||||
|
||||
//
|
||||
// Create a 64b bitmask with n bits set
|
||||
// This is the same as ((1ULL<<n) - 1), but it doesn't overflow for n=64
|
||||
//
|
||||
// ...
|
||||
// n=-1, 0x0000000000000000
|
||||
// n=0, 0x0000000000000000
|
||||
// n=1, 0x0000000000000001
|
||||
// ...
|
||||
// n=63, 0x7FFFFFFFFFFFFFFF
|
||||
// n=64, 0xFFFFFFFFFFFFFFFF
|
||||
// n=65, 0xFFFFFFFFFFFFFFFF
|
||||
// n=66, 0xFFFFFFFFFFFFFFFF
|
||||
// ...
|
||||
//
|
||||
#define NV_BITMASK64(n) ((n<1) ? 0ULL : (NV_U64_MAX>>((n>64) ? 0 : (64-n))))
|
||||
|
||||
#define DRF_READ_1WORD_BS(d,r,f,v) \
|
||||
((DRF_EXTENT_MW(NV##d##r##f)<8)?DRF_READ_1BYTE_BS(NV##d##r##f,(v)): \
|
||||
((DRF_EXTENT_MW(NV##d##r##f)<16)?DRF_READ_2BYTE_BS(NV##d##r##f,(v)): \
|
||||
@ -574,6 +591,13 @@ nvMaskPos32(const NvU32 mask, const NvU32 bitIdx)
|
||||
n32 = BIT_IDX_32(LOWESTBIT(n32));\
|
||||
}
|
||||
|
||||
// Destructive operation on n64
|
||||
#define LOWESTBITIDX_64(n64) \
|
||||
{ \
|
||||
n64 = BIT_IDX_64(LOWESTBIT(n64));\
|
||||
}
|
||||
|
||||
|
||||
// Destructive operation on n32
|
||||
#define HIGHESTBITIDX_32(n32) \
|
||||
{ \
|
||||
@ -918,6 +942,11 @@ static NV_FORCEINLINE void *NV_NVUPTR_TO_PTR(NvUPtr address)
|
||||
// Use (lo) if (b) is less than 64, and (hi) if >= 64.
|
||||
//
|
||||
#define NV_BIT_SET_128(b, lo, hi) { nvAssert( (b) < 128 ); if ( (b) < 64 ) (lo) |= NVBIT64(b); else (hi) |= NVBIT64( b & 0x3F ); }
|
||||
//
|
||||
// Clear the bit at pos (b) for U64 which is < 128.
|
||||
// Use (lo) if (b) is less than 64, and (hi) if >= 64.
|
||||
//
|
||||
#define NV_BIT_CLEAR_128(b, lo, hi) { nvAssert( (b) < 128 ); if ( (b) < 64 ) (lo) &= ~NVBIT64(b); else (hi) &= ~NVBIT64( b & 0x3F ); }
|
||||
|
||||
// Get the number of elements the specified fixed-size array
|
||||
#define NV_ARRAY_ELEMENTS(x) ((sizeof(x)/sizeof((x)[0])))
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* SPDX-FileCopyrightText: Copyright (c) 2014-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* SPDX-FileCopyrightText: Copyright (c) 2014-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
@ -152,6 +152,7 @@ NV_STATUS_CODE(NV_ERR_FABRIC_MANAGER_NOT_PRESENT, 0x0000007A, "Fabric Manag
|
||||
NV_STATUS_CODE(NV_ERR_ALREADY_SIGNALLED, 0x0000007B, "Semaphore Surface value already >= requested wait value")
|
||||
NV_STATUS_CODE(NV_ERR_QUEUE_TASK_SLOT_NOT_AVAILABLE, 0x0000007C, "PMU RPC error due to no queue slot available for this event")
|
||||
NV_STATUS_CODE(NV_ERR_KEY_ROTATION_IN_PROGRESS, 0x0000007D, "Operation not allowed as key rotation is in progress")
|
||||
NV_STATUS_CODE(NV_ERR_TEST_ONLY_CODE_NOT_ENABLED, 0x0000007E, "Test-only code path not enabled")
|
||||
|
||||
// Warnings:
|
||||
NV_STATUS_CODE(NV_WARN_HOT_SWITCH, 0x00010001, "WARNING Hot switch")
|
||||
|
@ -152,6 +152,12 @@ typedef signed short NvS16; /* -32768 to 32767 */
|
||||
(((NvU32)(c) & 0xff) << 8) | \
|
||||
(((NvU32)(d) & 0xff))))
|
||||
|
||||
// Macro to build an NvU64 from two DWORDS, listed from msb to lsb
|
||||
#define NvU64_BUILD(a, b) \
|
||||
((NvU64)( \
|
||||
(((NvU64)(a) & ~0U) << 32) | \
|
||||
(((NvU64)(b) & ~0U))))
|
||||
|
||||
#if NVTYPES_USE_STDINT
|
||||
typedef uint32_t NvV32; /* "void": enumerated or multiple fields */
|
||||
typedef uint32_t NvU32; /* 0 to 4294967295 */
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* SPDX-FileCopyrightText: Copyright (c) 1999-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* SPDX-FileCopyrightText: Copyright (c) 1999-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
@ -101,9 +101,10 @@ NV_STATUS NV_API_CALL rm_gpu_ops_paging_channels_map(nvidia_stack_t *, nvgpuAdd
|
||||
void NV_API_CALL rm_gpu_ops_paging_channels_unmap(nvidia_stack_t *, nvgpuAddressSpaceHandle_t, NvU64, nvgpuDeviceHandle_t);
|
||||
NV_STATUS NV_API_CALL rm_gpu_ops_paging_channel_push_stream(nvidia_stack_t *, nvgpuPagingChannelHandle_t, char *, NvU32);
|
||||
|
||||
NV_STATUS NV_API_CALL rm_gpu_ops_key_rotation_channel_disable(nvidia_stack_t *, nvgpuChannelHandle_t [], NvU32);
|
||||
NV_STATUS NV_API_CALL rm_gpu_ops_ccsl_context_init(nvidia_stack_t *, struct ccslContext_t **, nvgpuChannelHandle_t);
|
||||
NV_STATUS NV_API_CALL rm_gpu_ops_ccsl_context_clear(nvidia_stack_t *, struct ccslContext_t *);
|
||||
NV_STATUS NV_API_CALL rm_gpu_ops_ccsl_context_update(nvidia_stack_t *, struct ccslContext_t *);
|
||||
NV_STATUS NV_API_CALL rm_gpu_ops_ccsl_context_update(nvidia_stack_t *, UvmCslContext *[], NvU32);
|
||||
NV_STATUS NV_API_CALL rm_gpu_ops_ccsl_rotate_iv(nvidia_stack_t *, struct ccslContext_t *, NvU8);
|
||||
NV_STATUS NV_API_CALL rm_gpu_ops_ccsl_encrypt(nvidia_stack_t *, struct ccslContext_t *, NvU32, NvU8 const *, NvU8 *, NvU8 *);
|
||||
NV_STATUS NV_API_CALL rm_gpu_ops_ccsl_encrypt_with_iv(nvidia_stack_t *, struct ccslContext_t *, NvU32, NvU8 const *, NvU8*, NvU8 *, NvU8 *);
|
||||
|
@ -1416,6 +1416,42 @@ compile_test() {
|
||||
compile_check_conftest "$CODE" "NV_VFIO_REGISTER_EMULATED_IOMMU_DEV_PRESENT" "" "functions"
|
||||
;;
|
||||
|
||||
bus_type_has_iommu_ops)
|
||||
#
|
||||
# Determine if 'bus_type' structure has a 'iommu_ops' field.
|
||||
#
|
||||
# This field was removed by commit 17de3f5fdd35 (iommu: Retire bus ops)
|
||||
# in v6.8
|
||||
#
|
||||
CODE="
|
||||
#include <linux/device.h>
|
||||
|
||||
int conftest_bus_type_has_iommu_ops(void) {
|
||||
return offsetof(struct bus_type, iommu_ops);
|
||||
}"
|
||||
|
||||
compile_check_conftest "$CODE" "NV_BUS_TYPE_HAS_IOMMU_OPS" "" "types"
|
||||
;;
|
||||
|
||||
eventfd_signal_has_counter_arg)
|
||||
#
|
||||
# Determine if eventfd_signal() function has an additional 'counter' argument.
|
||||
#
|
||||
# This argument was removed by commit 3652117f8548 (eventfd: simplify
|
||||
# eventfd_signal()) in v6.8
|
||||
#
|
||||
CODE="
|
||||
#include <linux/eventfd.h>
|
||||
|
||||
void conftest_eventfd_signal_has_counter_arg(void) {
|
||||
struct eventfd_ctx *ctx;
|
||||
|
||||
eventfd_signal(ctx, 1);
|
||||
}"
|
||||
|
||||
compile_check_conftest "$CODE" "NV_EVENTFD_SIGNAL_HAS_COUNTER_ARG" "" "types"
|
||||
;;
|
||||
|
||||
drm_available)
|
||||
# Determine if the DRM subsystem is usable
|
||||
CODE="
|
||||
@ -5520,7 +5556,8 @@ compile_test() {
|
||||
|
||||
of_dma_configure)
|
||||
#
|
||||
# Determine if of_dma_configure() function is present
|
||||
# Determine if of_dma_configure() function is present, and how
|
||||
# many arguments it takes.
|
||||
#
|
||||
# Added by commit 591c1ee465ce ("of: configure the platform
|
||||
# device dma parameters") in v3.16. However, it was a static,
|
||||
@ -5530,17 +5567,69 @@ compile_test() {
|
||||
# commit 1f5c69aa51f9 ("of: Move of_dma_configure() to device.c
|
||||
# to help re-use") in v4.1.
|
||||
#
|
||||
CODE="
|
||||
# It subsequently began taking a third parameter with commit
|
||||
# 3d6ce86ee794 ("drivers: remove force dma flag from buses")
|
||||
# in v4.18.
|
||||
#
|
||||
|
||||
echo "$CONFTEST_PREAMBLE
|
||||
#if defined(NV_LINUX_OF_DEVICE_H_PRESENT)
|
||||
#include <linux/of_device.h>
|
||||
#endif
|
||||
|
||||
void conftest_of_dma_configure(void)
|
||||
{
|
||||
of_dma_configure();
|
||||
}
|
||||
"
|
||||
" > conftest$$.c
|
||||
|
||||
compile_check_conftest "$CODE" "NV_OF_DMA_CONFIGURE_PRESENT" "" "functions"
|
||||
$CC $CFLAGS -c conftest$$.c > /dev/null 2>&1
|
||||
rm -f conftest$$.c
|
||||
|
||||
if [ -f conftest$$.o ]; then
|
||||
rm -f conftest$$.o
|
||||
|
||||
echo "#undef NV_OF_DMA_CONFIGURE_PRESENT" | append_conftest "functions"
|
||||
echo "#undef NV_OF_DMA_CONFIGURE_ARGUMENT_COUNT" | append_conftest "functions"
|
||||
else
|
||||
echo "#define NV_OF_DMA_CONFIGURE_PRESENT" | append_conftest "functions"
|
||||
|
||||
echo "$CONFTEST_PREAMBLE
|
||||
#if defined(NV_LINUX_OF_DEVICE_H_PRESENT)
|
||||
#include <linux/of_device.h>
|
||||
#endif
|
||||
|
||||
void conftest_of_dma_configure(void) {
|
||||
of_dma_configure(NULL, NULL, false);
|
||||
}" > conftest$$.c
|
||||
|
||||
$CC $CFLAGS -c conftest$$.c > /dev/null 2>&1
|
||||
rm -f conftest$$.c
|
||||
|
||||
if [ -f conftest$$.o ]; then
|
||||
rm -f conftest$$.o
|
||||
echo "#define NV_OF_DMA_CONFIGURE_ARGUMENT_COUNT 3" | append_conftest "functions"
|
||||
return
|
||||
fi
|
||||
|
||||
echo "$CONFTEST_PREAMBLE
|
||||
#if defined(NV_LINUX_OF_DEVICE_H_PRESENT)
|
||||
#include <linux/of_device.h>
|
||||
#endif
|
||||
|
||||
void conftest_of_dma_configure(void) {
|
||||
of_dma_configure(NULL, NULL);
|
||||
}" > conftest$$.c
|
||||
|
||||
$CC $CFLAGS -c conftest$$.c > /dev/null 2>&1
|
||||
rm -f conftest$$.c
|
||||
|
||||
if [ -f conftest$$.o ]; then
|
||||
rm -f conftest$$.o
|
||||
echo "#define NV_OF_DMA_CONFIGURE_ARGUMENT_COUNT 2" | append_conftest "functions"
|
||||
return
|
||||
fi
|
||||
fi
|
||||
;;
|
||||
|
||||
icc_get)
|
||||
@ -6761,12 +6850,45 @@ compile_test() {
|
||||
compile_check_conftest "$CODE" "NV_DRM_MODE_CREATE_DP_COLORSPACE_PROPERTY_HAS_SUPPORTED_COLORSPACES_ARG" "" "types"
|
||||
;;
|
||||
|
||||
drm_syncobj_features_present)
|
||||
# Determine if DRIVER_SYNCOBJ and DRIVER_SYNCOBJ_TIMELINE DRM
|
||||
# driver features are present. Timeline DRM synchronization objects
|
||||
# may only be used if both of these are supported by the driver.
|
||||
#
|
||||
# DRIVER_SYNCOBJ_TIMELINE Added by commit 060cebb20cdb ("drm:
|
||||
# introduce a capability flag for syncobj timeline support") in
|
||||
# v5.2
|
||||
#
|
||||
# DRIVER_SYNCOBJ Added by commit e9083420bbac ("drm: introduce
|
||||
# sync objects (v4)") in v4.12
|
||||
CODE="
|
||||
#if defined(NV_DRM_DRM_DRV_H_PRESENT)
|
||||
#include <drm/drm_drv.h>
|
||||
#endif
|
||||
int features = DRIVER_SYNCOBJ | DRIVER_SYNCOBJ_TIMELINE;"
|
||||
|
||||
compile_check_conftest "$CODE" "NV_DRM_SYNCOBJ_FEATURES_PRESENT" "" "types"
|
||||
;;
|
||||
|
||||
stack_trace)
|
||||
# Determine if functions stack_trace_{save,print} are present.
|
||||
# Added by commit e9b98e162 ("stacktrace: Provide helpers for
|
||||
# common stack trace operations") in v5.2.
|
||||
CODE="
|
||||
#include <linux/stacktrace.h>
|
||||
void conftest_stack_trace(void) {
|
||||
stack_trace_save();
|
||||
stack_trace_print();
|
||||
}"
|
||||
|
||||
compile_check_conftest "$CODE" "NV_STACK_TRACE_PRESENT" "" "functions"
|
||||
;;
|
||||
|
||||
drm_unlocked_ioctl_flag_present)
|
||||
# Determine if DRM_UNLOCKED IOCTL flag is present.
|
||||
#
|
||||
# DRM_UNLOCKED was removed by commit 2798ffcc1d6a ("drm: Remove
|
||||
# locking for legacy ioctls and DRM_UNLOCKED") in Linux
|
||||
# next-20231208.
|
||||
# locking for legacy ioctls and DRM_UNLOCKED") in v6.8.
|
||||
#
|
||||
# DRM_UNLOCKED definition was moved from drmP.h to drm_ioctl.h by
|
||||
# commit 2640981f3600 ("drm: document drm_ioctl.[hc]") in v4.12.
|
||||
|
@ -52,6 +52,7 @@ NV_HEADER_PRESENCE_TESTS = \
|
||||
linux/dma-resv.h \
|
||||
soc/tegra/chip-id.h \
|
||||
soc/tegra/fuse.h \
|
||||
soc/tegra/fuse-helper.h \
|
||||
soc/tegra/tegra_bpmp.h \
|
||||
video/nv_internal.h \
|
||||
linux/platform/tegra/dce/dce-client-ipc.h \
|
||||
|
@ -176,12 +176,10 @@ cursor_plane_req_config_update(struct drm_plane *plane,
|
||||
return;
|
||||
}
|
||||
|
||||
*req_config = (struct NvKmsKapiCursorRequestedConfig) {
|
||||
.surface = to_nv_framebuffer(plane_state->fb)->pSurface,
|
||||
|
||||
.dstX = plane_state->crtc_x,
|
||||
.dstY = plane_state->crtc_y,
|
||||
};
|
||||
memset(req_config, 0, sizeof(*req_config));
|
||||
req_config->surface = to_nv_framebuffer(plane_state->fb)->pSurface;
|
||||
req_config->dstX = plane_state->crtc_x;
|
||||
req_config->dstY = plane_state->crtc_y;
|
||||
|
||||
#if defined(NV_DRM_ALPHA_BLENDING_AVAILABLE)
|
||||
if (plane->blend_mode_property != NULL && plane->alpha_property != NULL) {
|
||||
@ -275,24 +273,22 @@ plane_req_config_update(struct drm_plane *plane,
|
||||
return 0;
|
||||
}
|
||||
|
||||
*req_config = (struct NvKmsKapiLayerRequestedConfig) {
|
||||
.config = {
|
||||
.surface = to_nv_framebuffer(plane_state->fb)->pSurface,
|
||||
memset(req_config, 0, sizeof(*req_config));
|
||||
|
||||
/* Source values are 16.16 fixed point */
|
||||
.srcX = plane_state->src_x >> 16,
|
||||
.srcY = plane_state->src_y >> 16,
|
||||
.srcWidth = plane_state->src_w >> 16,
|
||||
.srcHeight = plane_state->src_h >> 16,
|
||||
req_config->config.surface = to_nv_framebuffer(plane_state->fb)->pSurface;
|
||||
|
||||
.dstX = plane_state->crtc_x,
|
||||
.dstY = plane_state->crtc_y,
|
||||
.dstWidth = plane_state->crtc_w,
|
||||
.dstHeight = plane_state->crtc_h,
|
||||
/* Source values are 16.16 fixed point */
|
||||
req_config->config.srcX = plane_state->src_x >> 16;
|
||||
req_config->config.srcY = plane_state->src_y >> 16;
|
||||
req_config->config.srcWidth = plane_state->src_w >> 16;
|
||||
req_config->config.srcHeight = plane_state->src_h >> 16;
|
||||
|
||||
.csc = old_config.csc
|
||||
},
|
||||
};
|
||||
req_config->config.dstX = plane_state->crtc_x;
|
||||
req_config->config.dstY = plane_state->crtc_y;
|
||||
req_config->config.dstWidth = plane_state->crtc_w;
|
||||
req_config->config.dstHeight = plane_state->crtc_h;
|
||||
|
||||
req_config->config.csc = old_config.csc;
|
||||
|
||||
#if defined(NV_DRM_ROTATION_AVAILABLE)
|
||||
/*
|
||||
@ -688,9 +684,7 @@ static int nv_drm_plane_atomic_set_property(
|
||||
to_nv_drm_plane_state(state);
|
||||
|
||||
if (property == nv_dev->nv_out_fence_property) {
|
||||
#if defined(NV_LINUX_NVHOST_H_PRESENT) && defined(CONFIG_TEGRA_GRHOST)
|
||||
nv_drm_plane_state->fd_user_ptr = u64_to_user_ptr(val);
|
||||
#endif
|
||||
nv_drm_plane_state->fd_user_ptr = (void __user *)(uintptr_t)(val);
|
||||
return 0;
|
||||
} else if (property == nv_dev->nv_input_colorspace_property) {
|
||||
nv_drm_plane_state->input_colorspace = val;
|
||||
@ -875,14 +869,12 @@ static inline void nv_drm_crtc_duplicate_req_head_modeset_config(
|
||||
* there is no change in new configuration yet with respect
|
||||
* to older one!
|
||||
*/
|
||||
*new = (struct NvKmsKapiHeadRequestedConfig) {
|
||||
.modeSetConfig = old->modeSetConfig,
|
||||
};
|
||||
memset(new, 0, sizeof(*new));
|
||||
new->modeSetConfig = old->modeSetConfig;
|
||||
|
||||
for (i = 0; i < ARRAY_SIZE(old->layerRequestedConfig); i++) {
|
||||
new->layerRequestedConfig[i] = (struct NvKmsKapiLayerRequestedConfig) {
|
||||
.config = old->layerRequestedConfig[i].config,
|
||||
};
|
||||
new->layerRequestedConfig[i].config =
|
||||
old->layerRequestedConfig[i].config;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -373,19 +373,15 @@ static int nv_drm_create_properties(struct nv_drm_device *nv_dev)
|
||||
len++;
|
||||
}
|
||||
|
||||
#if defined(NV_LINUX_NVHOST_H_PRESENT) && defined(CONFIG_TEGRA_GRHOST)
|
||||
if (!nv_dev->supportsSyncpts) {
|
||||
return 0;
|
||||
if (nv_dev->supportsSyncpts) {
|
||||
nv_dev->nv_out_fence_property =
|
||||
drm_property_create_range(nv_dev->dev, DRM_MODE_PROP_ATOMIC,
|
||||
"NV_DRM_OUT_FENCE_PTR", 0, U64_MAX);
|
||||
if (nv_dev->nv_out_fence_property == NULL) {
|
||||
return -ENOMEM;
|
||||
}
|
||||
}
|
||||
|
||||
nv_dev->nv_out_fence_property =
|
||||
drm_property_create_range(nv_dev->dev, DRM_MODE_PROP_ATOMIC,
|
||||
"NV_DRM_OUT_FENCE_PTR", 0, U64_MAX);
|
||||
if (nv_dev->nv_out_fence_property == NULL) {
|
||||
return -ENOMEM;
|
||||
}
|
||||
#endif
|
||||
|
||||
nv_dev->nv_input_colorspace_property =
|
||||
drm_property_create_enum(nv_dev->dev, 0, "NV_INPUT_COLORSPACE",
|
||||
enum_list, len);
|
||||
@ -480,6 +476,22 @@ static int nv_drm_load(struct drm_device *dev, unsigned long flags)
|
||||
return -ENODEV;
|
||||
}
|
||||
|
||||
#if defined(NV_DRM_FBDEV_GENERIC_AVAILABLE)
|
||||
/*
|
||||
* If fbdev is enabled, take modeset ownership now before other DRM clients
|
||||
* can take master (and thus NVKMS ownership).
|
||||
*/
|
||||
if (nv_drm_fbdev_module_param) {
|
||||
if (!nvKms->grabOwnership(pDevice)) {
|
||||
nvKms->freeDevice(pDevice);
|
||||
NV_DRM_DEV_LOG_ERR(nv_dev, "Failed to grab NVKMS modeset ownership");
|
||||
return -EBUSY;
|
||||
}
|
||||
|
||||
nv_dev->hasFramebufferConsole = NV_TRUE;
|
||||
}
|
||||
#endif
|
||||
|
||||
mutex_lock(&nv_dev->lock);
|
||||
|
||||
/* Set NvKmsKapiDevice */
|
||||
@ -590,6 +602,15 @@ static void __nv_drm_unload(struct drm_device *dev)
|
||||
return;
|
||||
}
|
||||
|
||||
/* Release modeset ownership if fbdev is enabled */
|
||||
|
||||
#if defined(NV_DRM_FBDEV_GENERIC_AVAILABLE)
|
||||
if (nv_dev->hasFramebufferConsole) {
|
||||
drm_atomic_helper_shutdown(dev);
|
||||
nvKms->releaseOwnership(nv_dev->pDevice);
|
||||
}
|
||||
#endif
|
||||
|
||||
cancel_delayed_work_sync(&nv_dev->hotplug_event_work);
|
||||
mutex_lock(&nv_dev->lock);
|
||||
|
||||
@ -781,6 +802,14 @@ static int nv_drm_get_dev_info_ioctl(struct drm_device *dev,
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int nv_drm_get_drm_file_unique_id_ioctl(struct drm_device *dev,
|
||||
void *data, struct drm_file *filep)
|
||||
{
|
||||
struct drm_nvidia_get_drm_file_unique_id_params *params = data;
|
||||
params->id = (u64)(filep->driver_priv);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int nv_drm_dmabuf_supported_ioctl(struct drm_device *dev,
|
||||
void *data, struct drm_file *filep)
|
||||
{
|
||||
@ -1279,6 +1308,17 @@ static void nv_drm_postclose(struct drm_device *dev, struct drm_file *filep)
|
||||
}
|
||||
#endif /* NV_DRM_ATOMIC_MODESET_AVAILABLE */
|
||||
|
||||
static int nv_drm_open(struct drm_device *dev, struct drm_file *filep)
|
||||
{
|
||||
_Static_assert(sizeof(filep->driver_priv) >= sizeof(u64),
|
||||
"filep->driver_priv can not hold an u64");
|
||||
static atomic64_t id = ATOMIC_INIT(0);
|
||||
|
||||
filep->driver_priv = (void *)atomic64_inc_return(&id);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
#if defined(NV_DRM_MASTER_HAS_LEASES)
|
||||
static struct drm_master *nv_drm_find_lessee(struct drm_master *master,
|
||||
int lessee_id)
|
||||
@ -1522,6 +1562,9 @@ static const struct drm_ioctl_desc nv_drm_ioctls[] = {
|
||||
DRM_IOCTL_DEF_DRV(NVIDIA_GET_DEV_INFO,
|
||||
nv_drm_get_dev_info_ioctl,
|
||||
DRM_RENDER_ALLOW|DRM_UNLOCKED),
|
||||
DRM_IOCTL_DEF_DRV(NVIDIA_GET_DRM_FILE_UNIQUE_ID,
|
||||
nv_drm_get_drm_file_unique_id_ioctl,
|
||||
DRM_RENDER_ALLOW|DRM_UNLOCKED),
|
||||
|
||||
#if defined(NV_DRM_FENCE_AVAILABLE)
|
||||
DRM_IOCTL_DEF_DRV(NVIDIA_FENCE_SUPPORTED,
|
||||
@ -1604,6 +1647,9 @@ static struct drm_driver nv_drm_driver = {
|
||||
.driver_features =
|
||||
#if defined(NV_DRM_DRIVER_PRIME_FLAG_PRESENT)
|
||||
DRIVER_PRIME |
|
||||
#endif
|
||||
#if defined(NV_DRM_SYNCOBJ_FEATURES_PRESENT)
|
||||
DRIVER_SYNCOBJ | DRIVER_SYNCOBJ_TIMELINE |
|
||||
#endif
|
||||
DRIVER_GEM | DRIVER_RENDER,
|
||||
|
||||
@ -1615,14 +1661,14 @@ static struct drm_driver nv_drm_driver = {
|
||||
.num_ioctls = ARRAY_SIZE(nv_drm_ioctls),
|
||||
|
||||
/*
|
||||
* linux-next commit 71a7974ac701 ("drm/prime: Unexport helpers for fd/handle
|
||||
* conversion") unexports drm_gem_prime_handle_to_fd() and
|
||||
* Linux kernel v6.6 commit 71a7974ac701 ("drm/prime: Unexport helpers
|
||||
* for fd/handle conversion") unexports drm_gem_prime_handle_to_fd() and
|
||||
* drm_gem_prime_fd_to_handle().
|
||||
*
|
||||
* Prior linux-next commit 6b85aa68d9d5 ("drm: Enable PRIME import/export for
|
||||
* all drivers") made these helpers the default when .prime_handle_to_fd /
|
||||
* .prime_fd_to_handle are unspecified, so it's fine to just skip specifying
|
||||
* them if the helpers aren't present.
|
||||
* Prior Linux kernel v6.6 commit 6b85aa68d9d5 ("drm: Enable PRIME
|
||||
* import/export for all drivers") made these helpers the default when
|
||||
* .prime_handle_to_fd / .prime_fd_to_handle are unspecified, so it's fine
|
||||
* to just skip specifying them if the helpers aren't present.
|
||||
*/
|
||||
#if NV_IS_EXPORT_SYMBOL_PRESENT_drm_gem_prime_handle_to_fd
|
||||
.prime_handle_to_fd = drm_gem_prime_handle_to_fd,
|
||||
@ -1656,6 +1702,7 @@ static struct drm_driver nv_drm_driver = {
|
||||
#if defined(NV_DRM_ATOMIC_MODESET_AVAILABLE)
|
||||
.postclose = nv_drm_postclose,
|
||||
#endif
|
||||
.open = nv_drm_open,
|
||||
|
||||
.fops = &nv_drm_fops,
|
||||
|
||||
@ -1714,6 +1761,7 @@ void nv_drm_register_drm_device(const nv_gpu_info_t *gpu_info)
|
||||
struct nv_drm_device *nv_dev = NULL;
|
||||
struct drm_device *dev = NULL;
|
||||
struct device *device = gpu_info->os_device_ptr;
|
||||
bool bus_is_pci;
|
||||
|
||||
DRM_DEBUG(
|
||||
"Registering device for NVIDIA GPU ID 0x08%x",
|
||||
@ -1747,7 +1795,7 @@ void nv_drm_register_drm_device(const nv_gpu_info_t *gpu_info)
|
||||
dev->dev_private = nv_dev;
|
||||
nv_dev->dev = dev;
|
||||
|
||||
bool bus_is_pci =
|
||||
bus_is_pci =
|
||||
#if defined(NV_LINUX)
|
||||
device->bus == &pci_bus_type;
|
||||
#elif defined(NV_BSD)
|
||||
@ -1771,11 +1819,6 @@ void nv_drm_register_drm_device(const nv_gpu_info_t *gpu_info)
|
||||
if (nv_drm_fbdev_module_param &&
|
||||
drm_core_check_feature(dev, DRIVER_MODESET)) {
|
||||
|
||||
if (!nvKms->grabOwnership(nv_dev->pDevice)) {
|
||||
NV_DRM_DEV_LOG_ERR(nv_dev, "Failed to grab NVKMS modeset ownership");
|
||||
goto failed_grab_ownership;
|
||||
}
|
||||
|
||||
if (bus_is_pci) {
|
||||
struct pci_dev *pdev = to_pci_dev(device);
|
||||
|
||||
@ -1786,8 +1829,6 @@ void nv_drm_register_drm_device(const nv_gpu_info_t *gpu_info)
|
||||
#endif
|
||||
}
|
||||
drm_fbdev_generic_setup(dev, 32);
|
||||
|
||||
nv_dev->hasFramebufferConsole = NV_TRUE;
|
||||
}
|
||||
#endif /* defined(NV_DRM_FBDEV_GENERIC_AVAILABLE) */
|
||||
|
||||
@ -1798,12 +1839,6 @@ void nv_drm_register_drm_device(const nv_gpu_info_t *gpu_info)
|
||||
|
||||
return; /* Success */
|
||||
|
||||
#if defined(NV_DRM_FBDEV_GENERIC_AVAILABLE)
|
||||
failed_grab_ownership:
|
||||
|
||||
drm_dev_unregister(dev);
|
||||
#endif
|
||||
|
||||
failed_drm_register:
|
||||
|
||||
nv_drm_dev_free(dev);
|
||||
@ -1870,12 +1905,6 @@ void nv_drm_remove_devices(void)
|
||||
struct nv_drm_device *next = dev_list->next;
|
||||
struct drm_device *dev = dev_list->dev;
|
||||
|
||||
#if defined(NV_DRM_FBDEV_GENERIC_AVAILABLE)
|
||||
if (dev_list->hasFramebufferConsole) {
|
||||
drm_atomic_helper_shutdown(dev);
|
||||
nvKms->releaseOwnership(dev_list->pDevice);
|
||||
}
|
||||
#endif
|
||||
drm_dev_unregister(dev);
|
||||
nv_drm_dev_free(dev);
|
||||
|
||||
|
@ -293,14 +293,12 @@ __nv_drm_prime_fence_context_new(
|
||||
* to check a return value.
|
||||
*/
|
||||
|
||||
*nv_prime_fence_context = (struct nv_drm_prime_fence_context) {
|
||||
.base.ops = &nv_drm_prime_fence_context_ops,
|
||||
.base.nv_dev = nv_dev,
|
||||
.base.context = nv_dma_fence_context_alloc(1),
|
||||
.base.fenceSemIndex = p->index,
|
||||
.pSemSurface = pSemSurface,
|
||||
.pLinearAddress = pLinearAddress,
|
||||
};
|
||||
nv_prime_fence_context->base.ops = &nv_drm_prime_fence_context_ops;
|
||||
nv_prime_fence_context->base.nv_dev = nv_dev;
|
||||
nv_prime_fence_context->base.context = nv_dma_fence_context_alloc(1);
|
||||
nv_prime_fence_context->base.fenceSemIndex = p->index;
|
||||
nv_prime_fence_context->pSemSurface = pSemSurface;
|
||||
nv_prime_fence_context->pLinearAddress = pLinearAddress;
|
||||
|
||||
INIT_LIST_HEAD(&nv_prime_fence_context->pending);
|
||||
|
||||
@ -1261,18 +1259,16 @@ __nv_drm_semsurf_fence_ctx_new(
|
||||
* to check a return value.
|
||||
*/
|
||||
|
||||
*ctx = (struct nv_drm_semsurf_fence_ctx) {
|
||||
.base.ops = &nv_drm_semsurf_fence_ctx_ops,
|
||||
.base.nv_dev = nv_dev,
|
||||
.base.context = nv_dma_fence_context_alloc(1),
|
||||
.base.fenceSemIndex = p->index,
|
||||
.pSemSurface = pSemSurface,
|
||||
.pSemMapping.pVoid = semMapping,
|
||||
.pMaxSubmittedMapping = (volatile NvU64 *)maxSubmittedMapping,
|
||||
.callback.local = NULL,
|
||||
.callback.nvKms = NULL,
|
||||
.current_wait_value = 0,
|
||||
};
|
||||
ctx->base.ops = &nv_drm_semsurf_fence_ctx_ops;
|
||||
ctx->base.nv_dev = nv_dev;
|
||||
ctx->base.context = nv_dma_fence_context_alloc(1);
|
||||
ctx->base.fenceSemIndex = p->index;
|
||||
ctx->pSemSurface = pSemSurface;
|
||||
ctx->pSemMapping.pVoid = semMapping;
|
||||
ctx->pMaxSubmittedMapping = (volatile NvU64 *)maxSubmittedMapping;
|
||||
ctx->callback.local = NULL;
|
||||
ctx->callback.nvKms = NULL;
|
||||
ctx->current_wait_value = 0;
|
||||
|
||||
spin_lock_init(&ctx->lock);
|
||||
INIT_LIST_HEAD(&ctx->pending_fences);
|
||||
|
@ -551,14 +551,12 @@ static struct drm_gem_object *__nv_drm_gem_nvkms_prime_dup(
|
||||
{
|
||||
struct nv_drm_device *nv_dev = to_nv_device(dev);
|
||||
const struct nv_drm_device *nv_dev_src;
|
||||
const struct nv_drm_gem_nvkms_memory *nv_nvkms_memory_src;
|
||||
struct nv_drm_gem_nvkms_memory *nv_nvkms_memory;
|
||||
struct NvKmsKapiMemory *pMemory;
|
||||
|
||||
BUG_ON(nv_gem_src == NULL || nv_gem_src->ops != &nv_gem_nvkms_memory_ops);
|
||||
|
||||
nv_dev_src = to_nv_device(nv_gem_src->base.dev);
|
||||
nv_nvkms_memory_src = to_nv_nvkms_memory_const(nv_gem_src);
|
||||
|
||||
if ((nv_nvkms_memory =
|
||||
nv_drm_calloc(1, sizeof(*nv_nvkms_memory))) == NULL) {
|
||||
|
@ -45,8 +45,7 @@
|
||||
|
||||
/*
|
||||
* The inclusion of drm_framebuffer.h was removed from drm_crtc.h by commit
|
||||
* 720cf96d8fecde29b72e1101f8a567a0ce99594f ("drm: Drop drm_framebuffer.h from
|
||||
* drm_crtc.h") in linux-next, expected in v5.19-rc7.
|
||||
* 720cf96d8fec ("drm: Drop drm_framebuffer.h from drm_crtc.h") in v6.0.
|
||||
*
|
||||
* We only need drm_framebuffer.h for drm_framebuffer_put(), and it is always
|
||||
* present (v4.9+) when drm_framebuffer_{put,get}() is present (v4.12+), so it
|
||||
|
@ -613,8 +613,8 @@ static inline int nv_drm_format_num_planes(uint32_t format)
|
||||
#endif /* defined(NV_DRM_FORMAT_MODIFIERS_PRESENT) */
|
||||
|
||||
/*
|
||||
* DRM_UNLOCKED was removed with linux-next commit 2798ffcc1d6a ("drm: Remove
|
||||
* locking for legacy ioctls and DRM_UNLOCKED"), but it was previously made
|
||||
* DRM_UNLOCKED was removed with commit 2798ffcc1d6a ("drm: Remove locking for
|
||||
* legacy ioctls and DRM_UNLOCKED") in v6.8, but it was previously made
|
||||
* implicit for all non-legacy DRM driver IOCTLs since Linux v4.10 commit
|
||||
* fa5386459f06 "drm: Used DRM_LEGACY for all legacy functions" (Linux v4.4
|
||||
* commit ea487835e887 "drm: Enforce unlocked ioctl operation for kms driver
|
||||
|
@@ -52,6 +52,7 @@
#define DRM_NVIDIA_SEMSURF_FENCE_CREATE 0x15
#define DRM_NVIDIA_SEMSURF_FENCE_WAIT 0x16
#define DRM_NVIDIA_SEMSURF_FENCE_ATTACH 0x17
#define DRM_NVIDIA_GET_DRM_FILE_UNIQUE_ID 0x18

#define DRM_IOCTL_NVIDIA_GEM_IMPORT_NVKMS_MEMORY \
DRM_IOWR((DRM_COMMAND_BASE + DRM_NVIDIA_GEM_IMPORT_NVKMS_MEMORY), \
@@ -157,6 +158,11 @@
DRM_NVIDIA_SEMSURF_FENCE_ATTACH), \
struct drm_nvidia_semsurf_fence_attach_params)

#define DRM_IOCTL_NVIDIA_GET_DRM_FILE_UNIQUE_ID \
DRM_IOWR((DRM_COMMAND_BASE + \
DRM_NVIDIA_GET_DRM_FILE_UNIQUE_ID), \
struct drm_nvidia_get_drm_file_unique_id_params)

struct drm_nvidia_gem_import_nvkms_memory_params {
uint64_t mem_size; /* IN */
@@ -385,4 +391,8 @@ struct drm_nvidia_semsurf_fence_attach_params {
uint64_t wait_value; /* IN Semaphore value to reach before signal */
};

struct drm_nvidia_get_drm_file_unique_id_params {
uint64_t id; /* OUT Unique ID of the DRM file */
};

#endif /* _UAPI_NVIDIA_DRM_IOCTL_H_ */
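/*
 * Illustrative userspace sketch (not part of this diff): how the new
 * DRM_IOCTL_NVIDIA_GET_DRM_FILE_UNIQUE_ID ioctl added above could be
 * invoked. Only the ioctl macro and the params struct come from the header
 * shown here; the device node path and the include paths (the header also
 * needs the DRM uapi headers) are assumptions.
 */
#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <unistd.h>
#include "nvidia-drm-ioctl.h"   /* provides the struct and ioctl macro above */

int main(void)
{
    struct drm_nvidia_get_drm_file_unique_id_params params = { 0 };
    int fd = open("/dev/dri/card0", O_RDWR);   /* device node assumed */

    if (fd < 0)
        return 1;

    if (ioctl(fd, DRM_IOCTL_NVIDIA_GET_DRM_FILE_UNIQUE_ID, &params) == 0)
        printf("DRM file unique id: %llu\n", (unsigned long long)params.id);

    close(fd);
    return 0;
}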
|
||||
|
@@ -587,6 +587,9 @@ int nv_drm_atomic_commit(struct drm_device *dev,
NV_DRM_DEV_LOG_ERR(
nv_dev,
"Flip event timeout on head %u", nv_crtc->head);
while (!list_empty(&nv_crtc->flip_list)) {
__nv_drm_handle_flip_event(nv_crtc);
}
}
}
}
|
||||
|
@@ -128,4 +128,5 @@ NV_CONFTEST_TYPE_COMPILE_TESTS += drm_driver_has_dumb_destroy
NV_CONFTEST_TYPE_COMPILE_TESTS += fence_ops_use_64bit_seqno
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_aperture_remove_conflicting_pci_framebuffers_has_driver_arg
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_mode_create_dp_colorspace_property_has_supported_colorspaces_arg
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_syncobj_features_present
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_unlocked_ioctl_flag_present
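/*
 * Illustrative sketch (not from this diff): a typical way the new
 * drm_unlocked_ioctl_flag_present conftest result gets consumed. The
 * generated macro name NV_DRM_UNLOCKED_IOCTL_FLAG_PRESENT is an assumption;
 * the idea is simply to fall back to 0 on kernels (v6.8+) where the
 * DRM_UNLOCKED flag no longer exists, since non-legacy DRM ioctls have been
 * unlocked by default since v4.10 anyway (see the comment above).
 */
#if defined(NV_DRM_UNLOCKED_IOCTL_FLAG_PRESENT)
#define NV_DRM_IOCTL_UNLOCKED_FLAG DRM_UNLOCKED
#else
#define NV_DRM_IOCTL_UNLOCKED_FLAG 0
#endif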
|
||||
|
@@ -77,10 +77,10 @@ module_param_named(disable_hdmi_frl, disable_hdmi_frl, bool, 0400);
static bool disable_vrr_memclk_switch = false;
module_param_named(disable_vrr_memclk_switch, disable_vrr_memclk_switch, bool, 0400);

static bool hdmi_deepcolor = false;
static bool hdmi_deepcolor = true;
module_param_named(hdmi_deepcolor, hdmi_deepcolor, bool, 0400);

static bool vblank_sem_control = false;
static bool vblank_sem_control = true;
module_param_named(vblank_sem_control, vblank_sem_control, bool, 0400);

static bool opportunistic_display_sync = true;
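/*
 * Usage note (not part of this diff): with hdmi_deepcolor and
 * vblank_sem_control now defaulting to true per the change above, both
 * features become opt-out. They can still be disabled at module load time
 * through module options, e.g. in /etc/modprobe.d/nvidia.conf (the
 * nvidia-modeset module name is assumed):
 *
 *     options nvidia-modeset hdmi_deepcolor=0 vblank_sem_control=0
 */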
|
||||
@@ -139,6 +139,20 @@ NvBool nvkms_opportunistic_display_sync(void)
return opportunistic_display_sync;
}

NvBool nvkms_kernel_supports_syncpts(void)
{
/*
* Note this only checks that the kernel has the prerequisite
* support for syncpts; callers must also check that the hardware
* supports syncpts.
*/
#if (defined(CONFIG_TEGRA_GRHOST) || defined(NV_LINUX_HOST1X_NEXT_H_PRESENT))
return NV_TRUE;
#else
return NV_FALSE;
#endif
}
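/*
 * Illustrative caller sketch (not from this diff): as the comment above
 * says, nvkms_kernel_supports_syncpts() only reports kernel-side support,
 * so a caller is still expected to AND it with a per-device hardware
 * capability. The helper name and its parameter are hypothetical.
 */
static NvBool nvkms_can_use_syncpts(NvBool hw_supports_syncpts)
{
    return nvkms_kernel_supports_syncpts() && hw_supports_syncpts;
}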
|
||||
|
||||
#define NVKMS_SYNCPT_STUBS_NEEDED
|
||||
|
||||
/*************************************************************************
|
||||
@@ -1234,6 +1248,26 @@ void nvkms_close_from_kapi(struct nvkms_per_open *popen)
nvkms_close_pm_unlocked(popen);
}

NvBool nvkms_ioctl_from_kapi_try_pmlock
(
struct nvkms_per_open *popen,
NvU32 cmd, void *params_address, const size_t param_size
)
{
NvBool ret;

if (nvkms_read_trylock_pm_lock()) {
return NV_FALSE;
}

ret = nvkms_ioctl_common(popen,
cmd,
(NvU64)(NvUPtr)params_address, param_size) == 0;
nvkms_read_unlock_pm_lock();

return ret;
}
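/*
 * Illustrative caller sketch (not from this diff): the _try_pmlock variant
 * above is for contexts that must not block on the power management lock.
 * Note it returns NV_FALSE both when the lock is contended and when the
 * ioctl itself fails; the defer-and-retry policy shown here is an
 * assumption, not the driver's actual policy.
 */
static NvBool example_kapi_ioctl_nonblocking(struct nvkms_per_open *popen,
                                             NvU32 cmd,
                                             void *params, size_t size)
{
    if (nvkms_ioctl_from_kapi_try_pmlock(popen, cmd, params, size)) {
        return NV_TRUE;
    }

    /* Busy or failed: defer to a context that may sleep and use the
     * blocking nvkms_ioctl_from_kapi() there instead. */
    return NV_FALSE;
}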
|
||||
|
||||
NvBool nvkms_ioctl_from_kapi
|
||||
(
|
||||
struct nvkms_per_open *popen,
|
||||
|
@ -304,6 +304,11 @@ NvU32 nvkms_enumerate_gpus(nv_gpu_info_t *gpu_info);
|
||||
|
||||
NvBool nvkms_allow_write_combining(void);
|
||||
|
||||
/*!
|
||||
* Check if OS supports syncpoints.
|
||||
*/
|
||||
NvBool nvkms_kernel_supports_syncpts(void);
|
||||
|
||||
/*!
|
||||
* Checks whether the fd is associated with an nvidia character device.
|
||||
*/
|
||||
@ -328,6 +333,16 @@ NvBool nvkms_ioctl_from_kapi
|
||||
NvU32 cmd, void *params_address, const size_t params_size
|
||||
);
|
||||
|
||||
/*!
|
||||
* Like nvkms_ioctl_from_kapi, but return NV_FALSE instead of waiting if the
|
||||
* power management read lock cannot be acquired.
|
||||
*/
|
||||
NvBool nvkms_ioctl_from_kapi_try_pmlock
|
||||
(
|
||||
struct nvkms_per_open *popen,
|
||||
NvU32 cmd, void *params_address, const size_t params_size
|
||||
);
|
||||
|
||||
/*!
|
||||
* APIs for locking.
|
||||
*/
|
||||
|
@ -105,3 +105,4 @@ NV_CONFTEST_FUNCTION_COMPILE_TESTS += list_is_first
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += ktime_get_real_ts64
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += ktime_get_raw_ts64
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += acpi_video_backlight_use_native
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += kernel_read_has_pointer_pos_arg
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*******************************************************************************
|
||||
Copyright (c) 2013-2023 NVIDIA Corporation
|
||||
Copyright (c) 2013-2024 NVIDIA Corporation
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to
|
||||
@ -3463,8 +3463,7 @@ NV_STATUS UvmToolsDestroySession(UvmToolsSessionHandle session);
|
||||
//
|
||||
|
||||
#if UVM_API_REV_IS_AT_MOST(10)
|
||||
// This is deprecated and replaced by sizeof(UvmToolsEventControlData_V1) or
|
||||
// sizeof(UvmToolsEventControlData_V2).
|
||||
// This is deprecated and replaced by sizeof(UvmToolsEventControlData).
|
||||
NvLength UvmToolsGetEventControlSize(void);
|
||||
|
||||
// This is deprecated and replaced by sizeof(UvmEventEntry_V1) or
|
||||
@ -3488,8 +3487,6 @@ NvLength UvmToolsGetNumberOfCounters(void);
|
||||
// version: (INPUT)
|
||||
// Requested version for events or counters.
|
||||
// See UvmEventEntry_V1 and UvmEventEntry_V2.
|
||||
// UvmToolsEventControlData_V2::version records the entry version that
|
||||
// will be generated.
|
||||
//
|
||||
// event_buffer: (INPUT)
|
||||
// User allocated buffer. Must be page-aligned. Must be large enough to
|
||||
@ -3502,8 +3499,7 @@ NvLength UvmToolsGetNumberOfCounters(void);
|
||||
//
|
||||
// event_control (INPUT)
|
||||
// User allocated buffer. Must be page-aligned. Must be large enough to
|
||||
// hold UvmToolsEventControlData_V1 if version is UvmEventEntry_V1 or
|
||||
// UvmToolsEventControlData_V2 (although single page-size allocation
|
||||
// hold UvmToolsEventControlData (although single page-size allocation
|
||||
// should be more than enough). Gets pinned until queue is destroyed.
|
||||
//
|
||||
// queue: (OUTPUT)
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*******************************************************************************
|
||||
Copyright (c) 2018-2023 NVIDIA Corporation
|
||||
Copyright (c) 2018-2024 NVIDIA Corporation
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to
|
||||
@ -205,17 +205,18 @@ void uvm_hal_ampere_host_clear_faulted_channel_sw_method(uvm_push_t *push,
|
||||
CLEAR_FAULTED_B, HWVALUE(C076, CLEAR_FAULTED_B, INST_HI, instance_ptr_hi));
|
||||
}
|
||||
|
||||
// Copy from Pascal, this version sets TLB_INVALIDATE_INVAL_SCOPE.
|
||||
// Copy from Turing, this version sets TLB_INVALIDATE_INVAL_SCOPE.
|
||||
void uvm_hal_ampere_host_tlb_invalidate_all(uvm_push_t *push,
|
||||
uvm_gpu_phys_address_t pdb,
|
||||
NvU32 depth,
|
||||
uvm_membar_t membar)
|
||||
uvm_gpu_phys_address_t pdb,
|
||||
NvU32 depth,
|
||||
uvm_membar_t membar)
|
||||
{
|
||||
NvU32 aperture_value;
|
||||
NvU32 page_table_level;
|
||||
NvU32 pdb_lo;
|
||||
NvU32 pdb_hi;
|
||||
NvU32 ack_value = 0;
|
||||
NvU32 sysmembar_value = 0;
|
||||
|
||||
UVM_ASSERT_MSG(pdb.aperture == UVM_APERTURE_VID || pdb.aperture == UVM_APERTURE_SYS, "aperture: %u", pdb.aperture);
|
||||
|
||||
@ -230,8 +231,8 @@ void uvm_hal_ampere_host_tlb_invalidate_all(uvm_push_t *push,
|
||||
pdb_lo = pdb.address & HWMASK(C56F, MEM_OP_C, TLB_INVALIDATE_PDB_ADDR_LO);
|
||||
pdb_hi = pdb.address >> HWSIZE(C56F, MEM_OP_C, TLB_INVALIDATE_PDB_ADDR_LO);
|
||||
|
||||
// PDE3 is the highest level on Pascal, see the comment in uvm_pascal_mmu.c
|
||||
// for details.
|
||||
// PDE3 is the highest level on Pascal-Ampere, see the comment in
|
||||
// uvm_pascal_mmu.c for details.
|
||||
UVM_ASSERT_MSG(depth < NVC56F_MEM_OP_C_TLB_INVALIDATE_PAGE_TABLE_LEVEL_UP_TO_PDE3, "depth %u", depth);
|
||||
page_table_level = NVC56F_MEM_OP_C_TLB_INVALIDATE_PAGE_TABLE_LEVEL_UP_TO_PDE3 - depth;
|
||||
|
||||
@ -242,7 +243,12 @@ void uvm_hal_ampere_host_tlb_invalidate_all(uvm_push_t *push,
|
||||
ack_value = HWCONST(C56F, MEM_OP_C, TLB_INVALIDATE_ACK_TYPE, GLOBALLY);
|
||||
}
|
||||
|
||||
NV_PUSH_4U(C56F, MEM_OP_A, HWCONST(C56F, MEM_OP_A, TLB_INVALIDATE_SYSMEMBAR, DIS) |
|
||||
if (membar == UVM_MEMBAR_SYS)
|
||||
sysmembar_value = HWCONST(C56F, MEM_OP_A, TLB_INVALIDATE_SYSMEMBAR, EN);
|
||||
else
|
||||
sysmembar_value = HWCONST(C56F, MEM_OP_A, TLB_INVALIDATE_SYSMEMBAR, DIS);
|
||||
|
||||
NV_PUSH_4U(C56F, MEM_OP_A, sysmembar_value |
|
||||
HWCONST(C56F, MEM_OP_A, TLB_INVALIDATE_INVAL_SCOPE, NON_LINK_TLBS),
|
||||
MEM_OP_B, 0,
|
||||
MEM_OP_C, HWCONST(C56F, MEM_OP_C, TLB_INVALIDATE_PDB, ONE) |
|
||||
@ -255,16 +261,18 @@ void uvm_hal_ampere_host_tlb_invalidate_all(uvm_push_t *push,
|
||||
MEM_OP_D, HWCONST(C56F, MEM_OP_D, OPERATION, MMU_TLB_INVALIDATE) |
|
||||
HWVALUE(C56F, MEM_OP_D, TLB_INVALIDATE_PDB_ADDR_HI, pdb_hi));
|
||||
|
||||
uvm_hal_tlb_invalidate_membar(push, membar);
|
||||
// GPU membar still requires an explicit membar method.
if (membar == UVM_MEMBAR_GPU)
uvm_push_get_gpu(push)->parent->host_hal->membar_gpu(push);
}
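/*
 * Illustrative sketch (not from this diff): the replacement above folds the
 * system-memory barrier into the invalidate method itself
 * (TLB_INVALIDATE_SYSMEMBAR = EN) and only issues a separate host
 * membar_gpu() method for GPU-local barriers. A tiny helper like this one
 * (hypothetical name) states that decision in one place.
 */
static bool tlb_invalidate_needs_membar_gpu_method(uvm_membar_t membar)
{
    /* UVM_MEMBAR_SYS is covered by the SYSMEMBAR field of MEM_OP_A and
     * UVM_MEMBAR_NONE needs nothing, so only UVM_MEMBAR_GPU is left. */
    return membar == UVM_MEMBAR_GPU;
}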
|
||||
|
||||
// Copy from Volta, this version sets TLB_INVALIDATE_INVAL_SCOPE.
|
||||
// Copy from Turing, this version sets TLB_INVALIDATE_INVAL_SCOPE.
|
||||
void uvm_hal_ampere_host_tlb_invalidate_va(uvm_push_t *push,
|
||||
uvm_gpu_phys_address_t pdb,
|
||||
NvU32 depth,
|
||||
NvU64 base,
|
||||
NvU64 size,
|
||||
NvU32 page_size,
|
||||
NvU64 page_size,
|
||||
uvm_membar_t membar)
|
||||
{
|
||||
NvU32 aperture_value;
|
||||
@ -272,6 +280,7 @@ void uvm_hal_ampere_host_tlb_invalidate_va(uvm_push_t *push,
|
||||
NvU32 pdb_lo;
|
||||
NvU32 pdb_hi;
|
||||
NvU32 ack_value = 0;
|
||||
NvU32 sysmembar_value = 0;
|
||||
NvU32 va_lo;
|
||||
NvU32 va_hi;
|
||||
NvU64 end;
|
||||
@ -281,9 +290,9 @@ void uvm_hal_ampere_host_tlb_invalidate_va(uvm_push_t *push,
|
||||
NvU32 log2_invalidation_size;
|
||||
uvm_gpu_t *gpu = uvm_push_get_gpu(push);
|
||||
|
||||
UVM_ASSERT_MSG(IS_ALIGNED(page_size, 1 << 12), "page_size 0x%x\n", page_size);
|
||||
UVM_ASSERT_MSG(IS_ALIGNED(base, page_size), "base 0x%llx page_size 0x%x\n", base, page_size);
|
||||
UVM_ASSERT_MSG(IS_ALIGNED(size, page_size), "size 0x%llx page_size 0x%x\n", size, page_size);
|
||||
UVM_ASSERT_MSG(IS_ALIGNED(page_size, 1 << 12), "page_size 0x%llx\n", page_size);
|
||||
UVM_ASSERT_MSG(IS_ALIGNED(base, page_size), "base 0x%llx page_size 0x%llx\n", base, page_size);
|
||||
UVM_ASSERT_MSG(IS_ALIGNED(size, page_size), "size 0x%llx page_size 0x%llx\n", size, page_size);
|
||||
UVM_ASSERT_MSG(size > 0, "size 0x%llx\n", size);
|
||||
|
||||
// The invalidation size must be a power-of-two number of pages containing
|
||||
@ -325,7 +334,7 @@ void uvm_hal_ampere_host_tlb_invalidate_va(uvm_push_t *push,
|
||||
pdb_lo = pdb.address & HWMASK(C56F, MEM_OP_C, TLB_INVALIDATE_PDB_ADDR_LO);
|
||||
pdb_hi = pdb.address >> HWSIZE(C56F, MEM_OP_C, TLB_INVALIDATE_PDB_ADDR_LO);
|
||||
|
||||
// PDE3 is the highest level on Pascal-Ampere , see the comment in
|
||||
// PDE3 is the highest level on Pascal-Ampere, see the comment in
|
||||
// uvm_pascal_mmu.c for details.
|
||||
UVM_ASSERT_MSG(depth < NVC56F_MEM_OP_C_TLB_INVALIDATE_PAGE_TABLE_LEVEL_UP_TO_PDE3, "depth %u", depth);
|
||||
page_table_level = NVC56F_MEM_OP_C_TLB_INVALIDATE_PAGE_TABLE_LEVEL_UP_TO_PDE3 - depth;
|
||||
@ -337,10 +346,15 @@ void uvm_hal_ampere_host_tlb_invalidate_va(uvm_push_t *push,
|
||||
ack_value = HWCONST(C56F, MEM_OP_C, TLB_INVALIDATE_ACK_TYPE, GLOBALLY);
|
||||
}
|
||||
|
||||
if (membar == UVM_MEMBAR_SYS)
|
||||
sysmembar_value = HWCONST(C56F, MEM_OP_A, TLB_INVALIDATE_SYSMEMBAR, EN);
|
||||
else
|
||||
sysmembar_value = HWCONST(C56F, MEM_OP_A, TLB_INVALIDATE_SYSMEMBAR, DIS);
|
||||
|
||||
NV_PUSH_4U(C56F, MEM_OP_A, HWVALUE(C56F, MEM_OP_A, TLB_INVALIDATE_INVALIDATION_SIZE, log2_invalidation_size) |
|
||||
HWCONST(C56F, MEM_OP_A, TLB_INVALIDATE_SYSMEMBAR, DIS) |
|
||||
HWVALUE(C56F, MEM_OP_A, TLB_INVALIDATE_TARGET_ADDR_LO, va_lo) |
|
||||
HWCONST(C56F, MEM_OP_A, TLB_INVALIDATE_INVAL_SCOPE, NON_LINK_TLBS),
|
||||
HWCONST(C56F, MEM_OP_A, TLB_INVALIDATE_INVAL_SCOPE, NON_LINK_TLBS) |
|
||||
sysmembar_value |
|
||||
HWVALUE(C56F, MEM_OP_A, TLB_INVALIDATE_TARGET_ADDR_LO, va_lo),
|
||||
MEM_OP_B, HWVALUE(C56F, MEM_OP_B, TLB_INVALIDATE_TARGET_ADDR_HI, va_hi),
|
||||
MEM_OP_C, HWCONST(C56F, MEM_OP_C, TLB_INVALIDATE_PDB, ONE) |
|
||||
HWVALUE(C56F, MEM_OP_C, TLB_INVALIDATE_PDB_ADDR_LO, pdb_lo) |
|
||||
@ -352,21 +366,23 @@ void uvm_hal_ampere_host_tlb_invalidate_va(uvm_push_t *push,
|
||||
MEM_OP_D, HWCONST(C56F, MEM_OP_D, OPERATION, MMU_TLB_INVALIDATE_TARGETED) |
|
||||
HWVALUE(C56F, MEM_OP_D, TLB_INVALIDATE_PDB_ADDR_HI, pdb_hi));
|
||||
|
||||
uvm_hal_tlb_invalidate_membar(push, membar);
|
||||
// GPU membar still requires an explicit membar method.
|
||||
if (membar == UVM_MEMBAR_GPU)
|
||||
gpu->parent->host_hal->membar_gpu(push);
|
||||
}
|
||||
|
||||
// Copy from Pascal, this version sets TLB_INVALIDATE_INVAL_SCOPE.
|
||||
// Copy from Turing, this version sets TLB_INVALIDATE_INVAL_SCOPE.
|
||||
void uvm_hal_ampere_host_tlb_invalidate_test(uvm_push_t *push,
|
||||
uvm_gpu_phys_address_t pdb,
|
||||
UVM_TEST_INVALIDATE_TLB_PARAMS *params)
|
||||
{
|
||||
NvU32 ack_value = 0;
|
||||
NvU32 sysmembar_value = 0;
|
||||
NvU32 invalidate_gpc_value = 0;
|
||||
NvU32 aperture_value = 0;
|
||||
NvU32 pdb_lo = 0;
|
||||
NvU32 pdb_hi = 0;
|
||||
NvU32 page_table_level = 0;
|
||||
uvm_membar_t membar;
|
||||
|
||||
UVM_ASSERT_MSG(pdb.aperture == UVM_APERTURE_VID || pdb.aperture == UVM_APERTURE_SYS, "aperture: %u", pdb.aperture);
|
||||
if (pdb.aperture == UVM_APERTURE_VID)
|
||||
@ -381,7 +397,7 @@ void uvm_hal_ampere_host_tlb_invalidate_test(uvm_push_t *push,
|
||||
pdb_hi = pdb.address >> HWSIZE(C56F, MEM_OP_C, TLB_INVALIDATE_PDB_ADDR_LO);
|
||||
|
||||
if (params->page_table_level != UvmInvalidatePageTableLevelAll) {
|
||||
// PDE3 is the highest level on Pascal, see the comment in
|
||||
// PDE3 is the highest level on Pascal-Ampere, see the comment in
|
||||
// uvm_pascal_mmu.c for details.
|
||||
page_table_level = min((NvU32)UvmInvalidatePageTableLevelPde3, params->page_table_level) - 1;
|
||||
}
|
||||
@ -393,6 +409,11 @@ void uvm_hal_ampere_host_tlb_invalidate_test(uvm_push_t *push,
|
||||
ack_value = HWCONST(C56F, MEM_OP_C, TLB_INVALIDATE_ACK_TYPE, GLOBALLY);
|
||||
}
|
||||
|
||||
if (params->membar == UvmInvalidateTlbMemBarSys)
|
||||
sysmembar_value = HWCONST(C56F, MEM_OP_A, TLB_INVALIDATE_SYSMEMBAR, EN);
|
||||
else
|
||||
sysmembar_value = HWCONST(C56F, MEM_OP_A, TLB_INVALIDATE_SYSMEMBAR, DIS);
|
||||
|
||||
if (params->disable_gpc_invalidate)
|
||||
invalidate_gpc_value = HWCONST(C56F, MEM_OP_C, TLB_INVALIDATE_GPC, DISABLE);
|
||||
else
|
||||
@ -403,9 +424,9 @@ void uvm_hal_ampere_host_tlb_invalidate_test(uvm_push_t *push,
|
||||
|
||||
NvU32 va_lo = va & HWMASK(C56F, MEM_OP_A, TLB_INVALIDATE_TARGET_ADDR_LO);
|
||||
NvU32 va_hi = va >> HWSIZE(C56F, MEM_OP_A, TLB_INVALIDATE_TARGET_ADDR_LO);
|
||||
NV_PUSH_4U(C56F, MEM_OP_A, HWCONST(C56F, MEM_OP_A, TLB_INVALIDATE_SYSMEMBAR, DIS) |
|
||||
HWVALUE(C56F, MEM_OP_A, TLB_INVALIDATE_TARGET_ADDR_LO, va_lo) |
|
||||
HWCONST(C56F, MEM_OP_A, TLB_INVALIDATE_INVAL_SCOPE, NON_LINK_TLBS),
|
||||
NV_PUSH_4U(C56F, MEM_OP_A, sysmembar_value |
|
||||
HWCONST(C56F, MEM_OP_A, TLB_INVALIDATE_INVAL_SCOPE, NON_LINK_TLBS) |
|
||||
HWVALUE(C56F, MEM_OP_A, TLB_INVALIDATE_TARGET_ADDR_LO, va_lo),
|
||||
MEM_OP_B, HWVALUE(C56F, MEM_OP_B, TLB_INVALIDATE_TARGET_ADDR_HI, va_hi),
|
||||
MEM_OP_C, HWCONST(C56F, MEM_OP_C, TLB_INVALIDATE_REPLAY, NONE) |
|
||||
HWVALUE(C56F, MEM_OP_C, TLB_INVALIDATE_PAGE_TABLE_LEVEL, page_table_level) |
|
||||
@ -418,7 +439,7 @@ void uvm_hal_ampere_host_tlb_invalidate_test(uvm_push_t *push,
|
||||
HWVALUE(C56F, MEM_OP_D, TLB_INVALIDATE_PDB_ADDR_HI, pdb_hi));
|
||||
}
|
||||
else {
|
||||
NV_PUSH_4U(C56F, MEM_OP_A, HWCONST(C56F, MEM_OP_A, TLB_INVALIDATE_SYSMEMBAR, DIS) |
|
||||
NV_PUSH_4U(C56F, MEM_OP_A, sysmembar_value |
|
||||
HWCONST(C56F, MEM_OP_A, TLB_INVALIDATE_INVAL_SCOPE, NON_LINK_TLBS),
|
||||
MEM_OP_B, 0,
|
||||
MEM_OP_C, HWCONST(C56F, MEM_OP_C, TLB_INVALIDATE_REPLAY, NONE) |
|
||||
@ -432,12 +453,7 @@ void uvm_hal_ampere_host_tlb_invalidate_test(uvm_push_t *push,
|
||||
HWVALUE(C56F, MEM_OP_D, TLB_INVALIDATE_PDB_ADDR_HI, pdb_hi));
|
||||
}
|
||||
|
||||
if (params->membar == UvmInvalidateTlbMemBarSys)
|
||||
membar = UVM_MEMBAR_SYS;
|
||||
else if (params->membar == UvmInvalidateTlbMemBarLocal)
|
||||
membar = UVM_MEMBAR_GPU;
|
||||
else
|
||||
membar = UVM_MEMBAR_NONE;
|
||||
|
||||
uvm_hal_tlb_invalidate_membar(push, membar);
|
||||
// GPU membar still requires an explicit membar method.
|
||||
if (params->membar == UvmInvalidateTlbMemBarLocal)
|
||||
uvm_push_get_gpu(push)->parent->host_hal->membar_gpu(push);
|
||||
}
|
||||
|
@ -51,7 +51,7 @@ uvm_mmu_engine_type_t uvm_hal_ampere_mmu_engine_id_to_type(NvU16 mmu_engine_id)
|
||||
return UVM_MMU_ENGINE_TYPE_GRAPHICS;
|
||||
}
|
||||
|
||||
static NvU32 page_table_depth_ampere(NvU32 page_size)
|
||||
static NvU32 page_table_depth_ampere(NvU64 page_size)
|
||||
{
|
||||
// The common-case is page_size == UVM_PAGE_SIZE_2M, hence the first check
|
||||
if (page_size == UVM_PAGE_SIZE_2M)
|
||||
@ -62,14 +62,14 @@ static NvU32 page_table_depth_ampere(NvU32 page_size)
|
||||
return 4;
|
||||
}
|
||||
|
||||
static NvU32 page_sizes_ampere(void)
|
||||
static NvU64 page_sizes_ampere(void)
|
||||
{
|
||||
return UVM_PAGE_SIZE_512M | UVM_PAGE_SIZE_2M | UVM_PAGE_SIZE_64K | UVM_PAGE_SIZE_4K;
|
||||
}
|
||||
|
||||
static uvm_mmu_mode_hal_t ampere_mmu_mode_hal;
|
||||
|
||||
uvm_mmu_mode_hal_t *uvm_hal_mmu_mode_ampere(NvU32 big_page_size)
|
||||
uvm_mmu_mode_hal_t *uvm_hal_mmu_mode_ampere(NvU64 big_page_size)
|
||||
{
|
||||
static bool initialized = false;
|
||||
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*******************************************************************************
|
||||
Copyright (c) 2018-2021 NVIDIA Corporation
|
||||
Copyright (c) 2018-2024 NVIDIA Corporation
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*******************************************************************************
|
||||
Copyright (c) 2018-2021 NVIDIA Corporation
|
||||
Copyright (c) 2018-2024 NVIDIA Corporation
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to
|
||||
@ -29,10 +29,9 @@
|
||||
#include "uvm_ats_ibm.h"
|
||||
#include "nv_uvm_types.h"
|
||||
#include "uvm_lock.h"
|
||||
#include "uvm_ats_sva.h"
|
||||
|
||||
#include "uvm_ats_sva.h"
|
||||
|
||||
#define UVM_ATS_SUPPORTED() (UVM_ATS_IBM_SUPPORTED() || UVM_ATS_SVA_SUPPORTED())
|
||||
#define UVM_ATS_SUPPORTED() (UVM_ATS_IBM_SUPPORTED() || UVM_ATS_SVA_SUPPORTED())
|
||||
|
||||
typedef struct
|
||||
{
|
||||
|
@ -1541,14 +1541,14 @@ static uvm_gpfifo_entry_t *uvm_channel_get_first_pending_entry(uvm_channel_t *ch
|
||||
NV_STATUS uvm_channel_get_status(uvm_channel_t *channel)
|
||||
{
|
||||
uvm_gpu_t *gpu;
|
||||
NvNotification *errorNotifier;
|
||||
NvNotification *error_notifier;
|
||||
|
||||
if (uvm_channel_is_proxy(channel))
|
||||
errorNotifier = channel->proxy.channel_info.shadowErrorNotifier;
|
||||
error_notifier = channel->proxy.channel_info.shadowErrorNotifier;
|
||||
else
|
||||
errorNotifier = channel->channel_info.errorNotifier;
|
||||
error_notifier = channel->channel_info.errorNotifier;
|
||||
|
||||
if (errorNotifier->status == 0)
|
||||
if (error_notifier->status == 0)
|
||||
return NV_OK;
|
||||
|
||||
// In case we hit a channel error, check the ECC error notifier as well so
|
||||
@@ -2584,16 +2584,18 @@ out:

// Return the pool corresponding to the given CE index
//
// This function cannot be used to access the proxy pool in SR-IOV heavy.
// Used to retrieve pools of type UVM_CHANNEL_POOL_TYPE_CE only.
static uvm_channel_pool_t *channel_manager_ce_pool(uvm_channel_manager_t *manager, NvU32 ce)
{
uvm_channel_pool_t *pool;
uvm_channel_pool_t *pool = uvm_channel_pool_first(manager, UVM_CHANNEL_POOL_TYPE_CE);

UVM_ASSERT(pool != NULL);
UVM_ASSERT(test_bit(ce, manager->ce_mask));

// The index of the pool associated with 'ce' is the number of usable CEs
// in [0, ce)
pool = manager->channel_pools + bitmap_weight(manager->ce_mask, ce);
// Pools of type UVM_CHANNEL_POOL_TYPE_CE are stored contiguously. The
// offset of the pool associated with 'ce' is the number of usable CEs in
// [0, ce).
pool += bitmap_weight(manager->ce_mask, ce);

UVM_ASSERT(pool->pool_type == UVM_CHANNEL_POOL_TYPE_CE);
UVM_ASSERT(pool->engine_index == ce);
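/*
 * Standalone sketch of the indexing trick above (not from this diff): when
 * one array element exists per set bit of a mask, the element for bit 'ce'
 * lives at the popcount of the bits below 'ce'. Plain C stand-ins are used
 * here instead of the kernel's bitmap_weight()/test_bit() helpers.
 */
#include <assert.h>
#include <stdint.h>

static unsigned bits_below(uint64_t mask, unsigned bit)
{
    return (unsigned)__builtin_popcountll(mask & ((1ULL << bit) - 1));
}

static int pool_index_for_ce(uint64_t ce_mask, unsigned ce)
{
    assert(ce_mask & (1ULL << ce));     /* 'ce' must be a usable CE */
    return (int)bits_below(ce_mask, ce);
}

/* Example: with CEs {1, 4, 5} usable (mask 0x32), CE 4 maps to index 1. */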
|
||||
@ -2811,6 +2813,7 @@ static unsigned channel_manager_get_max_pools(uvm_channel_manager_t *manager)
|
||||
static NV_STATUS channel_manager_create_ce_pools(uvm_channel_manager_t *manager, unsigned *preferred_ce)
|
||||
{
|
||||
unsigned ce;
|
||||
unsigned type;
|
||||
|
||||
// A pool is created for each usable CE, even if it has not been selected as
|
||||
// the preferred CE for any type, because as more information is discovered
|
||||
@ -2818,18 +2821,20 @@ static NV_STATUS channel_manager_create_ce_pools(uvm_channel_manager_t *manager,
|
||||
// previously idle pools.
|
||||
for_each_set_bit(ce, manager->ce_mask, UVM_COPY_ENGINE_COUNT_MAX) {
|
||||
NV_STATUS status;
|
||||
unsigned type;
|
||||
uvm_channel_pool_t *pool = NULL;
|
||||
|
||||
status = channel_pool_add(manager, UVM_CHANNEL_POOL_TYPE_CE, ce, &pool);
|
||||
if (status != NV_OK)
|
||||
return status;
|
||||
}
|
||||
|
||||
for (type = 0; type < UVM_CHANNEL_TYPE_CE_COUNT; type++) {
|
||||
// Set pool type if it hasn't been set before.
|
||||
if (preferred_ce[type] == ce && manager->pool_to_use.default_for_type[type] == NULL)
|
||||
manager->pool_to_use.default_for_type[type] = pool;
|
||||
}
|
||||
for (type = 0; type < UVM_CHANNEL_TYPE_CE_COUNT; type++) {
|
||||
// Avoid overwriting previously set defaults.
|
||||
if (manager->pool_to_use.default_for_type[type] != NULL)
|
||||
continue;
|
||||
|
||||
ce = preferred_ce[type];
|
||||
manager->pool_to_use.default_for_type[type] = channel_manager_ce_pool(manager, ce);
|
||||
}
|
||||
|
||||
return NV_OK;
|
||||
|
@ -218,8 +218,9 @@ static NV_STATUS alloc_and_init_address_space(uvm_gpu_t *gpu)
|
||||
if (status != NV_OK)
|
||||
return status;
|
||||
|
||||
gpu->big_page.internal_size = gpu_address_space_info.bigPageSize;
|
||||
UVM_ASSERT(gpu_address_space_info.bigPageSize <= NV_U32_MAX);
|
||||
|
||||
gpu->big_page.internal_size = gpu_address_space_info.bigPageSize;
|
||||
gpu->time.time0_register = gpu_address_space_info.time0Offset;
|
||||
gpu->time.time1_register = gpu_address_space_info.time1Offset;
|
||||
|
||||
@ -458,6 +459,7 @@ static const char *uvm_gpu_virt_type_string(UVM_VIRT_MODE virtMode)
|
||||
|
||||
static const char *uvm_gpu_link_type_string(uvm_gpu_link_type_t link_type)
|
||||
{
|
||||
|
||||
BUILD_BUG_ON(UVM_GPU_LINK_MAX != 7);
|
||||
|
||||
switch (link_type) {
|
||||
@ -1082,9 +1084,6 @@ static NV_STATUS configure_address_space(uvm_gpu_t *gpu)
|
||||
gpu->parent->rm_va_size,
|
||||
va_per_entry);
|
||||
|
||||
UVM_ASSERT(uvm_mmu_page_size_supported(&gpu->address_space_tree, gpu->big_page.internal_size));
|
||||
UVM_ASSERT(uvm_mmu_page_size_supported(&gpu->address_space_tree, gpu->mem_info.max_vidmem_page_size));
|
||||
|
||||
tree_alloc = uvm_page_tree_pdb(&gpu->address_space_tree);
|
||||
status = uvm_rm_locked_call(nvUvmInterfaceSetPageDirectory(gpu->rm_address_space,
|
||||
tree_alloc->addr.address,
|
||||
@ -2364,9 +2363,7 @@ static NV_STATUS init_peer_access(uvm_gpu_t *gpu0,
|
||||
|
||||
// check for peer-to-peer compatibility (PCI-E or NvLink).
|
||||
peer_caps->link_type = get_gpu_link_type(p2p_caps_params->p2pLink);
|
||||
if (peer_caps->link_type == UVM_GPU_LINK_INVALID
|
||||
|| peer_caps->link_type == UVM_GPU_LINK_C2C
|
||||
)
|
||||
if (peer_caps->link_type == UVM_GPU_LINK_INVALID || peer_caps->link_type == UVM_GPU_LINK_C2C)
|
||||
return NV_ERR_NOT_SUPPORTED;
|
||||
|
||||
peer_caps->total_link_line_rate_mbyte_per_s = p2p_caps_params->totalLinkLineRateMBps;
|
||||
@ -3296,7 +3293,10 @@ void uvm_parent_gpu_dma_free_page(uvm_parent_gpu_t *parent_gpu, void *va, NvU64
|
||||
atomic64_sub(PAGE_SIZE, &parent_gpu->mapped_cpu_pages_size);
|
||||
}
|
||||
|
||||
NV_STATUS uvm_parent_gpu_map_cpu_pages(uvm_parent_gpu_t *parent_gpu, struct page *page, size_t size, NvU64 *dma_address_out)
|
||||
NV_STATUS uvm_parent_gpu_map_cpu_pages(uvm_parent_gpu_t *parent_gpu,
|
||||
struct page *page,
|
||||
size_t size,
|
||||
NvU64 *dma_address_out)
|
||||
{
|
||||
NvU64 dma_addr;
|
||||
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*******************************************************************************
|
||||
Copyright (c) 2015-2023 NVIDIA Corporation
|
||||
Copyright (c) 2015-2024 NVIDIA Corporation
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*******************************************************************************
|
||||
Copyright (c) 2015-2023 NVIDIA Corporation
|
||||
Copyright (c) 2015-2024 NVIDIA Corporation
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to
|
||||
@ -251,6 +251,9 @@ static uvm_hal_class_ops_t host_table[] =
|
||||
.semaphore_release = uvm_hal_turing_host_semaphore_release,
|
||||
.clear_faulted_channel_method = uvm_hal_turing_host_clear_faulted_channel_method,
|
||||
.set_gpfifo_entry = uvm_hal_turing_host_set_gpfifo_entry,
|
||||
.tlb_invalidate_all = uvm_hal_turing_host_tlb_invalidate_all,
|
||||
.tlb_invalidate_va = uvm_hal_turing_host_tlb_invalidate_va,
|
||||
.tlb_invalidate_test = uvm_hal_turing_host_tlb_invalidate_test,
|
||||
}
|
||||
},
|
||||
{
|
||||
@ -632,13 +635,19 @@ NV_STATUS uvm_hal_init_table(void)
|
||||
return status;
|
||||
}
|
||||
|
||||
status = ops_init_from_parent(host_table, ARRAY_SIZE(host_table), HOST_OP_COUNT, offsetof(uvm_hal_class_ops_t, u.host_ops));
|
||||
status = ops_init_from_parent(host_table,
|
||||
ARRAY_SIZE(host_table),
|
||||
HOST_OP_COUNT,
|
||||
offsetof(uvm_hal_class_ops_t, u.host_ops));
|
||||
if (status != NV_OK) {
|
||||
UVM_ERR_PRINT("ops_init_from_parent(host_table) failed: %s\n", nvstatusToString(status));
|
||||
return status;
|
||||
}
|
||||
|
||||
status = ops_init_from_parent(arch_table, ARRAY_SIZE(arch_table), ARCH_OP_COUNT, offsetof(uvm_hal_class_ops_t, u.arch_ops));
|
||||
status = ops_init_from_parent(arch_table,
|
||||
ARRAY_SIZE(arch_table),
|
||||
ARCH_OP_COUNT,
|
||||
offsetof(uvm_hal_class_ops_t, u.arch_ops));
|
||||
if (status != NV_OK) {
|
||||
UVM_ERR_PRINT("ops_init_from_parent(arch_table) failed: %s\n", nvstatusToString(status));
|
||||
return status;
|
||||
@ -932,14 +941,16 @@ const char *uvm_mmu_engine_type_string(uvm_mmu_engine_type_t mmu_engine_type)
|
||||
void uvm_hal_print_fault_entry(const uvm_fault_buffer_entry_t *entry)
|
||||
{
|
||||
UVM_DBG_PRINT("fault_address: 0x%llx\n", entry->fault_address);
|
||||
UVM_DBG_PRINT(" fault_instance_ptr: {0x%llx:%s}\n", entry->instance_ptr.address,
|
||||
uvm_aperture_string(entry->instance_ptr.aperture));
|
||||
UVM_DBG_PRINT(" fault_instance_ptr: {0x%llx:%s}\n",
|
||||
entry->instance_ptr.address,
|
||||
uvm_aperture_string(entry->instance_ptr.aperture));
|
||||
UVM_DBG_PRINT(" fault_type: %s\n", uvm_fault_type_string(entry->fault_type));
|
||||
UVM_DBG_PRINT(" fault_access_type: %s\n", uvm_fault_access_type_string(entry->fault_access_type));
|
||||
UVM_DBG_PRINT(" is_replayable: %s\n", entry->is_replayable? "true": "false");
|
||||
UVM_DBG_PRINT(" is_virtual: %s\n", entry->is_virtual? "true": "false");
|
||||
UVM_DBG_PRINT(" in_protected_mode: %s\n", entry->in_protected_mode? "true": "false");
|
||||
UVM_DBG_PRINT(" fault_source.client_type: %s\n", uvm_fault_client_type_string(entry->fault_source.client_type));
|
||||
UVM_DBG_PRINT(" fault_source.client_type: %s\n",
|
||||
uvm_fault_client_type_string(entry->fault_source.client_type));
|
||||
UVM_DBG_PRINT(" fault_source.client_id: %d\n", entry->fault_source.client_id);
|
||||
UVM_DBG_PRINT(" fault_source.gpc_id: %d\n", entry->fault_source.gpc_id);
|
||||
UVM_DBG_PRINT(" fault_source.mmu_engine_id: %d\n", entry->fault_source.mmu_engine_id);
|
||||
@ -962,13 +973,15 @@ const char *uvm_access_counter_type_string(uvm_access_counter_type_t access_coun
|
||||
void uvm_hal_print_access_counter_buffer_entry(const uvm_access_counter_buffer_entry_t *entry)
|
||||
{
|
||||
if (!entry->address.is_virtual) {
|
||||
UVM_DBG_PRINT("physical address: {0x%llx:%s}\n", entry->address.address,
|
||||
uvm_aperture_string(entry->address.aperture));
|
||||
UVM_DBG_PRINT("physical address: {0x%llx:%s}\n",
|
||||
entry->address.address,
|
||||
uvm_aperture_string(entry->address.aperture));
|
||||
}
|
||||
else {
|
||||
UVM_DBG_PRINT("virtual address: 0x%llx\n", entry->address.address);
|
||||
UVM_DBG_PRINT(" instance_ptr {0x%llx:%s}\n", entry->virtual_info.instance_ptr.address,
|
||||
uvm_aperture_string(entry->virtual_info.instance_ptr.aperture));
|
||||
UVM_DBG_PRINT(" instance_ptr {0x%llx:%s}\n",
|
||||
entry->virtual_info.instance_ptr.address,
|
||||
uvm_aperture_string(entry->virtual_info.instance_ptr.aperture));
|
||||
UVM_DBG_PRINT(" mmu_engine_type %s\n", uvm_mmu_engine_type_string(entry->virtual_info.mmu_engine_type));
|
||||
UVM_DBG_PRINT(" mmu_engine_id %u\n", entry->virtual_info.mmu_engine_id);
|
||||
UVM_DBG_PRINT(" ve_id %u\n", entry->virtual_info.ve_id);
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*******************************************************************************
|
||||
Copyright (c) 2015-2023 NVIDIA Corporation
|
||||
Copyright (c) 2015-2024 NVIDIA Corporation
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to
|
||||
@ -112,6 +112,10 @@ void uvm_hal_pascal_host_tlb_invalidate_all(uvm_push_t *push,
|
||||
uvm_gpu_phys_address_t pdb,
|
||||
NvU32 depth,
|
||||
uvm_membar_t membar);
|
||||
void uvm_hal_turing_host_tlb_invalidate_all(uvm_push_t *push,
|
||||
uvm_gpu_phys_address_t pdb,
|
||||
NvU32 depth,
|
||||
uvm_membar_t membar);
|
||||
void uvm_hal_ampere_host_tlb_invalidate_all(uvm_push_t *push,
|
||||
uvm_gpu_phys_address_t pdb,
|
||||
NvU32 depth,
|
||||
@ -149,42 +153,49 @@ typedef void (*uvm_hal_host_tlb_invalidate_va_t)(uvm_push_t *push,
|
||||
NvU32 depth,
|
||||
NvU64 base,
|
||||
NvU64 size,
|
||||
NvU32 page_size,
|
||||
NvU64 page_size,
|
||||
uvm_membar_t membar);
|
||||
void uvm_hal_maxwell_host_tlb_invalidate_va(uvm_push_t *push,
|
||||
uvm_gpu_phys_address_t pdb,
|
||||
NvU32 depth,
|
||||
NvU64 base,
|
||||
NvU64 size,
|
||||
NvU32 page_size,
|
||||
NvU64 page_size,
|
||||
uvm_membar_t membar);
|
||||
void uvm_hal_pascal_host_tlb_invalidate_va(uvm_push_t *push,
|
||||
uvm_gpu_phys_address_t pdb,
|
||||
NvU32 depth,
|
||||
NvU64 base,
|
||||
NvU64 size,
|
||||
NvU32 page_size,
|
||||
NvU64 page_size,
|
||||
uvm_membar_t membar);
|
||||
void uvm_hal_volta_host_tlb_invalidate_va(uvm_push_t *push,
|
||||
uvm_gpu_phys_address_t pdb,
|
||||
NvU32 depth,
|
||||
NvU64 base,
|
||||
NvU64 size,
|
||||
NvU32 page_size,
|
||||
NvU64 page_size,
|
||||
uvm_membar_t membar);
|
||||
void uvm_hal_turing_host_tlb_invalidate_va(uvm_push_t *push,
|
||||
uvm_gpu_phys_address_t pdb,
|
||||
NvU32 depth,
|
||||
NvU64 base,
|
||||
NvU64 size,
|
||||
NvU64 page_size,
|
||||
uvm_membar_t membar);
|
||||
void uvm_hal_ampere_host_tlb_invalidate_va(uvm_push_t *push,
|
||||
uvm_gpu_phys_address_t pdb,
|
||||
NvU32 depth,
|
||||
NvU64 base,
|
||||
NvU64 size,
|
||||
NvU32 page_size,
|
||||
NvU64 page_size,
|
||||
uvm_membar_t membar);
|
||||
void uvm_hal_hopper_host_tlb_invalidate_va(uvm_push_t *push,
|
||||
uvm_gpu_phys_address_t pdb,
|
||||
NvU32 depth,
|
||||
NvU64 base,
|
||||
NvU64 size,
|
||||
NvU32 page_size,
|
||||
NvU64 page_size,
|
||||
uvm_membar_t membar);
|
||||
|
||||
typedef void (*uvm_hal_host_tlb_invalidate_test_t)(uvm_push_t *push,
|
||||
@ -196,6 +207,9 @@ void uvm_hal_maxwell_host_tlb_invalidate_test(uvm_push_t *push,
|
||||
void uvm_hal_pascal_host_tlb_invalidate_test(uvm_push_t *push,
|
||||
uvm_gpu_phys_address_t pdb,
|
||||
UVM_TEST_INVALIDATE_TLB_PARAMS *params);
|
||||
void uvm_hal_turing_host_tlb_invalidate_test(uvm_push_t *push,
|
||||
uvm_gpu_phys_address_t pdb,
|
||||
UVM_TEST_INVALIDATE_TLB_PARAMS *params);
|
||||
void uvm_hal_ampere_host_tlb_invalidate_test(uvm_push_t *push,
|
||||
uvm_gpu_phys_address_t pdb,
|
||||
UVM_TEST_INVALIDATE_TLB_PARAMS *params);
|
||||
@ -445,15 +459,15 @@ void uvm_hal_ada_arch_init_properties(uvm_parent_gpu_t *parent_gpu);
|
||||
void uvm_hal_hopper_arch_init_properties(uvm_parent_gpu_t *parent_gpu);
|
||||
|
||||
// Retrieve the page-tree HAL for a given big page size
|
||||
typedef uvm_mmu_mode_hal_t *(*uvm_hal_lookup_mode_hal_t)(NvU32 big_page_size);
|
||||
typedef uvm_mmu_mode_hal_t *(*uvm_hal_lookup_mode_hal_t)(NvU64 big_page_size);
|
||||
typedef void (*uvm_hal_mmu_enable_prefetch_faults_t)(uvm_parent_gpu_t *parent_gpu);
|
||||
typedef void (*uvm_hal_mmu_disable_prefetch_faults_t)(uvm_parent_gpu_t *parent_gpu);
|
||||
uvm_mmu_mode_hal_t *uvm_hal_mmu_mode_maxwell(NvU32 big_page_size);
|
||||
uvm_mmu_mode_hal_t *uvm_hal_mmu_mode_pascal(NvU32 big_page_size);
|
||||
uvm_mmu_mode_hal_t *uvm_hal_mmu_mode_volta(NvU32 big_page_size);
|
||||
uvm_mmu_mode_hal_t *uvm_hal_mmu_mode_turing(NvU32 big_page_size);
|
||||
uvm_mmu_mode_hal_t *uvm_hal_mmu_mode_ampere(NvU32 big_page_size);
|
||||
uvm_mmu_mode_hal_t *uvm_hal_mmu_mode_hopper(NvU32 big_page_size);
|
||||
uvm_mmu_mode_hal_t *uvm_hal_mmu_mode_maxwell(NvU64 big_page_size);
|
||||
uvm_mmu_mode_hal_t *uvm_hal_mmu_mode_pascal(NvU64 big_page_size);
|
||||
uvm_mmu_mode_hal_t *uvm_hal_mmu_mode_volta(NvU64 big_page_size);
|
||||
uvm_mmu_mode_hal_t *uvm_hal_mmu_mode_turing(NvU64 big_page_size);
|
||||
uvm_mmu_mode_hal_t *uvm_hal_mmu_mode_ampere(NvU64 big_page_size);
|
||||
uvm_mmu_mode_hal_t *uvm_hal_mmu_mode_hopper(NvU64 big_page_size);
|
||||
void uvm_hal_maxwell_mmu_enable_prefetch_faults_unsupported(uvm_parent_gpu_t *parent_gpu);
|
||||
void uvm_hal_maxwell_mmu_disable_prefetch_faults_unsupported(uvm_parent_gpu_t *parent_gpu);
|
||||
void uvm_hal_pascal_mmu_enable_prefetch_faults(uvm_parent_gpu_t *parent_gpu);
|
||||
|
@ -1599,7 +1599,7 @@ static void hmm_va_block_cpu_unpopulate_chunk(uvm_va_block_t *va_block,
|
||||
UVM_ASSERT(uvm_cpu_chunk_get_size(chunk) == PAGE_SIZE);
|
||||
|
||||
uvm_cpu_chunk_remove_from_block(va_block, chunk_nid, page_index);
|
||||
uvm_va_block_unmap_cpu_chunk_on_gpus(va_block, chunk, page_index);
|
||||
uvm_va_block_unmap_cpu_chunk_on_gpus(va_block, chunk);
|
||||
uvm_cpu_chunk_free(chunk);
|
||||
}
|
||||
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*******************************************************************************
|
||||
Copyright (c) 2020-2022 NVIDIA Corporation
|
||||
Copyright (c) 2020-2024 NVIDIA Corporation
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to
|
||||
@ -157,6 +157,7 @@ void uvm_hal_hopper_host_tlb_invalidate_all(uvm_push_t *push,
|
||||
NvU32 pdb_lo;
|
||||
NvU32 pdb_hi;
|
||||
NvU32 ack_value = 0;
|
||||
NvU32 sysmembar_value = 0;
|
||||
|
||||
UVM_ASSERT_MSG(pdb.aperture == UVM_APERTURE_VID || pdb.aperture == UVM_APERTURE_SYS, "aperture: %u", pdb.aperture);
|
||||
|
||||
@ -183,7 +184,12 @@ void uvm_hal_hopper_host_tlb_invalidate_all(uvm_push_t *push,
|
||||
ack_value = HWCONST(C86F, MEM_OP_C, TLB_INVALIDATE_ACK_TYPE, GLOBALLY);
|
||||
}
|
||||
|
||||
NV_PUSH_4U(C86F, MEM_OP_A, HWCONST(C86F, MEM_OP_A, TLB_INVALIDATE_SYSMEMBAR, DIS) |
|
||||
if (membar == UVM_MEMBAR_SYS)
|
||||
sysmembar_value = HWCONST(C86F, MEM_OP_A, TLB_INVALIDATE_SYSMEMBAR, EN);
|
||||
else
|
||||
sysmembar_value = HWCONST(C86F, MEM_OP_A, TLB_INVALIDATE_SYSMEMBAR, DIS);
|
||||
|
||||
NV_PUSH_4U(C86F, MEM_OP_A, sysmembar_value |
|
||||
HWCONST(C86F, MEM_OP_A, TLB_INVALIDATE_INVAL_SCOPE, NON_LINK_TLBS),
|
||||
MEM_OP_B, 0,
|
||||
MEM_OP_C, HWCONST(C86F, MEM_OP_C, TLB_INVALIDATE_PDB, ONE) |
|
||||
@ -196,7 +202,9 @@ void uvm_hal_hopper_host_tlb_invalidate_all(uvm_push_t *push,
|
||||
MEM_OP_D, HWCONST(C86F, MEM_OP_D, OPERATION, MMU_TLB_INVALIDATE) |
|
||||
HWVALUE(C86F, MEM_OP_D, TLB_INVALIDATE_PDB_ADDR_HI, pdb_hi));
|
||||
|
||||
uvm_hal_tlb_invalidate_membar(push, membar);
|
||||
// GPU membar still requires an explicit membar method.
|
||||
if (membar == UVM_MEMBAR_GPU)
|
||||
uvm_push_get_gpu(push)->parent->host_hal->membar_gpu(push);
|
||||
}
|
||||
|
||||
void uvm_hal_hopper_host_tlb_invalidate_va(uvm_push_t *push,
|
||||
@ -204,7 +212,7 @@ void uvm_hal_hopper_host_tlb_invalidate_va(uvm_push_t *push,
|
||||
NvU32 depth,
|
||||
NvU64 base,
|
||||
NvU64 size,
|
||||
NvU32 page_size,
|
||||
NvU64 page_size,
|
||||
uvm_membar_t membar)
|
||||
{
|
||||
NvU32 aperture_value;
|
||||
@ -212,6 +220,7 @@ void uvm_hal_hopper_host_tlb_invalidate_va(uvm_push_t *push,
|
||||
NvU32 pdb_lo;
|
||||
NvU32 pdb_hi;
|
||||
NvU32 ack_value = 0;
|
||||
NvU32 sysmembar_value = 0;
|
||||
NvU32 va_lo;
|
||||
NvU32 va_hi;
|
||||
NvU64 end;
|
||||
@ -221,9 +230,9 @@ void uvm_hal_hopper_host_tlb_invalidate_va(uvm_push_t *push,
|
||||
NvU32 log2_invalidation_size;
|
||||
uvm_gpu_t *gpu = uvm_push_get_gpu(push);
|
||||
|
||||
UVM_ASSERT_MSG(IS_ALIGNED(page_size, 1 << 12), "page_size 0x%x\n", page_size);
|
||||
UVM_ASSERT_MSG(IS_ALIGNED(base, page_size), "base 0x%llx page_size 0x%x\n", base, page_size);
|
||||
UVM_ASSERT_MSG(IS_ALIGNED(size, page_size), "size 0x%llx page_size 0x%x\n", size, page_size);
|
||||
UVM_ASSERT_MSG(IS_ALIGNED(page_size, 1 << 12), "page_size 0x%llx\n", page_size);
|
||||
UVM_ASSERT_MSG(IS_ALIGNED(base, page_size), "base 0x%llx page_size 0x%llx\n", base, page_size);
|
||||
UVM_ASSERT_MSG(IS_ALIGNED(size, page_size), "size 0x%llx page_size 0x%llx\n", size, page_size);
|
||||
UVM_ASSERT_MSG(size > 0, "size 0x%llx\n", size);
|
||||
|
||||
// The invalidation size must be a power-of-two number of pages containing
|
||||
@ -277,8 +286,13 @@ void uvm_hal_hopper_host_tlb_invalidate_va(uvm_push_t *push,
|
||||
ack_value = HWCONST(C86F, MEM_OP_C, TLB_INVALIDATE_ACK_TYPE, GLOBALLY);
|
||||
}
|
||||
|
||||
if (membar == UVM_MEMBAR_SYS)
|
||||
sysmembar_value = HWCONST(C86F, MEM_OP_A, TLB_INVALIDATE_SYSMEMBAR, EN);
|
||||
else
|
||||
sysmembar_value = HWCONST(C86F, MEM_OP_A, TLB_INVALIDATE_SYSMEMBAR, DIS);
|
||||
|
||||
NV_PUSH_4U(C86F, MEM_OP_A, HWVALUE(C86F, MEM_OP_A, TLB_INVALIDATE_INVALIDATION_SIZE, log2_invalidation_size) |
|
||||
HWCONST(C86F, MEM_OP_A, TLB_INVALIDATE_SYSMEMBAR, DIS) |
|
||||
sysmembar_value |
|
||||
HWCONST(C86F, MEM_OP_A, TLB_INVALIDATE_INVAL_SCOPE, NON_LINK_TLBS) |
|
||||
HWVALUE(C86F, MEM_OP_A, TLB_INVALIDATE_TARGET_ADDR_LO, va_lo),
|
||||
MEM_OP_B, HWVALUE(C86F, MEM_OP_B, TLB_INVALIDATE_TARGET_ADDR_HI, va_hi),
|
||||
@ -292,7 +306,9 @@ void uvm_hal_hopper_host_tlb_invalidate_va(uvm_push_t *push,
|
||||
MEM_OP_D, HWCONST(C86F, MEM_OP_D, OPERATION, MMU_TLB_INVALIDATE_TARGETED) |
|
||||
HWVALUE(C86F, MEM_OP_D, TLB_INVALIDATE_PDB_ADDR_HI, pdb_hi));
|
||||
|
||||
uvm_hal_tlb_invalidate_membar(push, membar);
|
||||
// GPU membar still requires an explicit membar method.
|
||||
if (membar == UVM_MEMBAR_GPU)
|
||||
gpu->parent->host_hal->membar_gpu(push);
|
||||
}
|
||||
|
||||
void uvm_hal_hopper_host_tlb_invalidate_test(uvm_push_t *push,
|
||||
@ -300,12 +316,12 @@ void uvm_hal_hopper_host_tlb_invalidate_test(uvm_push_t *push,
|
||||
UVM_TEST_INVALIDATE_TLB_PARAMS *params)
|
||||
{
|
||||
NvU32 ack_value = 0;
|
||||
NvU32 sysmembar_value = 0;
|
||||
NvU32 invalidate_gpc_value = 0;
|
||||
NvU32 aperture_value = 0;
|
||||
NvU32 pdb_lo = 0;
|
||||
NvU32 pdb_hi = 0;
|
||||
NvU32 page_table_level = 0;
|
||||
uvm_membar_t membar;
|
||||
|
||||
UVM_ASSERT_MSG(pdb.aperture == UVM_APERTURE_VID || pdb.aperture == UVM_APERTURE_SYS, "aperture: %u", pdb.aperture);
|
||||
if (pdb.aperture == UVM_APERTURE_VID)
|
||||
@ -332,6 +348,11 @@ void uvm_hal_hopper_host_tlb_invalidate_test(uvm_push_t *push,
|
||||
ack_value = HWCONST(C86F, MEM_OP_C, TLB_INVALIDATE_ACK_TYPE, GLOBALLY);
|
||||
}
|
||||
|
||||
if (params->membar == UvmInvalidateTlbMemBarSys)
|
||||
sysmembar_value = HWCONST(C86F, MEM_OP_A, TLB_INVALIDATE_SYSMEMBAR, EN);
|
||||
else
|
||||
sysmembar_value = HWCONST(C86F, MEM_OP_A, TLB_INVALIDATE_SYSMEMBAR, DIS);
|
||||
|
||||
if (params->disable_gpc_invalidate)
|
||||
invalidate_gpc_value = HWCONST(C86F, MEM_OP_C, TLB_INVALIDATE_GPC, DISABLE);
|
||||
else
|
||||
@ -343,7 +364,7 @@ void uvm_hal_hopper_host_tlb_invalidate_test(uvm_push_t *push,
|
||||
NvU32 va_lo = va & HWMASK(C86F, MEM_OP_A, TLB_INVALIDATE_TARGET_ADDR_LO);
|
||||
NvU32 va_hi = va >> HWSIZE(C86F, MEM_OP_A, TLB_INVALIDATE_TARGET_ADDR_LO);
|
||||
|
||||
NV_PUSH_4U(C86F, MEM_OP_A, HWCONST(C86F, MEM_OP_A, TLB_INVALIDATE_SYSMEMBAR, DIS) |
|
||||
NV_PUSH_4U(C86F, MEM_OP_A, sysmembar_value |
|
||||
HWCONST(C86F, MEM_OP_A, TLB_INVALIDATE_INVAL_SCOPE, NON_LINK_TLBS) |
|
||||
HWVALUE(C86F, MEM_OP_A, TLB_INVALIDATE_TARGET_ADDR_LO, va_lo),
|
||||
MEM_OP_B, HWVALUE(C86F, MEM_OP_B, TLB_INVALIDATE_TARGET_ADDR_HI, va_hi),
|
||||
@ -358,7 +379,7 @@ void uvm_hal_hopper_host_tlb_invalidate_test(uvm_push_t *push,
|
||||
HWVALUE(C86F, MEM_OP_D, TLB_INVALIDATE_PDB_ADDR_HI, pdb_hi));
|
||||
}
|
||||
else {
|
||||
NV_PUSH_4U(C86F, MEM_OP_A, HWCONST(C86F, MEM_OP_A, TLB_INVALIDATE_SYSMEMBAR, DIS) |
|
||||
NV_PUSH_4U(C86F, MEM_OP_A, sysmembar_value |
|
||||
HWCONST(C86F, MEM_OP_A, TLB_INVALIDATE_INVAL_SCOPE, NON_LINK_TLBS),
|
||||
MEM_OP_B, 0,
|
||||
MEM_OP_C, HWCONST(C86F, MEM_OP_C, TLB_INVALIDATE_REPLAY, NONE) |
|
||||
@ -372,14 +393,9 @@ void uvm_hal_hopper_host_tlb_invalidate_test(uvm_push_t *push,
|
||||
HWVALUE(C86F, MEM_OP_D, TLB_INVALIDATE_PDB_ADDR_HI, pdb_hi));
|
||||
}
|
||||
|
||||
if (params->membar == UvmInvalidateTlbMemBarSys)
|
||||
membar = UVM_MEMBAR_SYS;
|
||||
else if (params->membar == UvmInvalidateTlbMemBarLocal)
|
||||
membar = UVM_MEMBAR_GPU;
|
||||
else
|
||||
membar = UVM_MEMBAR_NONE;
|
||||
|
||||
uvm_hal_tlb_invalidate_membar(push, membar);
|
||||
// GPU membar still requires an explicit membar method.
|
||||
if (params->membar == UvmInvalidateTlbMemBarLocal)
|
||||
uvm_push_get_gpu(push)->parent->host_hal->membar_gpu(push);
|
||||
}
|
||||
|
||||
void uvm_hal_hopper_host_set_gpfifo_pushbuffer_segment_base(NvU64 *fifo_entry, NvU64 pushbuffer_va)
|
||||
|
@ -61,7 +61,7 @@ uvm_mmu_engine_type_t uvm_hal_hopper_mmu_engine_id_to_type(NvU16 mmu_engine_id)
|
||||
return UVM_MMU_ENGINE_TYPE_GRAPHICS;
|
||||
}
|
||||
|
||||
static NvU32 page_table_depth_hopper(NvU32 page_size)
|
||||
static NvU32 page_table_depth_hopper(NvU64 page_size)
|
||||
{
|
||||
// The common-case is page_size == UVM_PAGE_SIZE_2M, hence the first check
|
||||
if (page_size == UVM_PAGE_SIZE_2M)
|
||||
@ -79,7 +79,7 @@ static NvU32 entries_per_index_hopper(NvU32 depth)
|
||||
return 1;
|
||||
}
|
||||
|
||||
static NvLength entry_offset_hopper(NvU32 depth, NvU32 page_size)
|
||||
static NvLength entry_offset_hopper(NvU32 depth, NvU64 page_size)
|
||||
{
|
||||
UVM_ASSERT(depth < 6);
|
||||
if ((page_size == UVM_PAGE_SIZE_4K) && (depth == 4))
|
||||
@ -92,7 +92,7 @@ static NvLength entry_size_hopper(NvU32 depth)
|
||||
return entries_per_index_hopper(depth) * 8;
|
||||
}
|
||||
|
||||
static NvU32 index_bits_hopper(NvU32 depth, NvU32 page_size)
|
||||
static NvU32 index_bits_hopper(NvU32 depth, NvU64 page_size)
|
||||
{
|
||||
static const NvU32 bit_widths[] = {1, 9, 9, 9, 8};
|
||||
|
||||
@ -120,7 +120,7 @@ static NvU32 num_va_bits_hopper(void)
|
||||
return 57;
|
||||
}
|
||||
|
||||
static NvLength allocation_size_hopper(NvU32 depth, NvU32 page_size)
|
||||
static NvLength allocation_size_hopper(NvU32 depth, NvU64 page_size)
|
||||
{
|
||||
UVM_ASSERT(depth < 6);
|
||||
if (depth == 5 && page_size == UVM_PAGE_SIZE_64K)
|
||||
@ -233,7 +233,7 @@ static NvU64 make_sparse_pte_hopper(void)
|
||||
HWCONST64(_MMU_VER3, PTE, PCF, SPARSE);
|
||||
}
|
||||
|
||||
static NvU64 unmapped_pte_hopper(NvU32 page_size)
|
||||
static NvU64 unmapped_pte_hopper(NvU64 page_size)
|
||||
{
|
||||
// Setting PCF to NO_VALID_4KB_PAGE on an otherwise-zeroed big PTE causes
|
||||
// the corresponding 4k PTEs to be ignored. This allows the invalidation of
|
||||
@ -490,7 +490,7 @@ static void make_pde_hopper(void *entry,
|
||||
|
||||
static uvm_mmu_mode_hal_t hopper_mmu_mode_hal;
|
||||
|
||||
uvm_mmu_mode_hal_t *uvm_hal_mmu_mode_hopper(NvU32 big_page_size)
|
||||
uvm_mmu_mode_hal_t *uvm_hal_mmu_mode_hopper(NvU64 big_page_size)
|
||||
{
|
||||
static bool initialized = false;
|
||||
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*******************************************************************************
|
||||
Copyright (c) 2013-2023 NVidia Corporation
|
||||
Copyright (c) 2013-2024 NVidia Corporation
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to
|
||||
@ -494,7 +494,7 @@ typedef struct
|
||||
NvU64 base NV_ALIGN_BYTES(8); // IN
|
||||
NvU64 length NV_ALIGN_BYTES(8); // IN
|
||||
NvU64 offset NV_ALIGN_BYTES(8); // IN
|
||||
UvmGpuMappingAttributes perGpuAttributes[UVM_MAX_GPUS_V2]; // IN
|
||||
UvmGpuMappingAttributes perGpuAttributes[UVM_MAX_GPUS]; // IN
|
||||
NvU64 gpuAttributesCount NV_ALIGN_BYTES(8); // IN
|
||||
NvS32 rmCtrlFd; // IN
|
||||
NvU32 hClient; // IN
|
||||
@ -952,7 +952,6 @@ typedef struct
|
||||
NvU32 version; // OUT
|
||||
} UVM_TOOLS_GET_PROCESSOR_UUID_TABLE_PARAMS;
|
||||
|
||||
|
||||
//
|
||||
// UvmMapDynamicParallelismRegion
|
||||
//
|
||||
@ -995,7 +994,7 @@ typedef struct
|
||||
{
|
||||
NvU64 base NV_ALIGN_BYTES(8); // IN
|
||||
NvU64 length NV_ALIGN_BYTES(8); // IN
|
||||
UvmGpuMappingAttributes perGpuAttributes[UVM_MAX_GPUS_V2]; // IN
|
||||
UvmGpuMappingAttributes perGpuAttributes[UVM_MAX_GPUS]; // IN
|
||||
NvU64 gpuAttributesCount NV_ALIGN_BYTES(8); // IN
|
||||
NV_STATUS rmStatus; // OUT
|
||||
} UVM_ALLOC_SEMAPHORE_POOL_PARAMS;
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*******************************************************************************
|
||||
Copyright (c) 2016-2023 NVIDIA Corporation
|
||||
Copyright (c) 2016-2024 NVIDIA Corporation
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to
|
||||
@ -39,6 +39,7 @@
|
||||
#include "uvm_pte_batch.h"
|
||||
#include "uvm_tlb_batch.h"
|
||||
#include "nv_uvm_interface.h"
|
||||
#include "nv_uvm_types.h"
|
||||
|
||||
#include "uvm_pushbuffer.h"
|
||||
|
||||
@ -60,7 +61,7 @@ typedef struct
|
||||
size_t buffer_size;
|
||||
|
||||
// Page size in bytes
|
||||
NvU32 page_size;
|
||||
NvU64 page_size;
|
||||
|
||||
// Size of a single PTE in bytes
|
||||
NvU32 pte_size;
|
||||
@ -90,7 +91,7 @@ static NV_STATUS uvm_pte_buffer_init(uvm_va_range_t *va_range,
|
||||
uvm_gpu_t *gpu,
|
||||
const uvm_map_rm_params_t *map_rm_params,
|
||||
NvU64 length,
|
||||
NvU32 page_size,
|
||||
NvU64 page_size,
|
||||
uvm_pte_buffer_t *pte_buffer)
|
||||
{
|
||||
uvm_gpu_va_space_t *gpu_va_space = uvm_gpu_va_space_get(va_range->va_space, gpu);
|
||||
@ -101,11 +102,11 @@ static NV_STATUS uvm_pte_buffer_init(uvm_va_range_t *va_range,
|
||||
|
||||
pte_buffer->va_range = va_range;
|
||||
pte_buffer->gpu = gpu;
|
||||
pte_buffer->mapping_info.cachingType = map_rm_params->caching_type;
|
||||
pte_buffer->mapping_info.mappingType = map_rm_params->mapping_type;
|
||||
pte_buffer->mapping_info.formatType = map_rm_params->format_type;
|
||||
pte_buffer->mapping_info.elementBits = map_rm_params->element_bits;
|
||||
pte_buffer->mapping_info.compressionType = map_rm_params->compression_type;
|
||||
pte_buffer->mapping_info.cachingType = (UvmRmGpuCachingType) map_rm_params->caching_type;
|
||||
pte_buffer->mapping_info.mappingType = (UvmRmGpuMappingType) map_rm_params->mapping_type;
|
||||
pte_buffer->mapping_info.formatType = (UvmRmGpuFormatType) map_rm_params->format_type;
|
||||
pte_buffer->mapping_info.elementBits = (UvmRmGpuFormatElementBits) map_rm_params->element_bits;
|
||||
pte_buffer->mapping_info.compressionType = (UvmRmGpuCompressionType) map_rm_params->compression_type;
|
||||
if (va_range->type == UVM_VA_RANGE_TYPE_EXTERNAL)
|
||||
pte_buffer->mapping_info.mappingPageSize = page_size;
|
||||
|
||||
@ -649,9 +650,7 @@ static NV_STATUS set_ext_gpu_map_location(uvm_ext_gpu_map_t *ext_gpu_map,
|
||||
return NV_OK;
|
||||
}
|
||||
// This is a local or peer allocation, so the owning GPU must have been
|
||||
// registered.
|
||||
// This also checks for if EGM owning GPU is registered.
|
||||
|
||||
// registered. This also checks for if EGM owning GPU is registered.
|
||||
owning_gpu = uvm_va_space_get_gpu_by_uuid(va_space, &mem_info->uuid);
|
||||
if (!owning_gpu)
|
||||
return NV_ERR_INVALID_DEVICE;
|
||||
@ -664,7 +663,6 @@ static NV_STATUS set_ext_gpu_map_location(uvm_ext_gpu_map_t *ext_gpu_map,
|
||||
// semantics of sysmem allocations.
|
||||
|
||||
// Check if peer access for peer memory is enabled.
|
||||
// This path also handles EGM allocations.
|
||||
if (owning_gpu != mapping_gpu && (!mem_info->sysmem || mem_info->egm)) {
|
||||
// TODO: Bug 1757136: In SLI, the returned UUID may be different but a
|
||||
// local mapping must be used. We need to query SLI groups to know
|
||||
@ -855,9 +853,10 @@ static NV_STATUS uvm_map_external_allocation_on_gpu(uvm_va_range_t *va_range,
|
||||
uvm_ext_gpu_range_tree_t *range_tree = uvm_ext_gpu_range_tree(va_range, mapping_gpu);
|
||||
UvmGpuMemoryInfo mem_info;
|
||||
uvm_gpu_va_space_t *gpu_va_space = uvm_gpu_va_space_get(va_space, mapping_gpu);
|
||||
NvU32 mapping_page_size;
|
||||
NvU64 mapping_page_size;
|
||||
NvU64 biggest_mapping_page_size;
|
||||
NvU64 alignments;
|
||||
NvU32 smallest_alignment;
|
||||
NvU64 smallest_alignment;
|
||||
NV_STATUS status;
|
||||
|
||||
uvm_assert_rwsem_locked_read(&va_space->lock);
|
||||
@ -946,9 +945,11 @@ static NV_STATUS uvm_map_external_allocation_on_gpu(uvm_va_range_t *va_range,
|
||||
|
||||
// Check for the maximum page size for the mapping of vidmem allocations,
|
||||
// the vMMU segment size may limit the range of page sizes.
|
||||
biggest_mapping_page_size = uvm_mmu_biggest_page_size_up_to(&gpu_va_space->page_tables,
|
||||
mapping_gpu->mem_info.max_vidmem_page_size);
|
||||
if (!ext_gpu_map->is_sysmem && (ext_gpu_map->gpu == ext_gpu_map->owning_gpu) &&
|
||||
(mapping_page_size > mapping_gpu->mem_info.max_vidmem_page_size))
|
||||
mapping_page_size = mapping_gpu->mem_info.max_vidmem_page_size;
|
||||
(mapping_page_size > biggest_mapping_page_size))
|
||||
mapping_page_size = biggest_mapping_page_size;
|
||||
|
||||
mem_info.pageSize = mapping_page_size;
|
||||
|
||||
@ -985,7 +986,7 @@ static NV_STATUS uvm_map_external_allocation(uvm_va_space_t *va_space, UVM_MAP_E
|
||||
if (uvm_api_range_invalid_4k(params->base, params->length))
|
||||
return NV_ERR_INVALID_ADDRESS;
|
||||
|
||||
if (params->gpuAttributesCount == 0 || params->gpuAttributesCount > UVM_MAX_GPUS_V2)
|
||||
if (params->gpuAttributesCount == 0 || params->gpuAttributesCount > UVM_MAX_GPUS)
|
||||
return NV_ERR_INVALID_ARGUMENT;
|
||||
|
||||
mapped_gpus = uvm_processor_mask_cache_alloc();
|
||||
|
@ -108,7 +108,7 @@ void uvm_hal_maxwell_host_tlb_invalidate_va(uvm_push_t *push,
|
||||
NvU32 depth,
|
||||
NvU64 base,
|
||||
NvU64 size,
|
||||
NvU32 page_size,
|
||||
NvU64 page_size,
|
||||
uvm_membar_t membar)
|
||||
{
|
||||
// No per VA invalidate on Maxwell, redirect to invalidate all.
|
||||
|
@ -52,7 +52,7 @@ static NvU32 entries_per_index_maxwell(NvU32 depth)
|
||||
return 1;
|
||||
}
|
||||
|
||||
static NvLength entry_offset_maxwell(NvU32 depth, NvU32 page_size)
|
||||
static NvLength entry_offset_maxwell(NvU32 depth, NvU64 page_size)
|
||||
{
|
||||
UVM_ASSERT(depth < 2);
|
||||
if (page_size == UVM_PAGE_SIZE_4K && depth == 0)
|
||||
@ -128,7 +128,7 @@ static NvLength entry_size_maxwell(NvU32 depth)
|
||||
return 8;
|
||||
}
|
||||
|
||||
static NvU32 index_bits_maxwell_64(NvU32 depth, NvU32 page_size)
|
||||
static NvU32 index_bits_maxwell_64(NvU32 depth, NvU64 page_size)
|
||||
{
|
||||
UVM_ASSERT(depth < 2);
|
||||
UVM_ASSERT(page_size == UVM_PAGE_SIZE_4K ||
|
||||
@ -146,7 +146,7 @@ static NvU32 index_bits_maxwell_64(NvU32 depth, NvU32 page_size)
|
||||
}
|
||||
}
|
||||
|
||||
static NvU32 index_bits_maxwell_128(NvU32 depth, NvU32 page_size)
|
||||
static NvU32 index_bits_maxwell_128(NvU32 depth, NvU64 page_size)
|
||||
{
|
||||
UVM_ASSERT(depth < 2);
|
||||
UVM_ASSERT(page_size == UVM_PAGE_SIZE_4K ||
|
||||
@ -169,32 +169,32 @@ static NvU32 num_va_bits_maxwell(void)
|
||||
return 40;
|
||||
}
|
||||
|
||||
static NvLength allocation_size_maxwell_64(NvU32 depth, NvU32 page_size)
|
||||
static NvLength allocation_size_maxwell_64(NvU32 depth, NvU64 page_size)
|
||||
{
|
||||
return entry_size_maxwell(depth) << index_bits_maxwell_64(depth, page_size);
|
||||
}
|
||||
|
||||
static NvLength allocation_size_maxwell_128(NvU32 depth, NvU32 page_size)
|
||||
static NvLength allocation_size_maxwell_128(NvU32 depth, NvU64 page_size)
|
||||
{
|
||||
return entry_size_maxwell(depth) << index_bits_maxwell_128(depth, page_size);
|
||||
}
|
||||
|
||||
static NvU32 page_table_depth_maxwell(NvU32 page_size)
|
||||
static NvU32 page_table_depth_maxwell(NvU64 page_size)
|
||||
{
|
||||
return 1;
|
||||
}
|
||||
|
||||
static NvU32 page_sizes_maxwell_128(void)
|
||||
static NvU64 page_sizes_maxwell_128(void)
|
||||
{
|
||||
return UVM_PAGE_SIZE_128K | UVM_PAGE_SIZE_4K;
|
||||
}
|
||||
|
||||
static NvU32 page_sizes_maxwell_64(void)
|
||||
static NvU64 page_sizes_maxwell_64(void)
|
||||
{
|
||||
return UVM_PAGE_SIZE_64K | UVM_PAGE_SIZE_4K;
|
||||
}
|
||||
|
||||
static NvU64 unmapped_pte_maxwell(NvU32 page_size)
|
||||
static NvU64 unmapped_pte_maxwell(NvU64 page_size)
|
||||
{
|
||||
// Setting the privilege bit on an otherwise-zeroed big PTE causes the
|
||||
// corresponding 4k PTEs to be ignored. This allows the invalidation of a
|
||||
@ -356,7 +356,7 @@ static uvm_mmu_mode_hal_t maxwell_128_mmu_mode_hal =
|
||||
.page_sizes = page_sizes_maxwell_128
|
||||
};
|
||||
|
||||
uvm_mmu_mode_hal_t *uvm_hal_mmu_mode_maxwell(NvU32 big_page_size)
|
||||
uvm_mmu_mode_hal_t *uvm_hal_mmu_mode_maxwell(NvU64 big_page_size)
|
||||
{
|
||||
UVM_ASSERT(big_page_size == UVM_PAGE_SIZE_64K || big_page_size == UVM_PAGE_SIZE_128K);
|
||||
if (big_page_size == UVM_PAGE_SIZE_64K)
|
||||
|
@ -290,15 +290,15 @@ uvm_chunk_sizes_mask_t uvm_mem_kernel_chunk_sizes(uvm_gpu_t *gpu)
|
||||
// Get the mmu mode hal directly as the internal address space tree has not
|
||||
// been created yet.
|
||||
uvm_mmu_mode_hal_t *hal = gpu->parent->arch_hal->mmu_mode_hal(gpu->big_page.internal_size);
|
||||
NvU32 page_sizes = hal->page_sizes();
|
||||
NvU64 page_sizes = hal->page_sizes();
|
||||
|
||||
return (uvm_chunk_sizes_mask_t)(page_sizes & UVM_CHUNK_SIZES_MASK);
|
||||
}
|
||||
|
||||
static NvU32 mem_pick_chunk_size(uvm_mem_t *mem)
|
||||
static NvU64 mem_pick_chunk_size(uvm_mem_t *mem)
|
||||
{
|
||||
NvU32 biggest_page_size;
|
||||
NvU32 chunk_size;
|
||||
NvU64 biggest_page_size;
|
||||
NvU64 chunk_size;
|
||||
|
||||
if (uvm_mem_is_sysmem(mem))
|
||||
return PAGE_SIZE;
|
||||
@ -315,12 +315,12 @@ static NvU32 mem_pick_chunk_size(uvm_mem_t *mem)
|
||||
// When UVM_PAGE_SIZE_DEFAULT is used on NUMA-enabled GPUs, we force
|
||||
// chunk_size to be PAGE_SIZE at least, to allow CPU mappings.
|
||||
if (mem->backing_gpu->mem_info.numa.enabled)
|
||||
chunk_size = max(chunk_size, (NvU32)PAGE_SIZE);
|
||||
chunk_size = max(chunk_size, (NvU64)PAGE_SIZE);
|
||||
|
||||
return chunk_size;
|
||||
}
|
||||
|
||||
static NvU32 mem_pick_gpu_page_size(uvm_mem_t *mem, uvm_gpu_t *gpu, uvm_page_tree_t *gpu_page_tree)
|
||||
static NvU64 mem_pick_gpu_page_size(uvm_mem_t *mem, uvm_gpu_t *gpu, uvm_page_tree_t *gpu_page_tree)
|
||||
{
|
||||
if (uvm_mem_is_vidmem(mem)) {
|
||||
// For vidmem allocations the chunk size is picked out of the supported
|
||||
@ -467,7 +467,7 @@ static NV_STATUS mem_alloc_sysmem_dma_chunks(uvm_mem_t *mem, gfp_t gfp_flags)
|
||||
NvU64 *dma_addrs;
|
||||
|
||||
UVM_ASSERT_MSG(mem->chunk_size == PAGE_SIZE,
|
||||
"mem->chunk_size is 0x%x. PAGE_SIZE is only supported.",
|
||||
"mem->chunk_size is 0x%llx. PAGE_SIZE is only supported.",
|
||||
mem->chunk_size);
|
||||
UVM_ASSERT(uvm_mem_is_sysmem_dma(mem));
|
||||
|
||||
@ -528,10 +528,9 @@ static NV_STATUS mem_alloc_sysmem_chunks(uvm_mem_t *mem, gfp_t gfp_flags)
|
||||
|
||||
// In case of failure, the caller is required to handle cleanup by calling
|
||||
// uvm_mem_free
|
||||
static NV_STATUS mem_alloc_vidmem_chunks(uvm_mem_t *mem, bool zero, bool is_unprotected)
|
||||
static NV_STATUS mem_alloc_vidmem_chunks(uvm_mem_t *mem, bool zero)
|
||||
{
|
||||
NV_STATUS status;
|
||||
uvm_pmm_gpu_memory_type_t mem_type;
|
||||
|
||||
UVM_ASSERT(uvm_mem_is_vidmem(mem));
|
||||
|
||||
@ -548,23 +547,15 @@ static NV_STATUS mem_alloc_vidmem_chunks(uvm_mem_t *mem, bool zero, bool is_unpr
|
||||
if (!mem->vidmem.chunks)
|
||||
return NV_ERR_NO_MEMORY;
|
||||
|
||||
// When CC is disabled the behavior is identical to that of PMM, and the
|
||||
// protection flag is ignored (squashed by PMM internally).
|
||||
if (is_unprotected)
|
||||
mem_type = UVM_PMM_GPU_MEMORY_TYPE_KERNEL_UNPROTECTED;
|
||||
else
|
||||
mem_type = UVM_PMM_GPU_MEMORY_TYPE_KERNEL_PROTECTED;
|
||||
|
||||
status = uvm_pmm_gpu_alloc(&mem->backing_gpu->pmm,
|
||||
mem->chunks_count,
|
||||
mem->chunk_size,
|
||||
mem_type,
|
||||
UVM_PMM_ALLOC_FLAGS_NONE,
|
||||
mem->vidmem.chunks,
|
||||
NULL);
|
||||
status = uvm_pmm_gpu_alloc_kernel(&mem->backing_gpu->pmm,
|
||||
mem->chunks_count,
|
||||
mem->chunk_size,
|
||||
UVM_PMM_ALLOC_FLAGS_NONE,
|
||||
mem->vidmem.chunks,
|
||||
NULL);
|
||||
|
||||
if (status != NV_OK) {
|
||||
UVM_ERR_PRINT("uvm_pmm_gpu_alloc (count=%zd, size=0x%x) failed: %s\n",
|
||||
UVM_ERR_PRINT("uvm_pmm_gpu_alloc_kernel (count=%zd, size=0x%llx) failed: %s\n",
|
||||
mem->chunks_count,
|
||||
mem->chunk_size,
|
||||
nvstatusToString(status));
|
||||
@ -574,7 +565,7 @@ static NV_STATUS mem_alloc_vidmem_chunks(uvm_mem_t *mem, bool zero, bool is_unpr
|
||||
return NV_OK;
|
||||
}
|
||||
|
||||
static NV_STATUS mem_alloc_chunks(uvm_mem_t *mem, struct mm_struct *mm, bool zero, bool is_unprotected)
|
||||
static NV_STATUS mem_alloc_chunks(uvm_mem_t *mem, struct mm_struct *mm, bool zero)
|
||||
{
|
||||
if (uvm_mem_is_sysmem(mem)) {
|
||||
gfp_t gfp_flags;
|
||||
@ -596,7 +587,7 @@ static NV_STATUS mem_alloc_chunks(uvm_mem_t *mem, struct mm_struct *mm, bool zer
|
||||
return status;
|
||||
}
|
||||
|
||||
return mem_alloc_vidmem_chunks(mem, zero, is_unprotected);
|
||||
return mem_alloc_vidmem_chunks(mem, zero);
|
||||
}
|
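Taken together, the hunks above drop the protected/unprotected distinction from this allocation path: mem_alloc_chunks() no longer threads an is_unprotected flag through, and vidmem backing always comes from uvm_pmm_gpu_alloc_kernel(). A condensed restatement of the new vidmem path, abbreviated from the diff (chunk-array allocation and the zero/scrub handling are omitted, and on failure the caller still cleans up with uvm_mem_free()):

static NV_STATUS mem_alloc_vidmem_chunks_sketch(uvm_mem_t *mem, bool zero)
{
    NV_STATUS status;

    (void)zero; /* zero/scrub handling not shown in this sketch */

    status = uvm_pmm_gpu_alloc_kernel(&mem->backing_gpu->pmm,
                                      mem->chunks_count,
                                      mem->chunk_size,
                                      UVM_PMM_ALLOC_FLAGS_NONE,
                                      mem->vidmem.chunks,
                                      NULL);
    if (status != NV_OK) {
        UVM_ERR_PRINT("uvm_pmm_gpu_alloc_kernel (count=%zd, size=0x%llx) failed: %s\n",
                      mem->chunks_count,
                      mem->chunk_size,
                      nvstatusToString(status));
        return status;
    }

    return NV_OK;
}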
||||
|
||||
NV_STATUS uvm_mem_map_kernel(uvm_mem_t *mem, const uvm_processor_mask_t *mask)
|
||||
@ -626,7 +617,6 @@ NV_STATUS uvm_mem_alloc(const uvm_mem_alloc_params_t *params, uvm_mem_t **mem_ou
|
||||
NV_STATUS status;
|
||||
NvU64 physical_size;
|
||||
uvm_mem_t *mem = NULL;
|
||||
bool is_unprotected = false;
|
||||
|
||||
UVM_ASSERT(params->size > 0);
|
||||
|
||||
@ -648,12 +638,7 @@ NV_STATUS uvm_mem_alloc(const uvm_mem_alloc_params_t *params, uvm_mem_t **mem_ou
|
||||
physical_size = UVM_ALIGN_UP(mem->size, mem->chunk_size);
|
||||
mem->chunks_count = physical_size / mem->chunk_size;
|
||||
|
||||
if (params->is_unprotected)
|
||||
UVM_ASSERT(uvm_mem_is_vidmem(mem));
|
||||
|
||||
is_unprotected = params->is_unprotected;
|
||||
|
||||
status = mem_alloc_chunks(mem, params->mm, params->zero, is_unprotected);
|
||||
status = mem_alloc_chunks(mem, params->mm, params->zero);
|
||||
if (status != NV_OK)
|
||||
goto error;
|
||||
|
||||
@ -1050,7 +1035,7 @@ static NV_STATUS mem_map_gpu(uvm_mem_t *mem,
|
||||
uvm_page_table_range_vec_t **range_vec)
|
||||
{
|
||||
NV_STATUS status;
|
||||
NvU32 page_size;
|
||||
NvU64 page_size;
|
||||
uvm_pmm_alloc_flags_t pmm_flags = UVM_PMM_ALLOC_FLAGS_EVICT;
|
||||
|
||||
uvm_mem_pte_maker_data_t pte_maker_data = {
|
||||
@ -1059,7 +1044,7 @@ static NV_STATUS mem_map_gpu(uvm_mem_t *mem,
|
||||
};
|
||||
|
||||
page_size = mem_pick_gpu_page_size(mem, gpu, tree);
|
||||
UVM_ASSERT_MSG(uvm_mmu_page_size_supported(tree, page_size), "page_size 0x%x\n", page_size);
|
||||
UVM_ASSERT_MSG(uvm_mmu_page_size_supported(tree, page_size), "page_size 0x%llx\n", page_size);
|
||||
|
||||
// When the Confidential Computing feature is enabled, DMA allocations are
|
||||
// majoritarily allocated and managed by a per-GPU DMA buffer pool
|
||||
|
@ -126,12 +126,7 @@ typedef struct
|
||||
//
|
||||
// CPU mappings will always use PAGE_SIZE, so the physical allocation chunk
|
||||
// has to be aligned to PAGE_SIZE.
|
||||
NvU32 page_size;
|
||||
|
||||
// The protection flag is only observed for vidmem allocations when CC is
|
||||
// enabled. If set to true, the allocation returns unprotected vidmem;
|
||||
// otherwise, the allocation returns protected vidmem.
|
||||
bool is_unprotected;
|
||||
NvU64 page_size;
|
||||
|
||||
// If true, the allocation is zeroed (scrubbed).
|
||||
bool zero;
|
||||
@ -199,7 +194,7 @@ struct uvm_mem_struct
|
||||
size_t chunks_count;
|
||||
|
||||
// Size of each physical chunk (vidmem) or CPU page (sysmem)
|
||||
NvU32 chunk_size;
|
||||
NvU64 chunk_size;
|
||||
|
||||
// Size of the allocation
|
||||
NvU64 size;
|
||||
|
@ -153,7 +153,7 @@ static NV_STATUS check_accessible_from_gpu(uvm_gpu_t *gpu, uvm_mem_t *mem)
|
||||
|
||||
for (i = 0; i < verif_size / sizeof(*sys_verif); ++i) {
|
||||
if (sys_verif[i] != mem->size + i) {
|
||||
UVM_TEST_PRINT("Verif failed for %zd = 0x%llx instead of 0x%llx, verif_size=0x%llx mem(size=0x%llx, page_size=%u, processor=%u)\n",
|
||||
UVM_TEST_PRINT("Verif failed for %zd = 0x%llx instead of 0x%llx, verif_size=0x%llx mem(size=0x%llx, page_size=%llu, processor=%u)\n",
|
||||
i,
|
||||
sys_verif[i],
|
||||
(NvU64)(verif_size + i),
|
||||
@ -241,7 +241,7 @@ static NV_STATUS test_map_cpu(uvm_mem_t *mem)
|
||||
return NV_OK;
|
||||
}
|
||||
|
||||
static NV_STATUS test_alloc_sysmem(uvm_va_space_t *va_space, NvU32 page_size, size_t size, uvm_mem_t **mem_out)
|
||||
static NV_STATUS test_alloc_sysmem(uvm_va_space_t *va_space, NvU64 page_size, size_t size, uvm_mem_t **mem_out)
|
||||
{
|
||||
NV_STATUS status;
|
||||
uvm_mem_t *mem;
|
||||
@ -299,7 +299,7 @@ error:
|
||||
return status;
|
||||
}
|
||||
|
||||
static NV_STATUS test_alloc_vidmem(uvm_gpu_t *gpu, NvU32 page_size, size_t size, uvm_mem_t **mem_out)
|
||||
static NV_STATUS test_alloc_vidmem(uvm_gpu_t *gpu, NvU64 page_size, size_t size, uvm_mem_t **mem_out)
|
||||
{
|
||||
NV_STATUS status;
|
||||
uvm_mem_t *mem;
|
||||
@ -334,7 +334,7 @@ error:
|
||||
return status;
|
||||
}
|
||||
|
||||
static bool should_test_page_size(size_t alloc_size, NvU32 page_size)
|
||||
static bool should_test_page_size(size_t alloc_size, NvU64 page_size)
|
||||
{
|
||||
if (g_uvm_global.num_simulated_devices == 0)
|
||||
return true;
|
||||
@ -359,7 +359,7 @@ static NV_STATUS test_all(uvm_va_space_t *va_space)
|
||||
// size on pre-Pascal GPUs with 128K big page size.
|
||||
// Ampere+ also supports 512M PTEs, but since UVM's maximum chunk size is
|
||||
// 2M, we don't test for this page size.
|
||||
static const NvU32 cpu_chunk_sizes = PAGE_SIZE | UVM_PAGE_SIZE_64K | UVM_PAGE_SIZE_128K | UVM_PAGE_SIZE_2M;
|
||||
static const NvU64 cpu_chunk_sizes = PAGE_SIZE | UVM_PAGE_SIZE_64K | UVM_PAGE_SIZE_128K | UVM_PAGE_SIZE_2M;
|
||||
|
||||
// All supported page sizes will be tested, CPU has the most with 4 and +1
|
||||
// for the default.
|
||||
@ -494,41 +494,6 @@ done:
|
||||
return status;
|
||||
}
|
||||
|
||||
static NV_STATUS test_basic_vidmem_unprotected(uvm_gpu_t *gpu)
|
||||
{
|
||||
NV_STATUS status = NV_OK;
|
||||
uvm_mem_t *mem = NULL;
|
||||
|
||||
uvm_mem_alloc_params_t params = { 0 };
|
||||
params.size = UVM_PAGE_SIZE_4K;
|
||||
params.backing_gpu = gpu;
|
||||
params.page_size = UVM_PAGE_SIZE_4K;
|
||||
|
||||
// If CC is enabled, the protection flag is observed. Because currently all
|
||||
// vidmem is in the protected region, the allocation should succeed.
|
||||
//
|
||||
// If CC is disabled, the protection flag is ignored.
|
||||
params.is_unprotected = false;
|
||||
TEST_NV_CHECK_RET(uvm_mem_alloc(¶ms, &mem));
|
||||
|
||||
uvm_mem_free(mem);
|
||||
mem = NULL;
|
||||
|
||||
// If CC is enabled, the allocation should fail because currently the
|
||||
// unprotected region is empty.
|
||||
//
|
||||
// If CC is disabled, the behavior should be identical to that of a
|
||||
// protected allocation.
|
||||
params.is_unprotected = true;
|
||||
if (g_uvm_global.conf_computing_enabled)
|
||||
TEST_CHECK_RET(uvm_mem_alloc(¶ms, &mem) == NV_ERR_NO_MEMORY);
|
||||
else
|
||||
TEST_NV_CHECK_RET(uvm_mem_alloc(¶ms, &mem));
|
||||
|
||||
uvm_mem_free(mem);
|
||||
return status;
|
||||
}
|
||||
|
||||
static NV_STATUS test_basic_sysmem(void)
|
||||
{
|
||||
NV_STATUS status = NV_OK;
|
||||
@ -613,7 +578,6 @@ static NV_STATUS test_basic(uvm_va_space_t *va_space)
|
||||
for_each_va_space_gpu(gpu, va_space) {
|
||||
TEST_NV_CHECK_RET(test_basic_vidmem(gpu));
|
||||
TEST_NV_CHECK_RET(test_basic_sysmem_dma(gpu));
|
||||
TEST_NV_CHECK_RET(test_basic_vidmem_unprotected(gpu));
|
||||
TEST_NV_CHECK_RET(test_basic_dma_pool(gpu));
|
||||
}
|
||||
|
||||
|
@ -153,20 +153,17 @@ static NV_STATUS phys_mem_allocate_sysmem(uvm_page_tree_t *tree, NvLength size,
|
||||
// - UVM_APERTURE_VID biggest page size on vidmem mappings
|
||||
// - UVM_APERTURE_SYS biggest page size on sysmem mappings
|
||||
// - UVM_APERTURE_PEER_0-7 biggest page size on peer mappings
|
||||
static NvU32 mmu_biggest_page_size(uvm_page_tree_t *tree, uvm_aperture_t aperture)
|
||||
static NvU64 mmu_biggest_page_size(uvm_page_tree_t *tree, uvm_aperture_t aperture)
|
||||
{
|
||||
UVM_ASSERT(aperture < UVM_APERTURE_DEFAULT);
|
||||
|
||||
// There may be scenarios where the GMMU must use a subset of the supported
|
||||
// page sizes, e.g., to comply with the vMMU supported page sizes due to
|
||||
// segmentation sizes.
|
||||
if (aperture == UVM_APERTURE_VID) {
|
||||
UVM_ASSERT(tree->gpu->mem_info.max_vidmem_page_size <= NV_U32_MAX);
|
||||
return (NvU32) tree->gpu->mem_info.max_vidmem_page_size;
|
||||
}
|
||||
else {
|
||||
return 1 << __fls(tree->hal->page_sizes());
|
||||
}
|
||||
if (aperture == UVM_APERTURE_VID)
|
||||
return uvm_mmu_biggest_page_size_up_to(tree, tree->gpu->mem_info.max_vidmem_page_size);
|
||||
|
||||
return 1ULL << __fls(tree->hal->page_sizes());
|
||||
}
|
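For vidmem the rewritten helper now defers the cap to uvm_mmu_biggest_page_size_up_to(), whose new definition appears later in this diff, instead of truncating max_vidmem_page_size to 32 bits; for every other aperture the answer is still the highest set bit of the HAL's page-size mask. A standalone illustration of that highest-bit selection, using a portable loop in place of the kernel's __fls():

#include <stdint.h>
#include <stdio.h>

/* Portable stand-in for __fls(): index of the most significant set bit. */
static unsigned highest_set_bit(uint64_t x)
{
    unsigned bit = 0;

    while (x >>= 1)
        bit++;
    return bit;
}

int main(void)
{
    /* Example mask: 4K | 64K | 2M, like the Pascal page_sizes() later on. */
    uint64_t page_sizes = (4ull << 10) | (64ull << 10) | (2ull << 20);
    uint64_t biggest = 1ull << highest_set_bit(page_sizes);

    printf("biggest page size = 0x%llx\n", (unsigned long long)biggest); /* 0x200000 */
    return 0;
}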
||||
|
||||
static NV_STATUS phys_mem_allocate_vidmem(uvm_page_tree_t *tree,
|
||||
@ -254,7 +251,7 @@ static void phys_mem_deallocate(uvm_page_tree_t *tree, uvm_mmu_page_table_alloc_
|
||||
}
|
||||
|
||||
static void page_table_range_init(uvm_page_table_range_t *range,
|
||||
NvU32 page_size,
|
||||
NvU64 page_size,
|
||||
uvm_page_directory_t *dir,
|
||||
NvU32 start_index,
|
||||
NvU32 end_index)
|
||||
@ -444,9 +441,9 @@ static void pde_fill(uvm_page_tree_t *tree,
|
||||
pde_fill_cpu(tree, directory, start_index, pde_count, phys_addr);
|
||||
}
|
||||
|
||||
static void phys_mem_init(uvm_page_tree_t *tree, NvU32 page_size, uvm_page_directory_t *dir, uvm_push_t *push)
|
||||
static void phys_mem_init(uvm_page_tree_t *tree, NvU64 page_size, uvm_page_directory_t *dir, uvm_push_t *push)
|
||||
{
|
||||
NvU32 entries_count = uvm_mmu_page_tree_entries(tree, dir->depth, page_size);
|
||||
NvU64 entries_count = uvm_mmu_page_tree_entries(tree, dir->depth, page_size);
|
||||
NvU8 max_pde_depth = tree->hal->page_table_depth(UVM_PAGE_SIZE_AGNOSTIC) - 1;
|
||||
|
||||
// Passing in NULL for the phys_allocs will mark the child entries as
|
||||
@ -497,7 +494,7 @@ static void phys_mem_init(uvm_page_tree_t *tree, NvU32 page_size, uvm_page_direc
|
||||
}
|
||||
|
||||
static uvm_page_directory_t *allocate_directory(uvm_page_tree_t *tree,
|
||||
NvU32 page_size,
|
||||
NvU64 page_size,
|
||||
NvU32 depth,
|
||||
uvm_pmm_alloc_flags_t pmm_flags)
|
||||
{
|
||||
@ -546,7 +543,7 @@ static inline NvU32 entry_index_from_vaddr(NvU64 vaddr, NvU32 addr_bit_shift, Nv
|
||||
return (NvU32)((vaddr >> addr_bit_shift) & mask);
|
||||
}
|
||||
|
||||
static inline NvU32 index_to_entry(uvm_mmu_mode_hal_t *hal, NvU32 entry_index, NvU32 depth, NvU32 page_size)
|
||||
static inline NvU32 index_to_entry(uvm_mmu_mode_hal_t *hal, NvU32 entry_index, NvU32 depth, NvU64 page_size)
|
||||
{
|
||||
return hal->entries_per_index(depth) * entry_index + hal->entry_offset(depth, page_size);
|
||||
}
|
||||
@ -583,7 +580,7 @@ static void pde_write(uvm_page_tree_t *tree,
|
||||
pde_fill(tree, dir, entry_index, 1, phys_allocs, push);
|
||||
}
|
||||
|
||||
static void host_pde_clear(uvm_page_tree_t *tree, uvm_page_directory_t *dir, NvU32 entry_index, NvU32 page_size)
|
||||
static void host_pde_clear(uvm_page_tree_t *tree, uvm_page_directory_t *dir, NvU32 entry_index, NvU64 page_size)
|
||||
{
|
||||
UVM_ASSERT(dir->ref_count > 0);
|
||||
|
||||
@ -594,35 +591,38 @@ static void host_pde_clear(uvm_page_tree_t *tree, uvm_page_directory_t *dir, NvU
|
||||
static void pde_clear(uvm_page_tree_t *tree,
|
||||
uvm_page_directory_t *dir,
|
||||
NvU32 entry_index,
|
||||
NvU32 page_size,
|
||||
NvU64 page_size,
|
||||
uvm_push_t *push)
|
||||
{
|
||||
host_pde_clear(tree, dir, entry_index, page_size);
|
||||
pde_write(tree, dir, entry_index, false, push);
|
||||
}
|
||||
|
||||
static uvm_chunk_sizes_mask_t allocation_sizes_for_big_page_size(uvm_parent_gpu_t *parent_gpu, NvU32 big_page_size)
|
||||
static uvm_chunk_sizes_mask_t allocation_sizes_for_big_page_size(uvm_parent_gpu_t *parent_gpu, NvU64 big_page_size)
|
||||
{
|
||||
uvm_chunk_sizes_mask_t alloc_sizes = 0;
|
||||
uvm_mmu_mode_hal_t *hal = parent_gpu->arch_hal->mmu_mode_hal(big_page_size);
|
||||
unsigned long page_sizes, page_size_log2;
|
||||
uvm_chunk_sizes_mask_t alloc_sizes;
|
||||
|
||||
if (hal != NULL) {
|
||||
unsigned long page_size_log2;
|
||||
unsigned long page_sizes = hal->page_sizes();
|
||||
BUILD_BUG_ON(sizeof(hal->page_sizes()) > sizeof(page_sizes));
|
||||
if (hal == NULL)
|
||||
return 0;
|
||||
|
||||
for_each_set_bit(page_size_log2, &page_sizes, BITS_PER_LONG) {
|
||||
NvU32 i;
|
||||
NvU32 page_size = (NvU32)(1ULL << page_size_log2);
|
||||
for (i = 0; i <= hal->page_table_depth(page_size); i++)
|
||||
alloc_sizes |= hal->allocation_size(i, page_size);
|
||||
}
|
||||
page_sizes = hal->page_sizes();
|
||||
alloc_sizes = 0;
|
||||
|
||||
BUILD_BUG_ON(sizeof(hal->page_sizes()) > sizeof(page_sizes));
|
||||
|
||||
for_each_set_bit(page_size_log2, &page_sizes, BITS_PER_LONG) {
|
||||
NvU32 i;
|
||||
NvU64 page_size = 1ULL << page_size_log2;
|
||||
for (i = 0; i <= hal->page_table_depth(page_size); i++)
|
||||
alloc_sizes |= hal->allocation_size(i, page_size);
|
||||
}
|
||||
|
||||
return alloc_sizes;
|
||||
}
|
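The restructured function is behaviorally identical; the "if (hal != NULL)" nesting just becomes an early return, and the loop still walks every set bit of the 64-bit page-size mask and ORs in the allocation size of each directory level used by that page size. A standalone sketch of the accumulation, where the two helpers are placeholders for hal->page_table_depth() and hal->allocation_size():

#include <stdint.h>

/* Placeholder values; the real numbers are per-GPU and per-level. */
static unsigned table_depth_for(uint64_t page_size) { (void)page_size; return 4; }
static uint64_t alloc_size_for(unsigned depth, uint64_t page_size)
{
    (void)depth;
    (void)page_size;
    return 4096;
}

static uint64_t accumulate_allocation_sizes(uint64_t page_sizes)
{
    uint64_t alloc_sizes = 0;
    unsigned bit;

    for (bit = 0; bit < 64; bit++) {
        uint64_t page_size = (uint64_t)1 << bit;
        unsigned depth;

        if (!(page_sizes & page_size))
            continue;

        /* OR in the table allocation size of every level for this page size. */
        for (depth = 0; depth <= table_depth_for(page_size); depth++)
            alloc_sizes |= alloc_size_for(depth, page_size);
    }

    return alloc_sizes;
}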
||||
|
||||
static NvU32 page_sizes_for_big_page_size(uvm_parent_gpu_t *parent_gpu, NvU32 big_page_size)
|
||||
static NvU64 page_sizes_for_big_page_size(uvm_parent_gpu_t *parent_gpu, NvU64 big_page_size)
|
||||
{
|
||||
uvm_mmu_mode_hal_t *hal = parent_gpu->arch_hal->mmu_mode_hal(big_page_size);
|
||||
|
||||
@ -662,7 +662,7 @@ static NV_STATUS page_tree_end_and_wait(uvm_page_tree_t *tree, uvm_push_t *push)
|
||||
}
|
||||
|
||||
static NV_STATUS write_gpu_state_cpu(uvm_page_tree_t *tree,
|
||||
NvU32 page_size,
|
||||
NvU64 page_size,
|
||||
NvS32 invalidate_depth,
|
||||
NvU32 used_count,
|
||||
uvm_page_directory_t **dirs_used)
|
||||
@ -713,7 +713,7 @@ static NV_STATUS write_gpu_state_cpu(uvm_page_tree_t *tree,
|
||||
}
|
||||
|
||||
static NV_STATUS write_gpu_state_gpu(uvm_page_tree_t *tree,
|
||||
NvU32 page_size,
|
||||
NvU64 page_size,
|
||||
NvS32 invalidate_depth,
|
||||
NvU32 used_count,
|
||||
uvm_page_directory_t **dirs_used)
|
||||
@ -805,7 +805,7 @@ static NV_STATUS write_gpu_state_gpu(uvm_page_tree_t *tree,
|
||||
|
||||
// initialize new page tables and insert them into the tree
|
||||
static NV_STATUS write_gpu_state(uvm_page_tree_t *tree,
|
||||
NvU32 page_size,
|
||||
NvU64 page_size,
|
||||
NvS32 invalidate_depth,
|
||||
NvU32 used_count,
|
||||
uvm_page_directory_t **dirs_used)
|
||||
@ -842,7 +842,7 @@ static void free_unused_directories(uvm_page_tree_t *tree,
|
||||
}
|
||||
}
|
||||
|
||||
static NV_STATUS allocate_page_table(uvm_page_tree_t *tree, NvU32 page_size, uvm_mmu_page_table_alloc_t *out)
|
||||
static NV_STATUS allocate_page_table(uvm_page_tree_t *tree, NvU64 page_size, uvm_mmu_page_table_alloc_t *out)
|
||||
{
|
||||
NvU32 depth = tree->hal->page_table_depth(page_size);
|
||||
NvLength alloc_size = tree->hal->allocation_size(depth, page_size);
|
||||
@ -871,7 +871,7 @@ static NV_STATUS page_tree_ats_init(uvm_page_tree_t *tree)
|
||||
{
|
||||
NV_STATUS status;
|
||||
NvU64 min_va_upper, max_va_lower;
|
||||
NvU32 page_size;
|
||||
NvU64 page_size;
|
||||
|
||||
if (!page_tree_ats_init_required(tree))
|
||||
return NV_OK;
|
||||
@ -1090,7 +1090,7 @@ static void page_tree_set_location(uvm_page_tree_t *tree, uvm_aperture_t locatio
|
||||
NV_STATUS uvm_page_tree_init(uvm_gpu_t *gpu,
|
||||
uvm_gpu_va_space_t *gpu_va_space,
|
||||
uvm_page_tree_type_t type,
|
||||
NvU32 big_page_size,
|
||||
NvU64 big_page_size,
|
||||
uvm_aperture_t location,
|
||||
uvm_page_tree_t *tree)
|
||||
{
|
||||
@ -1110,7 +1110,7 @@ NV_STATUS uvm_page_tree_init(uvm_gpu_t *gpu,
|
||||
tree->gpu_va_space = gpu_va_space;
|
||||
tree->big_page_size = big_page_size;
|
||||
|
||||
UVM_ASSERT(gpu->mem_info.max_vidmem_page_size & tree->hal->page_sizes());
|
||||
UVM_ASSERT(uvm_mmu_page_size_supported(tree, big_page_size));
|
||||
|
||||
page_tree_set_location(tree, location);
|
||||
|
||||
@ -1347,7 +1347,7 @@ NV_STATUS uvm_page_tree_wait(uvm_page_tree_t *tree)
|
||||
}
|
||||
|
||||
static NV_STATUS try_get_ptes(uvm_page_tree_t *tree,
|
||||
NvU32 page_size,
|
||||
NvU64 page_size,
|
||||
NvU64 start,
|
||||
NvLength size,
|
||||
uvm_page_table_range_t *range,
|
||||
@ -1379,7 +1379,7 @@ static NV_STATUS try_get_ptes(uvm_page_tree_t *tree,
|
||||
// This algorithm will work with unaligned ranges, but the caller's intent
|
||||
// is unclear
|
||||
UVM_ASSERT_MSG(start % page_size == 0 && size % page_size == 0,
|
||||
"start 0x%llx size 0x%zx page_size 0x%x\n",
|
||||
"start 0x%llx size 0x%zx page_size 0x%llx\n",
|
||||
start,
|
||||
(size_t)size,
|
||||
page_size);
|
||||
@ -1448,7 +1448,7 @@ static NV_STATUS map_remap(uvm_page_tree_t *tree, NvU64 start, NvLength size, uv
|
||||
{
|
||||
NV_STATUS status;
|
||||
uvm_push_t push;
|
||||
NvU32 page_sizes;
|
||||
NvU64 page_sizes;
|
||||
uvm_mmu_page_table_alloc_t *phys_alloc[1];
|
||||
|
||||
// TODO: Bug 2734399
|
||||
@ -1460,7 +1460,7 @@ static NV_STATUS map_remap(uvm_page_tree_t *tree, NvU64 start, NvLength size, uv
|
||||
status = page_tree_begin_acquire(tree,
|
||||
&tree->tracker,
|
||||
&push,
|
||||
"map remap: [0x%llx, 0x%llx), page_size: %d",
|
||||
"map remap: [0x%llx, 0x%llx), page_size: %lld",
|
||||
start,
|
||||
start + size,
|
||||
range->page_size);
|
||||
@ -1500,7 +1500,7 @@ static NV_STATUS map_remap(uvm_page_tree_t *tree, NvU64 start, NvLength size, uv
|
||||
}
|
||||
|
||||
NV_STATUS uvm_page_tree_get_ptes_async(uvm_page_tree_t *tree,
|
||||
NvU32 page_size,
|
||||
NvU64 page_size,
|
||||
NvU64 start,
|
||||
NvLength size,
|
||||
uvm_pmm_alloc_flags_t pmm_flags,
|
||||
@ -1545,7 +1545,7 @@ NV_STATUS uvm_page_tree_get_ptes_async(uvm_page_tree_t *tree,
|
||||
}
|
||||
|
||||
NV_STATUS uvm_page_tree_get_ptes(uvm_page_tree_t *tree,
|
||||
NvU32 page_size,
|
||||
NvU64 page_size,
|
||||
NvU64 start,
|
||||
NvLength size,
|
||||
uvm_pmm_alloc_flags_t pmm_flags,
|
||||
@ -1596,7 +1596,7 @@ void uvm_page_table_range_shrink(uvm_page_tree_t *tree, uvm_page_table_range_t *
|
||||
}
|
||||
|
||||
NV_STATUS uvm_page_tree_get_entry(uvm_page_tree_t *tree,
|
||||
NvU32 page_size,
|
||||
NvU64 page_size,
|
||||
NvU64 start,
|
||||
uvm_pmm_alloc_flags_t pmm_flags,
|
||||
uvm_page_table_range_t *single)
|
||||
@ -1621,7 +1621,7 @@ void uvm_page_tree_clear_pde(uvm_page_tree_t *tree, uvm_page_table_range_t *sing
|
||||
static NV_STATUS poison_ptes(uvm_page_tree_t *tree,
|
||||
uvm_page_directory_t *pte_dir,
|
||||
uvm_page_directory_t *parent,
|
||||
NvU32 page_size)
|
||||
NvU64 page_size)
|
||||
{
|
||||
NV_STATUS status;
|
||||
uvm_push_t push;
|
||||
@ -1633,7 +1633,7 @@ static NV_STATUS poison_ptes(uvm_page_tree_t *tree,
|
||||
// The flat mappings should always be set up when executing this path
|
||||
UVM_ASSERT(!uvm_mmu_use_cpu(tree));
|
||||
|
||||
status = page_tree_begin_acquire(tree, &tree->tracker, &push, "Poisoning child table of page size %u", page_size);
|
||||
status = page_tree_begin_acquire(tree, &tree->tracker, &push, "Poisoning child table of page size %llu", page_size);
|
||||
if (status != NV_OK)
|
||||
return status;
|
||||
|
||||
@ -1660,7 +1660,7 @@ static NV_STATUS poison_ptes(uvm_page_tree_t *tree,
|
||||
}
|
||||
|
||||
NV_STATUS uvm_page_tree_alloc_table(uvm_page_tree_t *tree,
|
||||
NvU32 page_size,
|
||||
NvU64 page_size,
|
||||
uvm_pmm_alloc_flags_t pmm_flags,
|
||||
uvm_page_table_range_t *single,
|
||||
uvm_page_table_range_t *children)
|
||||
@ -1768,7 +1768,7 @@ static size_t range_vec_calc_range_index(uvm_page_table_range_vec_t *range_vec,
|
||||
NV_STATUS uvm_page_table_range_vec_init(uvm_page_tree_t *tree,
|
||||
NvU64 start,
|
||||
NvU64 size,
|
||||
NvU32 page_size,
|
||||
NvU64 page_size,
|
||||
uvm_pmm_alloc_flags_t pmm_flags,
|
||||
uvm_page_table_range_vec_t *range_vec)
|
||||
{
|
||||
@ -1776,8 +1776,8 @@ NV_STATUS uvm_page_table_range_vec_init(uvm_page_tree_t *tree,
|
||||
size_t i;
|
||||
|
||||
UVM_ASSERT(size != 0);
|
||||
UVM_ASSERT_MSG(IS_ALIGNED(start, page_size), "start 0x%llx page_size 0x%x\n", start, page_size);
|
||||
UVM_ASSERT_MSG(IS_ALIGNED(size, page_size), "size 0x%llx page_size 0x%x\n", size, page_size);
|
||||
UVM_ASSERT_MSG(IS_ALIGNED(start, page_size), "start 0x%llx page_size 0x%llx\n", start, page_size);
|
||||
UVM_ASSERT_MSG(IS_ALIGNED(size, page_size), "size 0x%llx page_size 0x%llx\n", size, page_size);
|
||||
|
||||
range_vec->tree = tree;
|
||||
range_vec->page_size = page_size;
|
||||
@ -1826,7 +1826,7 @@ out:
|
||||
NV_STATUS uvm_page_table_range_vec_create(uvm_page_tree_t *tree,
|
||||
NvU64 start,
|
||||
NvU64 size,
|
||||
NvU32 page_size,
|
||||
NvU64 page_size,
|
||||
uvm_pmm_alloc_flags_t pmm_flags,
|
||||
uvm_page_table_range_vec_t **range_vec_out)
|
||||
{
|
||||
@ -1952,7 +1952,7 @@ static NV_STATUS uvm_page_table_range_vec_clear_ptes_gpu(uvm_page_table_range_ve
|
||||
size_t i;
|
||||
uvm_page_tree_t *tree = range_vec->tree;
|
||||
uvm_gpu_t *gpu = tree->gpu;
|
||||
NvU32 page_size = range_vec->page_size;
|
||||
NvU64 page_size = range_vec->page_size;
|
||||
NvU32 entry_size = uvm_mmu_pte_size(tree, page_size);
|
||||
NvU64 invalid_pte = 0;
|
||||
uvm_push_t push;
|
||||
@ -2237,7 +2237,7 @@ static NV_STATUS create_identity_mapping(uvm_gpu_t *gpu,
|
||||
NvU64 size,
|
||||
uvm_aperture_t aperture,
|
||||
NvU64 phys_offset,
|
||||
NvU32 page_size,
|
||||
NvU64 page_size,
|
||||
uvm_pmm_alloc_flags_t pmm_flags)
|
||||
{
|
||||
NV_STATUS status;
|
||||
@ -2312,7 +2312,7 @@ bool uvm_mmu_parent_gpu_needs_dynamic_sysmem_mapping(uvm_parent_gpu_t *parent_gp
|
||||
|
||||
NV_STATUS create_static_vidmem_mapping(uvm_gpu_t *gpu)
|
||||
{
|
||||
NvU32 page_size;
|
||||
NvU64 page_size;
|
||||
NvU64 size;
|
||||
uvm_aperture_t aperture = UVM_APERTURE_VID;
|
||||
NvU64 phys_offset = 0;
|
||||
@ -2351,7 +2351,7 @@ static void destroy_static_vidmem_mapping(uvm_gpu_t *gpu)
|
||||
|
||||
NV_STATUS uvm_mmu_create_peer_identity_mappings(uvm_gpu_t *gpu, uvm_gpu_t *peer)
|
||||
{
|
||||
NvU32 page_size;
|
||||
NvU64 page_size;
|
||||
NvU64 size;
|
||||
uvm_aperture_t aperture;
|
||||
NvU64 phys_offset;
|
||||
@ -2535,7 +2535,7 @@ static void root_chunk_mapping_destroy(uvm_gpu_t *gpu, uvm_gpu_root_chunk_mappin
|
||||
uvm_push_t push;
|
||||
NvU32 entry_size;
|
||||
uvm_pte_batch_t pte_batch;
|
||||
NvU32 page_size;
|
||||
NvU64 page_size;
|
||||
NvU64 size;
|
||||
NvU64 invalid_pte;
|
||||
uvm_page_table_range_t *range = root_chunk_mapping->range;
|
||||
@ -2585,7 +2585,7 @@ static NV_STATUS root_chunk_mapping_create(uvm_gpu_t *gpu, uvm_gpu_root_chunk_ma
|
||||
uvm_push_t push;
|
||||
NvU64 pte_bits;
|
||||
NvU32 entry_size;
|
||||
NvU32 page_size = UVM_CHUNK_SIZE_MAX;
|
||||
NvU64 page_size = UVM_CHUNK_SIZE_MAX;
|
||||
NvU64 size = UVM_CHUNK_SIZE_MAX;
|
||||
|
||||
range = uvm_kvmalloc_zero(sizeof(*range));
|
||||
@ -2852,7 +2852,7 @@ NV_STATUS uvm_mmu_sysmem_map(uvm_gpu_t *gpu, NvU64 pa, NvU64 size)
|
||||
if (sysmem_mapping->range_vec == NULL) {
|
||||
uvm_gpu_address_t virtual_address = uvm_parent_gpu_address_virtual_from_sysmem_phys(gpu->parent, curr_pa);
|
||||
NvU64 phys_offset = curr_pa;
|
||||
NvU32 page_size = mmu_biggest_page_size(&gpu->address_space_tree, UVM_APERTURE_SYS);
|
||||
NvU64 page_size = mmu_biggest_page_size(&gpu->address_space_tree, UVM_APERTURE_SYS);
|
||||
uvm_pmm_alloc_flags_t pmm_flags;
|
||||
|
||||
// No eviction is requested when allocating the page tree storage,
|
||||
|
@ -208,7 +208,7 @@ struct uvm_mmu_mode_hal_struct
|
||||
// This is an optimization which reduces TLB pressure, reduces the number of
|
||||
// TLB invalidates we must issue, and means we don't have to initialize the
|
||||
// 4k PTEs which are covered by big PTEs since the MMU will never read them.
|
||||
NvU64 (*unmapped_pte)(NvU32 page_size);
|
||||
NvU64 (*unmapped_pte)(NvU64 page_size);
|
||||
|
||||
// Bit pattern used for debug purposes to clobber PTEs which ought to be
|
||||
// unused. In practice this will generate a PRIV violation or a physical
|
||||
@ -234,23 +234,23 @@ struct uvm_mmu_mode_hal_struct
|
||||
// For dual PDEs, this is either 1 or 0, depending on the page size.
|
||||
// This is used to index the host copy only. GPU PDEs are always entirely
|
||||
// re-written using make_pde.
|
||||
NvLength (*entry_offset)(NvU32 depth, NvU32 page_size);
|
||||
NvLength (*entry_offset)(NvU32 depth, NvU64 page_size);
|
||||
|
||||
// number of virtual address bits used to index the directory/table at a
|
||||
// given depth
|
||||
NvU32 (*index_bits)(NvU32 depth, NvU32 page_size);
|
||||
NvU32 (*index_bits)(NvU32 depth, NvU64 page_size);
|
||||
|
||||
// total number of bits that represent the virtual address space
|
||||
NvU32 (*num_va_bits)(void);
|
||||
|
||||
// the size, in bytes, of a directory/table at a given depth.
|
||||
NvLength (*allocation_size)(NvU32 depth, NvU32 page_size);
|
||||
NvLength (*allocation_size)(NvU32 depth, NvU64 page_size);
|
||||
|
||||
// the depth which corresponds to the page tables
|
||||
NvU32 (*page_table_depth)(NvU32 page_size);
|
||||
NvU32 (*page_table_depth)(NvU64 page_size);
|
||||
|
||||
// bitwise-or of supported page sizes
|
||||
NvU32 (*page_sizes)(void);
|
||||
NvU64 (*page_sizes)(void);
|
||||
};
|
||||
|
||||
struct uvm_page_table_range_struct
|
||||
@ -258,7 +258,7 @@ struct uvm_page_table_range_struct
|
||||
uvm_page_directory_t *table;
|
||||
NvU32 start_index;
|
||||
NvU32 entry_count;
|
||||
NvU32 page_size;
|
||||
NvU64 page_size;
|
||||
};
|
||||
|
||||
typedef enum
|
||||
@ -275,7 +275,7 @@ struct uvm_page_tree_struct
|
||||
uvm_page_directory_t *root;
|
||||
uvm_mmu_mode_hal_t *hal;
|
||||
uvm_page_tree_type_t type;
|
||||
NvU32 big_page_size;
|
||||
NvU64 big_page_size;
|
||||
|
||||
// Pointer to the GPU VA space containing the page tree.
|
||||
// This pointer is set only for page trees of type
|
||||
@ -325,7 +325,7 @@ struct uvm_page_table_range_vec_struct
|
||||
NvU64 size;
|
||||
|
||||
// Page size used for all the page table ranges
|
||||
NvU32 page_size;
|
||||
NvU64 page_size;
|
||||
|
||||
// Page table ranges covering the VA
|
||||
uvm_page_table_range_t *ranges;
|
||||
@ -352,7 +352,7 @@ void uvm_mmu_init_gpu_peer_addresses(uvm_gpu_t *gpu);
|
||||
NV_STATUS uvm_page_tree_init(uvm_gpu_t *gpu,
|
||||
uvm_gpu_va_space_t *gpu_va_space,
|
||||
uvm_page_tree_type_t type,
|
||||
NvU32 big_page_size,
|
||||
NvU64 big_page_size,
|
||||
uvm_aperture_t location,
|
||||
uvm_page_tree_t *tree_out);
|
||||
|
||||
@ -374,7 +374,7 @@ void uvm_page_tree_deinit(uvm_page_tree_t *tree);
|
||||
// an existing range or change the size of an existing range, use
|
||||
// uvm_page_table_range_get_upper() and/or uvm_page_table_range_shrink().
|
||||
NV_STATUS uvm_page_tree_get_ptes(uvm_page_tree_t *tree,
|
||||
NvU32 page_size,
|
||||
NvU64 page_size,
|
||||
NvU64 start,
|
||||
NvLength size,
|
||||
uvm_pmm_alloc_flags_t pmm_flags,
|
||||
@ -384,7 +384,7 @@ NV_STATUS uvm_page_tree_get_ptes(uvm_page_tree_t *tree,
|
||||
//
|
||||
// All pending operations can be waited on with uvm_page_tree_wait().
|
||||
NV_STATUS uvm_page_tree_get_ptes_async(uvm_page_tree_t *tree,
|
||||
NvU32 page_size,
|
||||
NvU64 page_size,
|
||||
NvU64 start,
|
||||
NvLength size,
|
||||
uvm_pmm_alloc_flags_t pmm_flags,
|
||||
@ -395,7 +395,7 @@ NV_STATUS uvm_page_tree_get_ptes_async(uvm_page_tree_t *tree,
|
||||
// This is equivalent to calling uvm_page_tree_get_ptes() with size equal to
|
||||
// page_size.
|
||||
NV_STATUS uvm_page_tree_get_entry(uvm_page_tree_t *tree,
|
||||
NvU32 page_size,
|
||||
NvU64 page_size,
|
||||
NvU64 start,
|
||||
uvm_pmm_alloc_flags_t pmm_flags,
|
||||
uvm_page_table_range_t *single);
|
||||
@ -426,7 +426,7 @@ void uvm_page_tree_clear_pde(uvm_page_tree_t *tree, uvm_page_table_range_t *sing
|
||||
// It is the caller's responsibility to initialize the returned table before
|
||||
// calling uvm_page_tree_write_pde.
|
||||
NV_STATUS uvm_page_tree_alloc_table(uvm_page_tree_t *tree,
|
||||
NvU32 page_size,
|
||||
NvU64 page_size,
|
||||
uvm_pmm_alloc_flags_t pmm_flags,
|
||||
uvm_page_table_range_t *single,
|
||||
uvm_page_table_range_t *children);
|
||||
@ -480,7 +480,7 @@ static uvm_mmu_page_table_alloc_t *uvm_page_tree_pdb(uvm_page_tree_t *tree)
|
||||
NV_STATUS uvm_page_table_range_vec_init(uvm_page_tree_t *tree,
|
||||
NvU64 start,
|
||||
NvU64 size,
|
||||
NvU32 page_size,
|
||||
NvU64 page_size,
|
||||
uvm_pmm_alloc_flags_t pmm_flags,
|
||||
uvm_page_table_range_vec_t *range_vec);
|
||||
|
||||
@ -489,7 +489,7 @@ NV_STATUS uvm_page_table_range_vec_init(uvm_page_tree_t *tree,
|
||||
NV_STATUS uvm_page_table_range_vec_create(uvm_page_tree_t *tree,
|
||||
NvU64 start,
|
||||
NvU64 size,
|
||||
NvU32 page_size,
|
||||
NvU64 page_size,
|
||||
uvm_pmm_alloc_flags_t pmm_flags,
|
||||
uvm_page_table_range_vec_t **range_vec_out);
|
||||
|
||||
@ -601,12 +601,12 @@ void uvm_mmu_chunk_unmap(uvm_gpu_chunk_t *chunk, uvm_tracker_t *tracker);
|
||||
// uvm_parent_gpu_map_cpu_pages for the given GPU.
|
||||
NV_STATUS uvm_mmu_sysmem_map(uvm_gpu_t *gpu, NvU64 pa, NvU64 size);
|
||||
|
||||
static NvU64 uvm_mmu_page_tree_entries(uvm_page_tree_t *tree, NvU32 depth, NvU32 page_size)
|
||||
static NvU64 uvm_mmu_page_tree_entries(uvm_page_tree_t *tree, NvU32 depth, NvU64 page_size)
|
||||
{
|
||||
return 1ull << tree->hal->index_bits(depth, page_size);
|
||||
}
|
||||
|
||||
static NvU64 uvm_mmu_pde_coverage(uvm_page_tree_t *tree, NvU32 page_size)
|
||||
static NvU64 uvm_mmu_pde_coverage(uvm_page_tree_t *tree, NvU64 page_size)
|
||||
{
|
||||
NvU32 depth = tree->hal->page_table_depth(page_size);
|
||||
return uvm_mmu_page_tree_entries(tree, depth, page_size) * page_size;
|
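uvm_mmu_pde_coverage() just multiplies the entry count at the page size's table depth by the page size, giving how much VA one entry of the parent directory spans. A quick arithmetic illustration of the formula (generic example numbers, not a claim about any particular GPU level):

/* Generic arithmetic for the formula above:
 *   entries  = 1 << index_bits(depth, page_size)   e.g. 1 << 9   = 512
 *   coverage = entries * page_size                 e.g. 512 * 64K = 32M of VA
 */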
||||
@ -615,21 +615,21 @@ static NvU64 uvm_mmu_pde_coverage(uvm_page_tree_t *tree, NvU32 page_size)
|
||||
// Page sizes supported by the GPU. Use uvm_mmu_biggest_page_size() to retrieve
|
||||
// the largest page size supported in a given system, which considers the GMMU
|
||||
// and vMMU page sizes and segment sizes.
|
||||
static bool uvm_mmu_page_size_supported(uvm_page_tree_t *tree, NvU32 page_size)
|
||||
static bool uvm_mmu_page_size_supported(uvm_page_tree_t *tree, NvU64 page_size)
|
||||
{
|
||||
UVM_ASSERT_MSG(is_power_of_2(page_size), "0x%x\n", page_size);
|
||||
UVM_ASSERT_MSG(is_power_of_2(page_size), "0x%llx\n", page_size);
|
||||
|
||||
return (tree->hal->page_sizes() & page_size) != 0;
|
||||
}
|
||||
|
||||
static NvU32 uvm_mmu_biggest_page_size_up_to(uvm_page_tree_t *tree, NvU32 max_page_size)
|
||||
static NvU64 uvm_mmu_biggest_page_size_up_to(uvm_page_tree_t *tree, NvU64 max_page_size)
|
||||
{
|
||||
NvU32 gpu_page_sizes = tree->hal->page_sizes();
|
||||
NvU32 smallest_gpu_page_size = gpu_page_sizes & ~(gpu_page_sizes - 1);
|
||||
NvU32 page_sizes;
|
||||
NvU32 page_size;
|
||||
NvU64 gpu_page_sizes = tree->hal->page_sizes();
|
||||
NvU64 smallest_gpu_page_size = gpu_page_sizes & ~(gpu_page_sizes - 1);
|
||||
NvU64 page_sizes;
|
||||
NvU64 page_size;
|
||||
|
||||
UVM_ASSERT_MSG(is_power_of_2(max_page_size), "0x%x\n", max_page_size);
|
||||
UVM_ASSERT_MSG(is_power_of_2(max_page_size), "0x%llx\n", max_page_size);
|
||||
|
||||
if (max_page_size < smallest_gpu_page_size)
|
||||
return 0;
|
||||
@ -638,14 +638,14 @@ static NvU32 uvm_mmu_biggest_page_size_up_to(uvm_page_tree_t *tree, NvU32 max_pa
|
||||
page_sizes = gpu_page_sizes & (max_page_size | (max_page_size - 1));
|
||||
|
||||
// And pick the biggest one of them
|
||||
page_size = 1 << __fls(page_sizes);
|
||||
page_size = 1ULL << __fls(page_sizes);
|
||||
|
||||
UVM_ASSERT_MSG(uvm_mmu_page_size_supported(tree, page_size), "page_size 0x%x", page_size);
|
||||
UVM_ASSERT_MSG(uvm_mmu_page_size_supported(tree, page_size), "page_size 0x%llx", page_size);
|
||||
|
||||
return page_size;
|
||||
}
|
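The masking step is the heart of uvm_mmu_biggest_page_size_up_to(): (max_page_size | (max_page_size - 1)) is a mask of every bit at or below max_page_size, so ANDing it with the HAL mask discards all page sizes above the cap before the highest remaining bit is taken. A worked example with illustrative values:

/* Worked example:
 *   gpu_page_sizes             = 4K | 64K | 2M       = 0x211000
 *   max_page_size              = 64K                 = 0x010000
 *   max | (max - 1)                                  = 0x01ffff
 *   gpu_page_sizes & that mask = 4K | 64K            = 0x011000
 *   1ULL << __fls(0x011000)    = 64K                 = 0x010000
 */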
||||
|
||||
static NvU32 uvm_mmu_pte_size(uvm_page_tree_t *tree, NvU32 page_size)
|
||||
static NvU32 uvm_mmu_pte_size(uvm_page_tree_t *tree, NvU64 page_size)
|
||||
{
|
||||
return tree->hal->entry_size(tree->hal->page_table_depth(page_size));
|
||||
}
|
||||
|
@ -96,7 +96,7 @@ typedef struct
|
||||
{
|
||||
NvU64 base;
|
||||
NvU64 size;
|
||||
NvU32 page_size;
|
||||
NvU64 page_size;
|
||||
NvU32 depth;
|
||||
uvm_membar_t membar;
|
||||
} fake_tlb_invalidate_t;
|
||||
@ -153,7 +153,7 @@ static void fake_tlb_invalidate_va(uvm_push_t *push,
|
||||
NvU32 depth,
|
||||
NvU64 base,
|
||||
NvU64 size,
|
||||
NvU32 page_size,
|
||||
NvU64 page_size,
|
||||
uvm_membar_t membar)
|
||||
{
|
||||
if (!g_fake_tlb_invals_tracking_enabled)
|
||||
@ -249,7 +249,11 @@ static bool assert_last_invalidate_all(NvU32 expected_depth, bool expected_memba
|
||||
}
|
||||
|
||||
static bool assert_invalidate_range_specific(fake_tlb_invalidate_t *inval,
|
||||
NvU64 base, NvU64 size, NvU32 page_size, NvU32 expected_depth, bool expected_membar)
|
||||
NvU64 base,
|
||||
NvU64 size,
|
||||
NvU64 page_size,
|
||||
NvU32 expected_depth,
|
||||
bool expected_membar)
|
||||
{
|
||||
UVM_ASSERT(g_fake_tlb_invals_tracking_enabled);
|
||||
|
||||
@ -271,7 +275,7 @@ static bool assert_invalidate_range_specific(fake_tlb_invalidate_t *inval,
|
||||
return false;
|
||||
}
|
||||
if (inval->page_size != page_size && inval->base != 0 && inval->size != -1) {
|
||||
UVM_TEST_PRINT("Expected page size %u, got %u instead\n", page_size, inval->page_size);
|
||||
UVM_TEST_PRINT("Expected page size %llu, got %llu instead\n", page_size, inval->page_size);
|
||||
return false;
|
||||
}
|
||||
|
||||
@ -280,7 +284,7 @@ static bool assert_invalidate_range_specific(fake_tlb_invalidate_t *inval,
|
||||
|
||||
static bool assert_invalidate_range(NvU64 base,
|
||||
NvU64 size,
|
||||
NvU32 page_size,
|
||||
NvU64 page_size,
|
||||
bool allow_inval_all,
|
||||
NvU32 range_depth,
|
||||
NvU32 all_depth,
|
||||
@ -325,7 +329,7 @@ static NV_STATUS test_page_tree_init_kernel(uvm_gpu_t *gpu, NvU32 big_page_size,
|
||||
}
|
||||
|
||||
static NV_STATUS test_page_tree_get_ptes(uvm_page_tree_t *tree,
|
||||
NvU32 page_size,
|
||||
NvU64 page_size,
|
||||
NvU64 start,
|
||||
NvLength size,
|
||||
uvm_page_table_range_t *range)
|
||||
@ -341,7 +345,7 @@ static NV_STATUS test_page_tree_get_ptes(uvm_page_tree_t *tree,
|
||||
}
|
||||
|
||||
static NV_STATUS test_page_tree_get_entry(uvm_page_tree_t *tree,
|
||||
NvU32 page_size,
|
||||
NvU64 page_size,
|
||||
NvU64 start,
|
||||
uvm_page_table_range_t *single)
|
||||
{
|
||||
@ -355,14 +359,14 @@ static NV_STATUS test_page_tree_get_entry(uvm_page_tree_t *tree,
|
||||
}
|
||||
|
||||
static NV_STATUS test_page_tree_alloc_table(uvm_page_tree_t *tree,
|
||||
NvU32 page_size,
|
||||
NvU64 page_size,
|
||||
uvm_page_table_range_t *single,
|
||||
uvm_page_table_range_t *children)
|
||||
{
|
||||
return uvm_page_tree_alloc_table(tree, page_size, UVM_PMM_ALLOC_FLAGS_NONE, single, children);
|
||||
}
|
||||
|
||||
static bool assert_entry_no_invalidate(uvm_page_tree_t *tree, NvU32 page_size, NvU64 start)
|
||||
static bool assert_entry_no_invalidate(uvm_page_tree_t *tree, NvU64 page_size, NvU64 start)
|
||||
{
|
||||
uvm_page_table_range_t entry;
|
||||
bool result = true;
|
||||
@ -378,7 +382,7 @@ static bool assert_entry_no_invalidate(uvm_page_tree_t *tree, NvU32 page_size, N
|
||||
return assert_no_invalidate() && result;
|
||||
}
|
||||
|
||||
static bool assert_entry_invalidate(uvm_page_tree_t *tree, NvU32 page_size, NvU64 start, NvU32 depth, bool membar)
|
||||
static bool assert_entry_invalidate(uvm_page_tree_t *tree, NvU64 page_size, NvU64 start, NvU32 depth, bool membar)
|
||||
{
|
||||
uvm_page_table_range_t entry;
|
||||
bool result = true;
|
||||
@ -932,8 +936,8 @@ static NV_STATUS split_and_free(uvm_gpu_t *gpu)
|
||||
|
||||
static NV_STATUS check_sizes(uvm_gpu_t *gpu)
|
||||
{
|
||||
NvU32 user_sizes = UVM_PAGE_SIZE_2M;
|
||||
NvU32 kernel_sizes = UVM_PAGE_SIZE_4K | 256;
|
||||
NvU64 user_sizes = UVM_PAGE_SIZE_2M;
|
||||
NvU64 kernel_sizes = UVM_PAGE_SIZE_4K | 256;
|
||||
|
||||
if (UVM_PAGE_SIZE_64K >= PAGE_SIZE)
|
||||
user_sizes |= UVM_PAGE_SIZE_64K;
|
||||
@ -1161,7 +1165,7 @@ static NV_STATUS test_tlb_batch_invalidates_case(uvm_page_tree_t *tree,
|
||||
return status;
|
||||
}
|
||||
|
||||
static NV_STATUS test_tlb_batch_invalidates(uvm_gpu_t *gpu, const NvU32 *page_sizes, const NvU32 page_sizes_count)
|
||||
static NV_STATUS test_tlb_batch_invalidates(uvm_gpu_t *gpu, const NvU64 *page_sizes, const NvU32 page_sizes_count)
|
||||
{
|
||||
NV_STATUS status = NV_OK;
|
||||
uvm_page_tree_t tree;
|
||||
@ -1177,8 +1181,8 @@ static NV_STATUS test_tlb_batch_invalidates(uvm_gpu_t *gpu, const NvU32 *page_si
|
||||
for (min_index = 0; min_index < page_sizes_count; ++min_index) {
|
||||
for (max_index = min_index; max_index < page_sizes_count; ++max_index) {
|
||||
for (size_index = 0; size_index < ARRAY_SIZE(sizes_in_max_pages); ++size_index) {
|
||||
NvU32 min_page_size = page_sizes[min_index];
|
||||
NvU32 max_page_size = page_sizes[max_index];
|
||||
NvU64 min_page_size = page_sizes[min_index];
|
||||
NvU64 max_page_size = page_sizes[max_index];
|
||||
NvU64 size = (NvU64)sizes_in_max_pages[size_index] * max_page_size;
|
||||
|
||||
TEST_CHECK_GOTO(test_tlb_batch_invalidates_case(&tree,
|
||||
@ -1282,7 +1286,7 @@ static NV_STATUS test_range_vec_clear_ptes(uvm_page_table_range_vec_t *range_vec
|
||||
static NV_STATUS test_range_vec_create(uvm_page_tree_t *tree,
|
||||
NvU64 start,
|
||||
NvU64 size,
|
||||
NvU32 page_size,
|
||||
NvU64 page_size,
|
||||
uvm_page_table_range_vec_t **range_vec_out)
|
||||
{
|
||||
uvm_page_table_range_vec_t *range_vec;
|
||||
@ -1303,7 +1307,7 @@ static NV_STATUS test_range_vec_create(uvm_page_tree_t *tree,
|
||||
// Test page table range vector APIs.
|
||||
// Notably the test leaks the page_tree and range_vec on error as it's hard to
|
||||
// clean up on failure and the destructors would likely assert.
|
||||
static NV_STATUS test_range_vec(uvm_gpu_t *gpu, NvU32 big_page_size, NvU32 page_size)
|
||||
static NV_STATUS test_range_vec(uvm_gpu_t *gpu, NvU32 big_page_size, NvU64 page_size)
|
||||
{
|
||||
NV_STATUS status = NV_OK;
|
||||
uvm_page_tree_t tree;
|
||||
@ -1511,7 +1515,7 @@ static uvm_mmu_page_table_alloc_t fake_table_alloc(uvm_aperture_t aperture, NvU6
|
||||
// Queries the supported page sizes of the GPU(uvm_gpu_t) and fills the
|
||||
// page_sizes array up to MAX_NUM_PAGE_SIZE. Returns the number of elements in
|
||||
// page_sizes;
|
||||
size_t get_page_sizes(uvm_gpu_t *gpu, NvU32 *page_sizes)
|
||||
size_t get_page_sizes(uvm_gpu_t *gpu, NvU64 *page_sizes)
|
||||
{
|
||||
unsigned long page_size_log2;
|
||||
unsigned long page_sizes_bitvec;
|
||||
@ -1524,7 +1528,7 @@ size_t get_page_sizes(uvm_gpu_t *gpu, NvU32 *page_sizes)
|
||||
page_sizes_bitvec = hal->page_sizes();
|
||||
|
||||
for_each_set_bit(page_size_log2, &page_sizes_bitvec, BITS_PER_LONG) {
|
||||
NvU32 page_size = (NvU32)(1ULL << page_size_log2);
|
||||
NvU64 page_size = 1ULL << page_size_log2;
|
||||
UVM_ASSERT(count < MAX_NUM_PAGE_SIZES);
|
||||
page_sizes[count++] = page_size;
|
||||
}
|
||||
@ -1572,7 +1576,7 @@ typedef NV_STATUS (*entry_test_page_size_func)(uvm_gpu_t *gpu, size_t page_size)
|
||||
|
||||
static NV_STATUS entry_test_maxwell(uvm_gpu_t *gpu)
|
||||
{
|
||||
static const NvU32 big_page_sizes[] = {UVM_PAGE_SIZE_64K, UVM_PAGE_SIZE_128K};
|
||||
static const NvU64 big_page_sizes[] = {UVM_PAGE_SIZE_64K, UVM_PAGE_SIZE_128K};
|
||||
NvU64 pde_bits;
|
||||
uvm_mmu_page_table_alloc_t *phys_allocs[2];
|
||||
uvm_mmu_page_table_alloc_t alloc_sys = fake_table_alloc(UVM_APERTURE_SYS, 0x9999999000LL);
|
||||
@ -1663,7 +1667,7 @@ static NV_STATUS entry_test_maxwell(uvm_gpu_t *gpu)
|
||||
|
||||
static NV_STATUS entry_test_pascal(uvm_gpu_t *gpu, entry_test_page_size_func entry_test_page_size)
|
||||
{
|
||||
NvU32 page_sizes[MAX_NUM_PAGE_SIZES];
|
||||
NvU64 page_sizes[MAX_NUM_PAGE_SIZES];
|
||||
NvU64 pde_bits[2];
|
||||
size_t i, num_page_sizes;
|
||||
uvm_mmu_page_table_alloc_t *phys_allocs[2] = {NULL, NULL};
|
||||
@ -1759,7 +1763,7 @@ static NV_STATUS entry_test_pascal(uvm_gpu_t *gpu, entry_test_page_size_func ent
|
||||
|
||||
static NV_STATUS entry_test_volta(uvm_gpu_t *gpu, entry_test_page_size_func entry_test_page_size)
|
||||
{
|
||||
NvU32 page_sizes[MAX_NUM_PAGE_SIZES];
|
||||
NvU64 page_sizes[MAX_NUM_PAGE_SIZES];
|
||||
NvU64 pde_bits[2];
|
||||
size_t i, num_page_sizes;
|
||||
uvm_mmu_page_table_alloc_t *phys_allocs[2] = {NULL, NULL};
|
||||
@ -1833,7 +1837,7 @@ static NV_STATUS entry_test_volta(uvm_gpu_t *gpu, entry_test_page_size_func entr
|
||||
|
||||
static NV_STATUS entry_test_ampere(uvm_gpu_t *gpu, entry_test_page_size_func entry_test_page_size)
|
||||
{
|
||||
NvU32 page_sizes[MAX_NUM_PAGE_SIZES];
|
||||
NvU64 page_sizes[MAX_NUM_PAGE_SIZES];
|
||||
NvU32 i, num_page_sizes;
|
||||
|
||||
num_page_sizes = get_page_sizes(gpu, page_sizes);
|
||||
@ -1847,7 +1851,7 @@ static NV_STATUS entry_test_ampere(uvm_gpu_t *gpu, entry_test_page_size_func ent
|
||||
static NV_STATUS entry_test_hopper(uvm_gpu_t *gpu, entry_test_page_size_func entry_test_page_size)
|
||||
{
|
||||
NV_STATUS status = NV_OK;
|
||||
NvU32 page_sizes[MAX_NUM_PAGE_SIZES];
|
||||
NvU64 page_sizes[MAX_NUM_PAGE_SIZES];
|
||||
NvU64 pde_bits[2];
|
||||
uvm_page_directory_t *dirs[5];
|
||||
size_t i, num_page_sizes;
|
||||
@ -2290,8 +2294,8 @@ static NV_STATUS fake_gpu_init_hopper(uvm_gpu_t *fake_gpu)
|
||||
static NV_STATUS maxwell_test_page_tree(uvm_gpu_t *maxwell)
|
||||
{
|
||||
// create a fake Maxwell GPU for this test.
|
||||
static const NvU32 big_page_sizes[] = {UVM_PAGE_SIZE_64K, UVM_PAGE_SIZE_128K};
|
||||
NvU32 i, j, big_page_size, page_size;
|
||||
static const NvU64 big_page_sizes[] = {UVM_PAGE_SIZE_64K, UVM_PAGE_SIZE_128K};
|
||||
NvU64 i, j, big_page_size, page_size;
|
||||
|
||||
TEST_CHECK_RET(fake_gpu_init_maxwell(maxwell) == NV_OK);
|
||||
|
||||
@ -2320,7 +2324,7 @@ static NV_STATUS pascal_test_page_tree(uvm_gpu_t *pascal)
|
||||
// create a fake Pascal GPU for this test.
|
||||
NvU32 tlb_batch_saved_max_pages;
|
||||
NvU32 i;
|
||||
NvU32 page_sizes[MAX_NUM_PAGE_SIZES];
|
||||
NvU64 page_sizes[MAX_NUM_PAGE_SIZES];
|
||||
size_t num_page_sizes;
|
||||
|
||||
TEST_CHECK_RET(fake_gpu_init_pascal(pascal) == NV_OK);
|
||||
@ -2381,7 +2385,7 @@ static NV_STATUS volta_test_page_tree(uvm_gpu_t *volta)
|
||||
static NV_STATUS ampere_test_page_tree(uvm_gpu_t *ampere)
|
||||
{
|
||||
NvU32 i, tlb_batch_saved_max_pages;
|
||||
NvU32 page_sizes[MAX_NUM_PAGE_SIZES];
|
||||
NvU64 page_sizes[MAX_NUM_PAGE_SIZES];
|
||||
size_t num_page_sizes;
|
||||
|
||||
TEST_CHECK_RET(fake_gpu_init_ampere(ampere) == NV_OK);
|
||||
|
@ -92,7 +92,13 @@ void uvm_hal_pascal_host_tlb_invalidate_all(uvm_push_t *push, uvm_gpu_phys_addre
|
||||
uvm_hal_tlb_invalidate_membar(push, membar);
|
||||
}
|
||||
|
||||
void uvm_hal_pascal_host_tlb_invalidate_va(uvm_push_t *push, uvm_gpu_phys_address_t pdb, NvU32 depth, NvU64 base, NvU64 size, NvU32 page_size, uvm_membar_t membar)
|
||||
void uvm_hal_pascal_host_tlb_invalidate_va(uvm_push_t *push,
|
||||
uvm_gpu_phys_address_t pdb,
|
||||
NvU32 depth,
|
||||
NvU64 base,
|
||||
NvU64 size,
|
||||
NvU64 page_size,
|
||||
uvm_membar_t membar)
|
||||
{
|
||||
NvU32 aperture_value;
|
||||
NvU32 page_table_level;
|
||||
@ -127,9 +133,9 @@ void uvm_hal_pascal_host_tlb_invalidate_va(uvm_push_t *push, uvm_gpu_phys_addres
|
||||
ack_value = HWCONST(C06F, MEM_OP_C, TLB_INVALIDATE_ACK_TYPE, GLOBALLY);
|
||||
}
|
||||
|
||||
UVM_ASSERT_MSG(IS_ALIGNED(page_size, 1 << 12), "page_size 0x%x\n", page_size);
|
||||
UVM_ASSERT_MSG(IS_ALIGNED(base, page_size), "base 0x%llx page_size 0x%x\n", base, page_size);
|
||||
UVM_ASSERT_MSG(IS_ALIGNED(size, page_size), "size 0x%llx page_size 0x%x\n", size, page_size);
|
||||
UVM_ASSERT_MSG(IS_ALIGNED(page_size, 1 << 12), "page_size 0x%llx\n", page_size);
|
||||
UVM_ASSERT_MSG(IS_ALIGNED(base, page_size), "base 0x%llx page_size 0x%llx\n", base, page_size);
|
||||
UVM_ASSERT_MSG(IS_ALIGNED(size, page_size), "size 0x%llx page_size 0x%llx\n", size, page_size);
|
||||
UVM_ASSERT_MSG(size > 0, "size 0x%llx\n", size);
|
||||
|
||||
base >>= 12;
|
||||
|
@ -54,7 +54,7 @@ static NvU32 entries_per_index_pascal(NvU32 depth)
|
||||
return 1;
|
||||
}
|
||||
|
||||
static NvLength entry_offset_pascal(NvU32 depth, NvU32 page_size)
|
||||
static NvLength entry_offset_pascal(NvU32 depth, NvU64 page_size)
|
||||
{
|
||||
UVM_ASSERT(depth < 5);
|
||||
if (page_size == UVM_PAGE_SIZE_4K && depth == 3)
|
||||
@ -178,7 +178,7 @@ static NvLength entry_size_pascal(NvU32 depth)
|
||||
return 8;
|
||||
}
|
||||
|
||||
static NvU32 index_bits_pascal(NvU32 depth, NvU32 page_size)
|
||||
static NvU32 index_bits_pascal(NvU32 depth, NvU64 page_size)
|
||||
{
|
||||
static const NvU32 bit_widths[] = {2, 9, 9, 8};
|
||||
// some code paths keep on querying this until they get a 0, meaning only the page offset remains.
|
||||
@ -204,7 +204,7 @@ static NvU32 num_va_bits_pascal(void)
|
||||
return 49;
|
||||
}
|
||||
|
||||
static NvLength allocation_size_pascal(NvU32 depth, NvU32 page_size)
|
||||
static NvLength allocation_size_pascal(NvU32 depth, NvU64 page_size)
|
||||
{
|
||||
UVM_ASSERT(depth < 5);
|
||||
if (depth == 4 && page_size == UVM_PAGE_SIZE_64K)
|
||||
@ -213,7 +213,7 @@ static NvLength allocation_size_pascal(NvU32 depth, NvU32 page_size)
|
||||
return 4096;
|
||||
}
|
||||
|
||||
static NvU32 page_table_depth_pascal(NvU32 page_size)
|
||||
static NvU32 page_table_depth_pascal(NvU64 page_size)
|
||||
{
|
||||
if (page_size == UVM_PAGE_SIZE_2M)
|
||||
return 3;
|
||||
@ -221,12 +221,12 @@ static NvU32 page_table_depth_pascal(NvU32 page_size)
|
||||
return 4;
|
||||
}
|
||||
|
||||
static NvU32 page_sizes_pascal(void)
|
||||
static NvU64 page_sizes_pascal(void)
|
||||
{
|
||||
return UVM_PAGE_SIZE_2M | UVM_PAGE_SIZE_64K | UVM_PAGE_SIZE_4K;
|
||||
}
|
||||
|
||||
static NvU64 unmapped_pte_pascal(NvU32 page_size)
|
||||
static NvU64 unmapped_pte_pascal(NvU64 page_size)
|
||||
{
|
||||
// Setting the privilege bit on an otherwise-zeroed big PTE causes the
|
||||
// corresponding 4k PTEs to be ignored. This allows the invalidation of a
|
||||
@ -362,7 +362,7 @@ static uvm_mmu_mode_hal_t pascal_mmu_mode_hal =
|
||||
.page_sizes = page_sizes_pascal
|
||||
};
|
||||
|
||||
uvm_mmu_mode_hal_t *uvm_hal_mmu_mode_pascal(NvU32 big_page_size)
|
||||
uvm_mmu_mode_hal_t *uvm_hal_mmu_mode_pascal(NvU64 big_page_size)
|
||||
{
|
||||
UVM_ASSERT(big_page_size == UVM_PAGE_SIZE_64K || big_page_size == UVM_PAGE_SIZE_128K);
|
||||
|
||||
|
@ -162,7 +162,7 @@ static void grow_fault_granularity_if_no_thrashing(uvm_perf_prefetch_bitmap_tree
|
||||
}
|
||||
|
||||
static void grow_fault_granularity(uvm_perf_prefetch_bitmap_tree_t *bitmap_tree,
|
||||
NvU32 big_page_size,
|
||||
NvU64 big_page_size,
|
||||
uvm_va_block_region_t big_pages_region,
|
||||
uvm_va_block_region_t max_prefetch_region,
|
||||
const uvm_page_mask_t *faulted_pages,
|
||||
@ -245,7 +245,7 @@ static void update_bitmap_tree_from_va_block(uvm_perf_prefetch_bitmap_tree_t *bi
|
||||
uvm_va_block_region_t max_prefetch_region)
|
||||
|
||||
{
|
||||
NvU32 big_page_size;
|
||||
NvU64 big_page_size;
|
||||
uvm_va_block_region_t big_pages_region;
|
||||
uvm_va_space_t *va_space;
|
||||
const uvm_page_mask_t *thrashing_pages;
|
||||
|
@ -1987,21 +1987,12 @@ NV_STATUS uvm_perf_thrashing_init(void)
|
||||
UVM_PERF_THRASHING_PIN_THRESHOLD_DEFAULT,
|
||||
UVM_PERF_THRASHING_PIN_THRESHOLD_MAX);
|
||||
|
||||
|
||||
|
||||
// In Confidential Computing, the DMA path is slower due to cryptographic
|
||||
// operations & other associated overhead. Enforce a larger window to allow
|
||||
// the thrashing mitigation mechanisms to work properly.
|
||||
if (g_uvm_global.conf_computing_enabled)
|
||||
INIT_THRASHING_PARAMETER_NONZERO(uvm_perf_thrashing_lapse_usec, UVM_PERF_THRASHING_LAPSE_USEC_DEFAULT * 10);
|
||||
else
|
||||
INIT_THRASHING_PARAMETER_NONZERO(uvm_perf_thrashing_lapse_usec, UVM_PERF_THRASHING_LAPSE_USEC_DEFAULT);
|
||||
INIT_THRASHING_PARAMETER_NONZERO(uvm_perf_thrashing_lapse_usec, UVM_PERF_THRASHING_LAPSE_USEC_DEFAULT);
|
||||
|
||||
INIT_THRASHING_PARAMETER_NONZERO_MAX(uvm_perf_thrashing_nap,
|
||||
UVM_PERF_THRASHING_NAP_DEFAULT,
|
||||
UVM_PERF_THRASHING_NAP_MAX);
|
||||
|
||||
|
||||
INIT_THRASHING_PARAMETER_NONZERO(uvm_perf_thrashing_epoch, UVM_PERF_THRASHING_EPOCH_DEFAULT);
|
||||
|
||||
INIT_THRASHING_PARAMETER(uvm_perf_thrashing_pin, UVM_PERF_THRASHING_PIN_DEFAULT);
|
||||
|
@ -1890,8 +1890,11 @@ static uvm_gpu_chunk_t *claim_free_chunk(uvm_pmm_gpu_t *pmm, uvm_pmm_gpu_memory_
|
||||
if (!chunk)
|
||||
goto out;
|
||||
|
||||
UVM_ASSERT_MSG(uvm_gpu_chunk_get_size(chunk) == chunk_size, "chunk size %u expected %u\n",
|
||||
uvm_gpu_chunk_get_size(chunk), chunk_size);
|
||||
UVM_ASSERT_MSG(uvm_gpu_chunk_get_size(chunk) == chunk_size,
|
||||
"chunk size %u expected %u\n",
|
||||
uvm_gpu_chunk_get_size(chunk),
|
||||
chunk_size);
|
||||
|
||||
UVM_ASSERT(chunk->type == type);
|
||||
UVM_ASSERT(chunk->state == UVM_PMM_GPU_CHUNK_STATE_FREE);
|
||||
UVM_ASSERT(!chunk_is_in_eviction(pmm, chunk));
|
||||
@ -2756,7 +2759,7 @@ static bool uvm_pmm_should_inject_pma_eviction_error(uvm_pmm_gpu_t *pmm)
|
||||
// See the documentation of pmaEvictPagesCb_t in pma.h for details of the
|
||||
// expected semantics.
|
||||
static NV_STATUS uvm_pmm_gpu_pma_evict_pages(void *void_pmm,
|
||||
NvU32 page_size,
|
||||
NvU64 page_size,
|
||||
NvU64 *pages,
|
||||
NvU32 num_pages_to_evict,
|
||||
NvU64 phys_start,
|
||||
@ -2861,7 +2864,7 @@ error:
|
||||
}
|
||||
|
||||
static NV_STATUS uvm_pmm_gpu_pma_evict_pages_wrapper(void *void_pmm,
|
||||
NvU32 page_size,
|
||||
NvU64 page_size,
|
||||
NvU64 *pages,
|
||||
NvU32 num_pages_to_evict,
|
||||
NvU64 phys_start,
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*******************************************************************************
|
||||
Copyright (c) 2015-2023 NVIDIA Corporation
|
||||
Copyright (c) 2015-2024 NVIDIA Corporation
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to
|
||||
@ -65,30 +65,30 @@
|
||||
|
||||
typedef enum
|
||||
{
|
||||
UVM_CHUNK_SIZE_1 = 1ULL,
|
||||
UVM_CHUNK_SIZE_2 = 2ULL,
|
||||
UVM_CHUNK_SIZE_4 = 4ULL,
|
||||
UVM_CHUNK_SIZE_8 = 8ULL,
|
||||
UVM_CHUNK_SIZE_16 = 16ULL,
|
||||
UVM_CHUNK_SIZE_32 = 32ULL,
|
||||
UVM_CHUNK_SIZE_64 = 64ULL,
|
||||
UVM_CHUNK_SIZE_128 = 128ULL,
|
||||
UVM_CHUNK_SIZE_256 = 256ULL,
|
||||
UVM_CHUNK_SIZE_512 = 512ULL,
|
||||
UVM_CHUNK_SIZE_1K = 1024ULL,
|
||||
UVM_CHUNK_SIZE_2K = 2*1024ULL,
|
||||
UVM_CHUNK_SIZE_4K = 4*1024ULL,
|
||||
UVM_CHUNK_SIZE_8K = 8*1024ULL,
|
||||
UVM_CHUNK_SIZE_16K = 16*1024ULL,
|
||||
UVM_CHUNK_SIZE_32K = 32*1024ULL,
|
||||
UVM_CHUNK_SIZE_64K = 64*1024ULL,
|
||||
UVM_CHUNK_SIZE_128K = 128*1024ULL,
|
||||
UVM_CHUNK_SIZE_256K = 256*1024ULL,
|
||||
UVM_CHUNK_SIZE_512K = 512*1024ULL,
|
||||
UVM_CHUNK_SIZE_1M = 1024*1024ULL,
|
||||
UVM_CHUNK_SIZE_2M = 2*1024*1024ULL,
|
||||
UVM_CHUNK_SIZE_1 = 1,
|
||||
UVM_CHUNK_SIZE_2 = 2,
|
||||
UVM_CHUNK_SIZE_4 = 4,
|
||||
UVM_CHUNK_SIZE_8 = 8,
|
||||
UVM_CHUNK_SIZE_16 = 16,
|
||||
UVM_CHUNK_SIZE_32 = 32,
|
||||
UVM_CHUNK_SIZE_64 = 64,
|
||||
UVM_CHUNK_SIZE_128 = 128,
|
||||
UVM_CHUNK_SIZE_256 = 256,
|
||||
UVM_CHUNK_SIZE_512 = 512,
|
||||
UVM_CHUNK_SIZE_1K = 1024,
|
||||
UVM_CHUNK_SIZE_2K = 2*1024,
|
||||
UVM_CHUNK_SIZE_4K = 4*1024,
|
||||
UVM_CHUNK_SIZE_8K = 8*1024,
|
||||
UVM_CHUNK_SIZE_16K = 16*1024,
|
||||
UVM_CHUNK_SIZE_32K = 32*1024,
|
||||
UVM_CHUNK_SIZE_64K = 64*1024,
|
||||
UVM_CHUNK_SIZE_128K = 128*1024,
|
||||
UVM_CHUNK_SIZE_256K = 256*1024,
|
||||
UVM_CHUNK_SIZE_512K = 512*1024,
|
||||
UVM_CHUNK_SIZE_1M = 1024*1024,
|
||||
UVM_CHUNK_SIZE_2M = 2*1024*1024,
|
||||
UVM_CHUNK_SIZE_MAX = UVM_CHUNK_SIZE_2M,
|
||||
UVM_CHUNK_SIZE_INVALID = UVM_CHUNK_SIZE_MAX * 2ULL
|
||||
UVM_CHUNK_SIZE_INVALID = UVM_CHUNK_SIZE_MAX * 2
|
||||
} uvm_chunk_size_t;
|
||||
|
||||
#define UVM_CHUNK_SIZES_MASK (uvm_chunk_sizes_mask_t)(UVM_CHUNK_SIZE_MAX | (UVM_CHUNK_SIZE_MAX-1))
|
||||
|
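Only the enum initializers change here; the ULL suffixes had no effect on the enumerators' type since every value fits in an int. The mask definition is untouched, and for reference it expands to every chunk size from 1 byte up to UVM_CHUNK_SIZE_MAX:

/* Worked arithmetic:
 *   UVM_CHUNK_SIZE_MAX       = 2M                   = 0x200000
 *   UVM_CHUNK_SIZE_MAX - 1                          = 0x1fffff
 *   UVM_CHUNK_SIZES_MASK     = 0x200000 | 0x1fffff  = 0x3fffff
 * i.e. every power-of-two size 1, 2, 4, ... 2M lies inside the mask.
 */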
@ -1,5 +1,5 @@
/*******************************************************************************
    Copyright (c) 2017-2023 NVIDIA Corporation
    Copyright (c) 2017-2024 NVIDIA Corporation

    Permission is hereby granted, free of charge, to any person obtaining a copy
    of this software and associated documentation files (the "Software"), to
@ -43,7 +43,7 @@ NV_STATUS uvm_pmm_sysmem_init(void)
    // Ensure that only supported CPU chunk sizes are enabled.
    uvm_cpu_chunk_allocation_sizes &= UVM_CPU_CHUNK_SIZES;
    if (!uvm_cpu_chunk_allocation_sizes || !(uvm_cpu_chunk_allocation_sizes & PAGE_SIZE)) {
        pr_info("Invalid value for uvm_cpu_chunk_allocation_sizes = 0x%x, using 0x%lx instead\n",
        pr_info("Invalid value for uvm_cpu_chunk_allocation_sizes = 0x%x, using 0x%llx instead\n",
                uvm_cpu_chunk_allocation_sizes,
                UVM_CPU_CHUNK_SIZES);
        uvm_cpu_chunk_allocation_sizes = UVM_CPU_CHUNK_SIZES;
@ -461,69 +461,12 @@ static NvU32 compute_gpu_mappings_entry_index(uvm_parent_processor_mask_t *dma_a
    return uvm_parent_processor_mask_get_gpu_count(&subset_mask);
}

static void cpu_chunk_release(nv_kref_t *kref)
{
    uvm_cpu_chunk_t *chunk = container_of(kref, uvm_cpu_chunk_t, refcount);
    uvm_parent_processor_mask_t *mapping_mask;
    uvm_parent_processor_id_t id;
    uvm_cpu_physical_chunk_t *phys_chunk = NULL;
    uvm_cpu_logical_chunk_t *logical_chunk = NULL;

    if (uvm_cpu_chunk_is_physical(chunk)) {
        phys_chunk = uvm_cpu_chunk_to_physical(chunk);
        uvm_assert_mutex_unlocked(&phys_chunk->lock);
        mapping_mask = &phys_chunk->gpu_mappings.dma_addrs_mask;
    }
    else {
        logical_chunk = uvm_cpu_chunk_to_logical(chunk);
        mapping_mask = &logical_chunk->mapped_gpus;
    }

    for_each_parent_id_in_mask(id, mapping_mask) {
        uvm_parent_gpu_t *parent_gpu = uvm_parent_gpu_get(id);
        uvm_cpu_chunk_unmap_parent_gpu_phys(chunk, parent_gpu);
    }

    if (uvm_cpu_chunk_is_physical(chunk)) {
        if (phys_chunk->gpu_mappings.max_entries > 1)
            uvm_kvfree(phys_chunk->gpu_mappings.dynamic_entries);

        if (uvm_cpu_chunk_get_size(chunk) > PAGE_SIZE &&
            !bitmap_empty(phys_chunk->dirty_bitmap, uvm_cpu_chunk_num_pages(chunk)))
            SetPageDirty(phys_chunk->common.page);

        uvm_kvfree(phys_chunk->dirty_bitmap);

        if (chunk->type != UVM_CPU_CHUNK_TYPE_HMM)
            put_page(phys_chunk->common.page);
    }
    else {
        uvm_cpu_chunk_free(logical_chunk->parent);
    }

    uvm_kvfree(chunk);
}

static void uvm_cpu_chunk_get(uvm_cpu_chunk_t *chunk)
{
    UVM_ASSERT(chunk);
    nv_kref_get(&chunk->refcount);
}

void uvm_cpu_chunk_free(uvm_cpu_chunk_t *chunk)
{
    if (!chunk)
        return;

    nv_kref_put(&chunk->refcount, cpu_chunk_release);
}

static uvm_cpu_physical_chunk_t *get_physical_parent(uvm_cpu_chunk_t *chunk)
{
    UVM_ASSERT(chunk);
    UVM_ASSERT(chunk->page);

    while (!uvm_cpu_chunk_is_physical(chunk))
    while (uvm_cpu_chunk_is_logical(chunk))
        chunk = uvm_cpu_chunk_to_logical(chunk)->parent;

    return uvm_cpu_chunk_to_physical(chunk);
@ -581,6 +524,7 @@ static uvm_cpu_phys_mapping_t *chunk_phys_mapping_alloc(uvm_cpu_physical_chunk_t
static uvm_cpu_phys_mapping_t *chunk_phys_mapping_get(uvm_cpu_physical_chunk_t *chunk, uvm_parent_gpu_id_t id)
{
    uvm_assert_mutex_locked(&chunk->lock);

    if (uvm_parent_processor_mask_test(&chunk->gpu_mappings.dma_addrs_mask, id)) {
        if (chunk->gpu_mappings.max_entries == 1) {
            return &chunk->gpu_mappings.static_entry;
@ -598,7 +542,6 @@ static void chunk_inc_gpu_mapping(uvm_cpu_physical_chunk_t *chunk, uvm_parent_gp
{
    uvm_cpu_phys_mapping_t *mapping;

    uvm_assert_mutex_locked(&chunk->lock);
    mapping = chunk_phys_mapping_get(chunk, id);
    UVM_ASSERT(mapping);
    mapping->map_count++;
@ -608,7 +551,6 @@ static void chunk_dec_gpu_mapping(uvm_cpu_physical_chunk_t *chunk, uvm_parent_gp
{
    uvm_cpu_phys_mapping_t *mapping;

    uvm_assert_mutex_locked(&chunk->lock);
    mapping = chunk_phys_mapping_get(chunk, id);
    UVM_ASSERT(mapping);
    UVM_ASSERT(mapping->dma_addr && mapping->map_count);
@ -616,6 +558,8 @@ static void chunk_dec_gpu_mapping(uvm_cpu_physical_chunk_t *chunk, uvm_parent_gp
    if (mapping->map_count == 0) {
        uvm_parent_gpu_t *parent_gpu = uvm_parent_gpu_get(id);

        UVM_ASSERT(uvm_sub_processor_mask_empty(&mapping->sub_processors));

        uvm_parent_gpu_unmap_cpu_pages(parent_gpu, mapping->dma_addr, uvm_cpu_chunk_get_size(&chunk->common));
        mapping->dma_addr = 0;
        if (chunk->gpu_mappings.max_entries > 1) {
@ -631,7 +575,7 @@ static void chunk_dec_gpu_mapping(uvm_cpu_physical_chunk_t *chunk, uvm_parent_gp
    }
}

NvU64 uvm_cpu_chunk_get_parent_gpu_phys_addr(uvm_cpu_chunk_t *chunk, uvm_parent_gpu_t *parent_gpu)
NvU64 uvm_cpu_chunk_get_gpu_phys_addr(uvm_cpu_chunk_t *chunk, uvm_gpu_t *gpu)
{
    uvm_cpu_physical_chunk_t *phys_chunk = get_physical_parent(chunk);
    uvm_cpu_phys_mapping_t *mapping;
@ -641,36 +585,41 @@ NvU64 uvm_cpu_chunk_get_parent_gpu_phys_addr(uvm_cpu_chunk_t *chunk, uvm_parent_
    if (uvm_cpu_chunk_is_logical(chunk)) {
        uvm_cpu_logical_chunk_t *logical_chunk = uvm_cpu_chunk_to_logical(chunk);

        if (!uvm_parent_processor_mask_test(&logical_chunk->mapped_gpus, parent_gpu->id))
        if (!uvm_processor_mask_test(&logical_chunk->mapped_gpus, gpu->id))
            return 0;

        parent_offset = cpu_chunk_get_phys_index(logical_chunk);
    }

    uvm_mutex_lock(&phys_chunk->lock);
    mapping = chunk_phys_mapping_get(phys_chunk, parent_gpu->id);
    if (mapping)
    mapping = chunk_phys_mapping_get(phys_chunk, gpu->parent->id);
    if (mapping &&
        (uvm_cpu_chunk_is_logical(chunk) ||
         uvm_sub_processor_mask_test(&mapping->sub_processors, uvm_id_sub_processor_index(gpu->id))))
        dma_addr = mapping->dma_addr + (parent_offset * PAGE_SIZE);

    uvm_mutex_unlock(&phys_chunk->lock);

    return dma_addr;
}

// Create a DMA mapping for the chunk on the given parent GPU. This will map the
// entire parent physical chunk on the GPU.
// Create a DMA mapping for the chunk on the given GPU. This will map the
// entire physical chunk on the parent GPU and record that a given MIG
// partition is using the mapping.
//
// Returns NV_OK on success. On error, any of the errors returned by
// uvm_parent_gpu_map_cpu_pages() can be returned. In the case that the DMA
// mapping structure could not be allocated, NV_ERR_NO_MEMORY is returned.
static NV_STATUS cpu_chunk_map_parent_gpu_phys(uvm_cpu_chunk_t *chunk, uvm_parent_gpu_t *parent_gpu)
static NV_STATUS cpu_chunk_map_gpu_phys(uvm_cpu_chunk_t *chunk, uvm_gpu_t *gpu)
{
    uvm_parent_gpu_t *parent_gpu = gpu->parent;
    uvm_cpu_physical_chunk_t *phys_chunk;
    uvm_cpu_logical_chunk_t *logical_chunk = NULL;
    uvm_cpu_phys_mapping_t *mapping;
    NV_STATUS status = NV_OK;

    if (uvm_cpu_chunk_is_logical(chunk)) {
        logical_chunk = uvm_cpu_chunk_to_logical(chunk);
        if (uvm_parent_processor_mask_test(&logical_chunk->mapped_gpus, parent_gpu->id))
        if (uvm_processor_mask_test(&logical_chunk->mapped_gpus, gpu->id))
            return status;
    }

@ -679,7 +628,6 @@ static NV_STATUS cpu_chunk_map_parent_gpu_phys(uvm_cpu_chunk_t *chunk, uvm_paren

    if (!uvm_parent_processor_mask_test(&phys_chunk->gpu_mappings.dma_addrs_mask, parent_gpu->id)) {
        uvm_chunk_size_t chunk_size = uvm_cpu_chunk_get_size(&phys_chunk->common);
        uvm_cpu_phys_mapping_t *mapping;
        NvU64 dma_addr;

        status = uvm_parent_gpu_map_cpu_pages(parent_gpu, phys_chunk->common.page, chunk_size, &dma_addr);
@ -695,39 +643,59 @@ static NV_STATUS cpu_chunk_map_parent_gpu_phys(uvm_cpu_chunk_t *chunk, uvm_paren

        mapping->dma_addr = dma_addr;
        mapping->map_count = 1;
        uvm_sub_processor_mask_zero(&mapping->sub_processors);
        if (!logical_chunk)
            uvm_sub_processor_mask_set(&mapping->sub_processors, uvm_id_sub_processor_index(gpu->id));

        uvm_parent_processor_mask_set(&phys_chunk->gpu_mappings.dma_addrs_mask, parent_gpu->id);
    }
    else {
        // The mapping count on the physical chunk is only increased when
        // mapping logical chunks.
        if (uvm_cpu_chunk_is_logical(chunk))
            chunk_inc_gpu_mapping(phys_chunk, parent_gpu->id);
        mapping = chunk_phys_mapping_get(phys_chunk, parent_gpu->id);
        UVM_ASSERT(mapping);

        // Increment the map_count for logical chunks or the first time a
        // MIG partition is sharing a physical chunk.
        if (logical_chunk ||
            !uvm_sub_processor_mask_test_and_set(&mapping->sub_processors, uvm_id_sub_processor_index(gpu->id)))
            mapping->map_count++;
    }

    if (logical_chunk) {
        uvm_processor_mask_set(&logical_chunk->mapped_gpus, gpu->id);
        UVM_ASSERT(uvm_sub_processor_mask_empty(&mapping->sub_processors));
    }
    else {
        UVM_ASSERT(!uvm_sub_processor_mask_empty(&mapping->sub_processors));
        UVM_ASSERT(uvm_sub_processor_mask_get_count(&mapping->sub_processors) == mapping->map_count);
    }

done:
    uvm_mutex_unlock(&phys_chunk->lock);

    if (status == NV_OK && uvm_cpu_chunk_is_logical(chunk))
        uvm_parent_processor_mask_set(&logical_chunk->mapped_gpus, parent_gpu->id);

    return status;
}

void uvm_cpu_chunk_unmap_parent_gpu_phys(uvm_cpu_chunk_t *chunk, uvm_parent_gpu_t *parent_gpu)
static void cpu_chunk_unmap_gpu_phys(uvm_cpu_chunk_t *chunk, uvm_gpu_id_t gpu_id)
{
    uvm_cpu_physical_chunk_t *phys_chunk;
    uvm_cpu_logical_chunk_t *logical_chunk;
    uvm_cpu_physical_chunk_t *phys_chunk = get_physical_parent(chunk);
    uvm_parent_gpu_id_t id = uvm_parent_gpu_id_from_gpu_id(gpu_id);

    uvm_mutex_lock(&phys_chunk->lock);

    if (uvm_cpu_chunk_is_logical(chunk)) {
        logical_chunk = uvm_cpu_chunk_to_logical(chunk);
        if (!uvm_parent_processor_mask_test_and_clear(&logical_chunk->mapped_gpus, parent_gpu->id))
            return;
    }
        uvm_processor_mask_t *mapping_mask = &uvm_cpu_chunk_to_logical(chunk)->mapped_gpus;

    phys_chunk = get_physical_parent(chunk);
    uvm_mutex_lock(&phys_chunk->lock);
    if (uvm_parent_processor_mask_test(&phys_chunk->gpu_mappings.dma_addrs_mask, parent_gpu->id))
        chunk_dec_gpu_mapping(phys_chunk, parent_gpu->id);
        if (uvm_processor_mask_test_and_clear(mapping_mask, gpu_id))
            chunk_dec_gpu_mapping(phys_chunk, id);
    }
    else {
        if (uvm_parent_processor_mask_test(&phys_chunk->gpu_mappings.dma_addrs_mask, id)) {
            uvm_cpu_phys_mapping_t *mapping = chunk_phys_mapping_get(phys_chunk, id);

            if (uvm_sub_processor_mask_test_and_clear(&mapping->sub_processors, uvm_id_sub_processor_index(gpu_id)))
                chunk_dec_gpu_mapping(phys_chunk, id);
        }
    }

    uvm_mutex_unlock(&phys_chunk->lock);
}
@ -737,17 +705,112 @@ NV_STATUS uvm_cpu_chunk_map_gpu(uvm_cpu_chunk_t *chunk, uvm_gpu_t *gpu)
    NV_STATUS status;
    uvm_chunk_size_t chunk_size = uvm_cpu_chunk_get_size(chunk);

    status = cpu_chunk_map_parent_gpu_phys(chunk, gpu->parent);
    status = cpu_chunk_map_gpu_phys(chunk, gpu);
    if (status != NV_OK)
        return status;

    status = uvm_mmu_sysmem_map(gpu, uvm_cpu_chunk_get_parent_gpu_phys_addr(chunk, gpu->parent), chunk_size);
    status = uvm_mmu_sysmem_map(gpu, uvm_cpu_chunk_get_gpu_phys_addr(chunk, gpu), chunk_size);
    if (status != NV_OK)
        uvm_cpu_chunk_unmap_parent_gpu_phys(chunk, gpu->parent);
        cpu_chunk_unmap_gpu_phys(chunk, gpu->id);

    return status;
}

void uvm_cpu_chunk_unmap_gpu(uvm_cpu_chunk_t *chunk, uvm_gpu_t *gpu)
{
    cpu_chunk_unmap_gpu_phys(chunk, gpu->id);

    // Note: there is no corresponding uvm_mmu_sysmem_unmap() for
    // uvm_mmu_sysmem_map().
}
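
// [Editor's illustration, not part of the driver source.] A minimal sketch of
// how the MIG-aware mapping API above fits together: map a CPU chunk for a
// specific uvm_gpu_t (a MIG instance or the whole GPU), read back the DMA
// address recorded on the parent physical chunk, and drop this GPU's
// reference again. All calls exist in this change; the wrapper itself is
// hypothetical.
static inline NV_STATUS example_chunk_dma_roundtrip(uvm_cpu_chunk_t *chunk, uvm_gpu_t *gpu)
{
    NvU64 dma_addr;
    NV_STATUS status = uvm_cpu_chunk_map_gpu(chunk, gpu);

    if (status != NV_OK)
        return status;

    // Every MIG instance under the same parent GPU sees the same address,
    // since the mapping lives on the physical chunk.
    dma_addr = uvm_cpu_chunk_get_gpu_phys_addr(chunk, gpu);
    UVM_ASSERT(dma_addr != 0);

    // Drops only this GPU's reference; the parent mapping stays alive while
    // other MIG instances or logical chunks still use it.
    uvm_cpu_chunk_unmap_gpu(chunk, gpu);

    return NV_OK;
}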

static void cpu_logical_chunk_release(uvm_cpu_logical_chunk_t *logical_chunk)
{
    uvm_cpu_physical_chunk_t *phys_chunk = get_physical_parent(logical_chunk->parent);
    uvm_processor_id_t gpu_id;

    uvm_mutex_lock(&phys_chunk->lock);

    for_each_id_in_mask(gpu_id, &logical_chunk->mapped_gpus)
        chunk_dec_gpu_mapping(phys_chunk, uvm_parent_gpu_id_from_gpu_id(gpu_id));

    uvm_mutex_unlock(&phys_chunk->lock);

    uvm_cpu_chunk_free(logical_chunk->parent);
}

static void cpu_physical_chunk_release(uvm_cpu_chunk_t *chunk)
{
    uvm_cpu_physical_chunk_t *phys_chunk = uvm_cpu_chunk_to_physical(chunk);
    uvm_parent_processor_id_t id;

    uvm_assert_mutex_unlocked(&phys_chunk->lock);

    // There should be no other threads using this chunk but we lock it because
    // of assertions in chunk_phys_mapping_get() and chunk_dec_gpu_mapping().
    uvm_mutex_lock(&phys_chunk->lock);

    for_each_parent_id_in_mask(id, &phys_chunk->gpu_mappings.dma_addrs_mask) {
        uvm_cpu_phys_mapping_t *mapping = chunk_phys_mapping_get(phys_chunk, id);
        NvU32 count;

        UVM_ASSERT(mapping);
        UVM_ASSERT(!uvm_sub_processor_mask_empty(&mapping->sub_processors));

        // Get a count of set bits in the sub_processors mask then clear it so
        // that chunk_dec_gpu_mapping() sees an empty mask when map_count == 0.
        // Using for_each_sub_processor_in_mask could try to dereference
        // mapping after map_count == 0 in the loop below.
        count = uvm_sub_processor_mask_get_count(&mapping->sub_processors);
        uvm_sub_processor_mask_zero(&mapping->sub_processors);

        for (; count; count--)
            chunk_dec_gpu_mapping(phys_chunk, id);
    }

    uvm_mutex_unlock(&phys_chunk->lock);

    UVM_ASSERT(uvm_parent_processor_mask_empty(&phys_chunk->gpu_mappings.dma_addrs_mask));

    if (phys_chunk->gpu_mappings.max_entries > 1)
        uvm_kvfree(phys_chunk->gpu_mappings.dynamic_entries);

    if (uvm_cpu_chunk_get_size(chunk) > PAGE_SIZE &&
        !bitmap_empty(phys_chunk->dirty_bitmap, uvm_cpu_chunk_num_pages(chunk)))
        SetPageDirty(chunk->page);

    uvm_kvfree(phys_chunk->dirty_bitmap);

    if (chunk->type != UVM_CPU_CHUNK_TYPE_HMM)
        put_page(chunk->page);
}

static void cpu_chunk_release(nv_kref_t *kref)
{
    uvm_cpu_chunk_t *chunk = container_of(kref, uvm_cpu_chunk_t, refcount);

    if (uvm_cpu_chunk_is_logical(chunk))
        cpu_logical_chunk_release(uvm_cpu_chunk_to_logical(chunk));
    else
        cpu_physical_chunk_release(chunk);

    uvm_kvfree(chunk);
}

static void uvm_cpu_chunk_get(uvm_cpu_chunk_t *chunk)
{
    UVM_ASSERT(chunk);
    nv_kref_get(&chunk->refcount);
}

void uvm_cpu_chunk_free(uvm_cpu_chunk_t *chunk)
{
    if (!chunk)
        return;

    nv_kref_put(&chunk->refcount, cpu_chunk_release);
}

static struct page *uvm_cpu_chunk_alloc_page(uvm_chunk_size_t alloc_size,
                                             int nid,
                                             uvm_cpu_chunk_alloc_flags_t alloc_flags)
@ -876,14 +939,37 @@ int uvm_cpu_chunk_get_numa_node(uvm_cpu_chunk_t *chunk)
    return page_to_nid(chunk->page);
}

// Convert the mask of DMA mapped parent GPUs and the sub-processor mask into
// one uvm_processor_mask_t in 'dma_map_mask'.
static void get_dma_map_mask(uvm_cpu_physical_chunk_t *chunk, uvm_processor_mask_t *dma_map_mask)
{
    uvm_parent_processor_id_t id;
    NvU32 sub_index;

    uvm_assert_mutex_locked(&chunk->lock);

    for_each_parent_id_in_mask(id, &chunk->gpu_mappings.dma_addrs_mask) {
        uvm_cpu_phys_mapping_t *mapping = chunk_phys_mapping_get(chunk, id);

        for_each_sub_processor_index_in_mask(sub_index, &mapping->sub_processors) {
            uvm_processor_id_t gpu_id = uvm_gpu_id_from_sub_processor(id, sub_index);

            uvm_sub_processor_mask_clear(&mapping->sub_processors, sub_index);
            uvm_processor_mask_set(dma_map_mask, gpu_id);
        }

        UVM_ASSERT(uvm_sub_processor_mask_empty(&mapping->sub_processors));
    }
}

NV_STATUS uvm_cpu_chunk_split(uvm_cpu_chunk_t *chunk, uvm_cpu_chunk_t **new_chunks)
{
    NV_STATUS status = NV_OK;
    uvm_cpu_logical_chunk_t *new_chunk;
    uvm_cpu_physical_chunk_t *phys_chunk = get_physical_parent(chunk);
    uvm_cpu_logical_chunk_t *logical_chunk = NULL;
    uvm_parent_processor_id_t id;
    uvm_parent_processor_mask_t *dma_map_mask;
    uvm_processor_id_t gpu_id;
    uvm_processor_mask_t *dma_map_mask = NULL;
    uvm_chunk_size_t new_size;
    size_t num_new_chunks;
    size_t num_subchunk_pages;
@ -902,21 +988,20 @@ NV_STATUS uvm_cpu_chunk_split(uvm_cpu_chunk_t *chunk, uvm_cpu_chunk_t **new_chun

    // Get the largest size below the size of the input chunk.
    new_size = uvm_chunk_find_prev_size(uvm_cpu_chunk_get_allocation_sizes(), uvm_cpu_chunk_get_size(chunk));
    UVM_ASSERT(new_size);
    UVM_ASSERT(new_size != UVM_CHUNK_SIZE_INVALID);
    num_new_chunks = uvm_cpu_chunk_get_size(chunk) / new_size;
    num_subchunk_pages = new_size / PAGE_SIZE;

    if (uvm_cpu_chunk_is_physical(chunk)) {
        dma_map_mask = &phys_chunk->gpu_mappings.dma_addrs_mask;
    }
    else {
    if (uvm_cpu_chunk_is_logical(chunk)) {
        logical_chunk = uvm_cpu_chunk_to_logical(chunk);
        dma_map_mask = &logical_chunk->mapped_gpus;
    }

    uvm_mutex_lock(&phys_chunk->lock);

    for (i = 0; i < num_new_chunks; i++) {
        new_chunk = uvm_kvmalloc_zero(sizeof(*logical_chunk));
        new_chunk = uvm_kvmalloc_zero(sizeof(*new_chunk));
        if (!new_chunk) {
            uvm_mutex_unlock(&phys_chunk->lock);
            status = NV_ERR_NO_MEMORY;
@ -929,19 +1014,25 @@ NV_STATUS uvm_cpu_chunk_split(uvm_cpu_chunk_t *chunk, uvm_cpu_chunk_t **new_chun
        nv_kref_init(&new_chunk->common.refcount);
        new_chunk->parent = chunk;
        uvm_cpu_chunk_get(new_chunk->parent);
        for_each_parent_id_in_mask(id, dma_map_mask)
            chunk_inc_gpu_mapping(phys_chunk, id);
        uvm_parent_processor_mask_copy(&new_chunk->mapped_gpus, dma_map_mask);
        if (i == 0 && !logical_chunk) {
            dma_map_mask = &new_chunk->mapped_gpus;
            get_dma_map_mask(phys_chunk, dma_map_mask);
        }
        else {
            uvm_processor_mask_copy(&new_chunk->mapped_gpus, dma_map_mask);
        }
        for_each_id_in_mask(gpu_id, dma_map_mask)
            chunk_inc_gpu_mapping(phys_chunk, uvm_parent_gpu_id_from_gpu_id(gpu_id));
        new_chunks[i] = &new_chunk->common;
    }

    // Release the references that are held by the chunk being split.
    for_each_parent_id_in_mask(id, dma_map_mask)
        chunk_dec_gpu_mapping(phys_chunk, id);
    for_each_id_in_mask(gpu_id, dma_map_mask)
        chunk_dec_gpu_mapping(phys_chunk, uvm_parent_gpu_id_from_gpu_id(gpu_id));

    // If the chunk being split is a logical chunk, clear its mapped_gpus mask.
    if (uvm_cpu_chunk_is_logical(chunk))
        uvm_parent_processor_mask_zero(&logical_chunk->mapped_gpus);
    if (logical_chunk)
        uvm_processor_mask_zero(&logical_chunk->mapped_gpus);

    uvm_mutex_unlock(&phys_chunk->lock);

@ -963,7 +1054,7 @@ static bool verify_merging_chunks(uvm_cpu_chunk_t **chunks, size_t num_chunks)
{
    uvm_cpu_logical_chunk_t *logical_chunk;
    uvm_cpu_chunk_t *first_chunk_parent;
    uvm_parent_processor_mask_t *first_chunk_mapped_gpus;
    uvm_processor_mask_t *first_chunk_mapped_gpus;
    uvm_chunk_size_t first_chunk_size;
    size_t i;

@ -994,7 +1085,7 @@ static bool verify_merging_chunks(uvm_cpu_chunk_t **chunks, size_t num_chunks)
        // 2.1 All mappings to GPUs in each of child chunks' masks that are
        //     not also present in the parent chunk's mask are destroyed.
        // 2.2 mapped_gpus mask of the parent chunk remains unmodified.
        UVM_ASSERT(uvm_parent_processor_mask_equal(&logical_chunk->mapped_gpus, first_chunk_mapped_gpus));
        UVM_ASSERT(uvm_processor_mask_equal(&logical_chunk->mapped_gpus, first_chunk_mapped_gpus));
    }

    return true;
@ -1005,14 +1096,14 @@ uvm_cpu_chunk_t *uvm_cpu_chunk_merge(uvm_cpu_chunk_t **chunks)
    uvm_cpu_chunk_t *parent;
    uvm_cpu_logical_chunk_t *logical_chunk;
    uvm_cpu_physical_chunk_t *phys_chunk;
    uvm_parent_processor_id_t id;
    uvm_processor_id_t gpu_id;
    uvm_chunk_size_t chunk_size;
    uvm_chunk_size_t parent_chunk_size;
    size_t num_merge_chunks;
    size_t i;

    UVM_ASSERT(chunks);
    UVM_ASSERT(!uvm_cpu_chunk_is_physical(chunks[0]));
    UVM_ASSERT(uvm_cpu_chunk_is_logical(chunks[0]));

    logical_chunk = uvm_cpu_chunk_to_logical(chunks[0]);
    parent = logical_chunk->parent;
@ -1033,11 +1124,22 @@ uvm_cpu_chunk_t *uvm_cpu_chunk_merge(uvm_cpu_chunk_t **chunks)
    phys_chunk = get_physical_parent(chunks[0]);

    uvm_mutex_lock(&phys_chunk->lock);
    for_each_parent_id_in_mask(id, &logical_chunk->mapped_gpus)
        chunk_inc_gpu_mapping(phys_chunk, id);

    if (!uvm_cpu_chunk_is_physical(parent))
        uvm_parent_processor_mask_copy(&uvm_cpu_chunk_to_logical(parent)->mapped_gpus, &logical_chunk->mapped_gpus);
    for_each_id_in_mask(gpu_id, &logical_chunk->mapped_gpus)
        chunk_inc_gpu_mapping(phys_chunk, uvm_parent_gpu_id_from_gpu_id(gpu_id));

    if (uvm_cpu_chunk_is_logical(parent)) {
        uvm_processor_mask_copy(&uvm_cpu_chunk_to_logical(parent)->mapped_gpus, &logical_chunk->mapped_gpus);
    }
    else {
        // Restore the mapping->sub_processors mask for each mapped GPU.
        for_each_id_in_mask(gpu_id, &logical_chunk->mapped_gpus) {
            uvm_cpu_phys_mapping_t *mapping = chunk_phys_mapping_get(phys_chunk, uvm_parent_gpu_id_from_gpu_id(gpu_id));

            UVM_ASSERT(mapping);
            uvm_sub_processor_mask_set(&mapping->sub_processors, uvm_id_sub_processor_index(gpu_id));
        }
    }

    uvm_mutex_unlock(&phys_chunk->lock);

@ -1,5 +1,5 @@
/*******************************************************************************
    Copyright (c) 2017-2023 NVIDIA Corporation
    Copyright (c) 2017-2024 NVIDIA Corporation

    Permission is hereby granted, free of charge, to any person obtaining a copy
    of this software and associated documentation files (the "Software"), to
@ -246,8 +246,19 @@ struct uvm_cpu_chunk_struct

typedef struct
{
    // Physical GPU DMA address of the CPU chunk.
    NvU64 dma_addr;

    // Reference count of all sub_processors using this mapping across logical
    // and physical chunks.
    NvU32 map_count;

    // Mask of MIG instances or physical GPU.
    // This is only valid for physical CPU chunks that have not been split into
    // logical chunks. When the chunk is split, all the
    // uvm_cpu_logical_chunk_t::mapped_gpus masks have a bit set for each
    // count in map_count and sub_processors is set to zero.
    uvm_sub_processor_mask_t sub_processors;
} uvm_cpu_phys_mapping_t;
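
// [Editor's illustration, not part of the driver source.] For a physical
// chunk that has not been split, cpu_chunk_map_gpu_phys() keeps the invariant
// map_count == popcount(sub_processors): each MIG instance that maps the
// chunk contributes exactly one bit and one reference. A hypothetical
// walk-through of that bookkeeping:
static inline void example_phys_mapping_bookkeeping(void)
{
    uvm_cpu_phys_mapping_t mapping = {0};

    // First MIG instance (sub-processor index 0) maps the chunk.
    if (!uvm_sub_processor_mask_test_and_set(&mapping.sub_processors, 0))
        mapping.map_count++;

    // A second MIG instance (sub-processor index 3) maps the same chunk.
    if (!uvm_sub_processor_mask_test_and_set(&mapping.sub_processors, 3))
        mapping.map_count++;

    // Mapping again from index 3 does not add another reference.
    if (!uvm_sub_processor_mask_test_and_set(&mapping.sub_processors, 3))
        mapping.map_count++;

    UVM_ASSERT(mapping.map_count == uvm_sub_processor_mask_get_count(&mapping.sub_processors));
}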

typedef struct
@ -304,7 +315,9 @@ typedef struct

    // Pointer to the parent chunk (which could also be a logical chunk).
    uvm_cpu_chunk_t *parent;
    uvm_parent_processor_mask_t mapped_gpus;

    // This is a reference per bit but also recorded in mapping->map_count.
    uvm_processor_mask_t mapped_gpus;
} uvm_cpu_logical_chunk_t;

// Return the set of allowed CPU chunk allocation sizes.
@ -417,15 +430,15 @@ void uvm_cpu_chunk_free(uvm_cpu_chunk_t *chunk);
// For more details see uvm_mmu_sysmem_map().
NV_STATUS uvm_cpu_chunk_map_gpu(uvm_cpu_chunk_t *chunk, uvm_gpu_t *gpu);

// Destroy a CPU chunk's DMA mapping for the parent GPU.
// Destroy a CPU chunk's DMA mapping for the given GPU.
// If chunk is a logical chunk, this call may not necessarily destroy the DMA
// mapping of the parent physical chunk since all logical chunks share the
// parent's DMA mapping.
void uvm_cpu_chunk_unmap_parent_gpu_phys(uvm_cpu_chunk_t *chunk, uvm_parent_gpu_t *parent_gpu);
// mapping of the parent physical chunk since all logical chunks and MIG
// partitions share the parent's DMA mapping.
void uvm_cpu_chunk_unmap_gpu(uvm_cpu_chunk_t *chunk, uvm_gpu_t *gpu);

// Get the CPU chunk's DMA mapping address for the specified GPU ID.
// If there is no mapping for the GPU, 0 is returned.
NvU64 uvm_cpu_chunk_get_parent_gpu_phys_addr(uvm_cpu_chunk_t *chunk, uvm_parent_gpu_t *parent_gpu);
NvU64 uvm_cpu_chunk_get_gpu_phys_addr(uvm_cpu_chunk_t *chunk, uvm_gpu_t *gpu);

// Split a CPU chunk into a set of CPU chunks of the next size down from the set
// of enabled CPU chunk sizes.
@ -1,5 +1,5 @@
/*******************************************************************************
    Copyright (c) 2017-2023 NVIDIA Corporation
    Copyright (c) 2017-2024 NVIDIA Corporation

    Permission is hereby granted, free of charge, to any person obtaining a copy
    of this software and associated documentation files (the "Software"), to
@ -626,7 +626,7 @@ static NV_STATUS test_cpu_chunk_mapping_access(uvm_cpu_chunk_t *chunk, uvm_gpu_t
    TEST_NV_CHECK_RET(cpu_chunk_map_on_cpu(chunk, (void **)&cpu_addr));
    memset(cpu_addr, 0, chunk_size);

    dma_addr = uvm_cpu_chunk_get_parent_gpu_phys_addr(chunk, gpu->parent);
    dma_addr = uvm_cpu_chunk_get_gpu_phys_addr(chunk, gpu);
    gpu_addr = uvm_gpu_address_copy(gpu, uvm_gpu_phys_address(UVM_APERTURE_SYS, dma_addr));

    TEST_NV_CHECK_GOTO(uvm_push_begin_acquire(gpu->channel_manager,
@ -733,21 +733,21 @@ static NV_STATUS test_cpu_chunk_mapping_basic_verify(uvm_gpu_t *gpu,
    // - no GPU mapping address.
    TEST_CHECK_GOTO(phys_chunk->gpu_mappings.max_entries == 1, done);
    TEST_CHECK_GOTO(uvm_parent_processor_mask_get_gpu_count(&phys_chunk->gpu_mappings.dma_addrs_mask) == 0, done);
    TEST_CHECK_GOTO(uvm_cpu_chunk_get_parent_gpu_phys_addr(chunk, gpu->parent) == 0, done);
    TEST_CHECK_GOTO(uvm_cpu_chunk_get_gpu_phys_addr(chunk, gpu) == 0, done);
    TEST_NV_CHECK_GOTO(uvm_cpu_chunk_map_gpu(chunk, gpu), done);

    // Test basic access.
    TEST_NV_CHECK_GOTO(test_cpu_chunk_mapping_access(chunk, gpu), done);

    // Test double map is harmless.
    dma_addr = uvm_cpu_chunk_get_parent_gpu_phys_addr(chunk, gpu->parent);
    dma_addr = uvm_cpu_chunk_get_gpu_phys_addr(chunk, gpu);
    TEST_NV_CHECK_GOTO(uvm_cpu_chunk_map_gpu(chunk, gpu), done);
    TEST_CHECK_GOTO(uvm_cpu_chunk_get_parent_gpu_phys_addr(chunk, gpu->parent) == dma_addr, done);
    TEST_CHECK_GOTO(uvm_cpu_chunk_get_gpu_phys_addr(chunk, gpu) == dma_addr, done);
    TEST_NV_CHECK_GOTO(test_cpu_chunk_mapping_access(chunk, gpu), done);

    // Test unmap, remap.
    uvm_cpu_chunk_unmap_parent_gpu_phys(chunk, gpu->parent);
    TEST_CHECK_GOTO(uvm_cpu_chunk_get_parent_gpu_phys_addr(chunk, gpu->parent) == 0, done);
    uvm_cpu_chunk_unmap_gpu(chunk, gpu);
    TEST_CHECK_GOTO(uvm_cpu_chunk_get_gpu_phys_addr(chunk, gpu) == 0, done);
    TEST_NV_CHECK_GOTO(uvm_cpu_chunk_map_gpu(chunk, gpu), done);
    TEST_NV_CHECK_GOTO(test_cpu_chunk_mapping_access(chunk, gpu), done);

@ -768,6 +768,39 @@ static NV_STATUS test_cpu_chunk_mapping_basic(uvm_gpu_t *gpu, uvm_cpu_chunk_allo
    return NV_OK;
}

// TODO: Bug 4351121: This won't actually test anything until uvm_test
// enumerates multiple MIG instances.
static NV_STATUS test_cpu_chunk_mig(uvm_gpu_t *gpu0, uvm_gpu_t *gpu1)
{
    NV_STATUS status = NV_OK;
    uvm_cpu_chunk_t *chunk;
    uvm_cpu_physical_chunk_t *phys_chunk;
    NvU64 dma_addr_gpu0;

    UVM_ASSERT(gpu0->parent == gpu1->parent);

    TEST_NV_CHECK_RET(test_cpu_chunk_alloc(PAGE_SIZE, UVM_CPU_CHUNK_ALLOC_FLAGS_NONE, NUMA_NO_NODE, &chunk));
    phys_chunk = uvm_cpu_chunk_to_physical(chunk);

    TEST_NV_CHECK_GOTO(uvm_cpu_chunk_map_gpu(chunk, gpu0), done);
    TEST_NV_CHECK_GOTO(uvm_cpu_chunk_map_gpu(chunk, gpu1), done);
    TEST_NV_CHECK_GOTO(test_cpu_chunk_mapping_access(chunk, gpu0), done);
    TEST_NV_CHECK_GOTO(test_cpu_chunk_mapping_access(chunk, gpu1), done);

    // MIG instances in the same physical GPU share the same DMA addresses.
    dma_addr_gpu0 = uvm_cpu_chunk_get_gpu_phys_addr(chunk, gpu0);
    TEST_CHECK_GOTO(uvm_cpu_chunk_get_gpu_phys_addr(chunk, gpu1) == dma_addr_gpu0, done);

    // Unmapping one GPU shouldn't affect the other.
    uvm_cpu_chunk_unmap_gpu(chunk, gpu0);
    TEST_CHECK_GOTO(uvm_cpu_chunk_get_gpu_phys_addr(chunk, gpu0) == 0, done);
    TEST_NV_CHECK_GOTO(test_cpu_chunk_mapping_access(chunk, gpu1), done);

done:
    uvm_cpu_chunk_free(chunk);
    return status;
}

static NV_STATUS test_cpu_chunk_mapping_array(uvm_gpu_t *gpu0, uvm_gpu_t *gpu1, uvm_gpu_t *gpu2)
{
    NV_STATUS status = NV_OK;
@ -783,8 +816,8 @@ static NV_STATUS test_cpu_chunk_mapping_array(uvm_gpu_t *gpu0, uvm_gpu_t *gpu1,
    TEST_NV_CHECK_GOTO(uvm_cpu_chunk_map_gpu(chunk, gpu2), done);
    TEST_NV_CHECK_GOTO(test_cpu_chunk_mapping_access(chunk, gpu1), done);
    TEST_NV_CHECK_GOTO(test_cpu_chunk_mapping_access(chunk, gpu2), done);
    dma_addr_gpu1 = uvm_cpu_chunk_get_parent_gpu_phys_addr(chunk, gpu1->parent);
    uvm_cpu_chunk_unmap_parent_gpu_phys(chunk, gpu2->parent);
    dma_addr_gpu1 = uvm_cpu_chunk_get_gpu_phys_addr(chunk, gpu1);
    uvm_cpu_chunk_unmap_gpu(chunk, gpu2);
    TEST_NV_CHECK_GOTO(test_cpu_chunk_mapping_access(chunk, gpu1), done);
    TEST_NV_CHECK_GOTO(uvm_cpu_chunk_map_gpu(chunk, gpu0), done);
    TEST_NV_CHECK_GOTO(test_cpu_chunk_mapping_access(chunk, gpu0), done);
@ -798,7 +831,9 @@ static NV_STATUS test_cpu_chunk_mapping_array(uvm_gpu_t *gpu0, uvm_gpu_t *gpu1,
    // GPU1. It's true that we may get a false negative if both addresses
    // happened to alias and we had a bug in how the addresses are shifted in
    // the dense array, but that's better than intermittent failure.
    TEST_CHECK_GOTO(uvm_cpu_chunk_get_parent_gpu_phys_addr(chunk, gpu1->parent) == dma_addr_gpu1, done);
    // Also note that multiple MIG instances in the same physical GPU share the
    // parent's physical DMA mapping.
    TEST_CHECK_GOTO(uvm_cpu_chunk_get_gpu_phys_addr(chunk, gpu1) == dma_addr_gpu1, done);

done:
    uvm_cpu_chunk_free(chunk);
@ -828,7 +863,7 @@ static NV_STATUS do_test_cpu_chunk_split_and_merge(uvm_cpu_chunk_t *chunk, uvm_g

    TEST_NV_CHECK_GOTO(uvm_cpu_chunk_map_gpu(chunk, gpu), done_free);
    TEST_NV_CHECK_GOTO(test_cpu_chunk_mapping_access(chunk, gpu), done_free);
    uvm_cpu_chunk_unmap_parent_gpu_phys(chunk, gpu->parent);
    uvm_cpu_chunk_unmap_gpu(chunk, gpu);

    TEST_NV_CHECK_GOTO(uvm_cpu_chunk_split(chunk, split_chunks), done_free);
    TEST_CHECK_GOTO(nv_kref_read(&chunk->refcount) == num_split_chunks, done);
@ -845,13 +880,14 @@ static NV_STATUS do_test_cpu_chunk_split_and_merge(uvm_cpu_chunk_t *chunk, uvm_g
    merged_chunk = uvm_cpu_chunk_merge(split_chunks);
    TEST_CHECK_GOTO(uvm_cpu_chunk_get_size(merged_chunk) == size, done_free);
    TEST_CHECK_GOTO(merged_chunk == chunk, done_free);
    TEST_CHECK_GOTO(nv_kref_read(&chunk->refcount) == 1, done_free);

    // Since all logical chunks were mapped, the entire merged chunk should
    // be accessible without needing to map it.
    TEST_NV_CHECK_GOTO(test_cpu_chunk_mapping_access(merged_chunk, gpu), done_free);

    // Test that GPU mappings are transferred after a split
    phys_dma_addr = uvm_cpu_chunk_get_parent_gpu_phys_addr(chunk, gpu->parent);
    phys_dma_addr = uvm_cpu_chunk_get_gpu_phys_addr(chunk, gpu);

    TEST_NV_CHECK_GOTO(uvm_cpu_chunk_split(chunk, split_chunks), done_free);

@ -859,9 +895,9 @@ static NV_STATUS do_test_cpu_chunk_split_and_merge(uvm_cpu_chunk_t *chunk, uvm_g
        NvU64 dma_addr;

        TEST_NV_CHECK_GOTO(test_cpu_chunk_mapping_access(split_chunks[i], gpu), done);
        dma_addr = uvm_cpu_chunk_get_parent_gpu_phys_addr(split_chunks[i], gpu->parent);
        dma_addr = uvm_cpu_chunk_get_gpu_phys_addr(split_chunks[i], gpu);
        TEST_CHECK_GOTO(dma_addr == phys_dma_addr + (i * split_size), done);
        uvm_cpu_chunk_unmap_parent_gpu_phys(split_chunks[i], gpu->parent);
        uvm_cpu_chunk_unmap_gpu(split_chunks[i], gpu);
    }

    // Test that mapping one logical chunk does not affect others.
@ -871,7 +907,7 @@ static NV_STATUS do_test_cpu_chunk_split_and_merge(uvm_cpu_chunk_t *chunk, uvm_g

    for (i = 0; i < num_split_chunks; i++) {
        if (i != map_chunk)
            TEST_CHECK_GOTO(uvm_cpu_chunk_get_parent_gpu_phys_addr(split_chunks[i], gpu->parent) == 0, done);
            TEST_CHECK_GOTO(uvm_cpu_chunk_get_gpu_phys_addr(split_chunks[i], gpu) == 0, done);
    }

    if (split_size > PAGE_SIZE) {
@ -927,6 +963,118 @@ static NV_STATUS test_cpu_chunk_split_and_merge(uvm_gpu_t *gpu)
    return NV_OK;
}

static NV_STATUS do_test_cpu_chunk_split_and_merge_2(uvm_cpu_chunk_t *chunk, uvm_gpu_t *gpu0, uvm_gpu_t *gpu1)
{
    NV_STATUS status = NV_OK;
    uvm_chunk_size_t size = uvm_cpu_chunk_get_size(chunk);
    uvm_chunk_sizes_mask_t alloc_sizes = uvm_cpu_chunk_get_allocation_sizes();
    size_t num_split_chunks;
    uvm_cpu_chunk_t **split_chunks;
    uvm_cpu_chunk_t *merged_chunk;
    uvm_chunk_size_t split_size;
    size_t i;

    split_size = uvm_chunk_find_prev_size(alloc_sizes, size);
    UVM_ASSERT(split_size != UVM_CHUNK_SIZE_INVALID);
    num_split_chunks = size / split_size;
    split_chunks = uvm_kvmalloc_zero(num_split_chunks * sizeof(*split_chunks));

    if (!split_chunks)
        return NV_ERR_NO_MEMORY;

    // Map both GPUs.
    TEST_NV_CHECK_GOTO(uvm_cpu_chunk_map_gpu(chunk, gpu0), done_free);
    TEST_NV_CHECK_GOTO(uvm_cpu_chunk_map_gpu(chunk, gpu1), done_free);
    TEST_NV_CHECK_GOTO(test_cpu_chunk_mapping_access(chunk, gpu0), done_free);
    TEST_NV_CHECK_GOTO(test_cpu_chunk_mapping_access(chunk, gpu1), done_free);

    // Then split.
    TEST_NV_CHECK_GOTO(uvm_cpu_chunk_split(chunk, split_chunks), done_free);
    TEST_CHECK_GOTO(nv_kref_read(&chunk->refcount) == num_split_chunks, done);

    // Unmap gpu0 from all split chunks.
    for (i = 0; i < num_split_chunks; i++) {
        TEST_CHECK_GOTO(split_chunks[i], done);
        TEST_CHECK_GOTO(uvm_cpu_chunk_is_logical(split_chunks[i]), done);
        TEST_CHECK_GOTO(uvm_cpu_chunk_get_size(split_chunks[i]) == split_size, done);
        uvm_cpu_chunk_unmap_gpu(split_chunks[i], gpu0);
        TEST_CHECK_GOTO(uvm_cpu_chunk_get_gpu_phys_addr(split_chunks[i], gpu0) == 0, done);

        // Test that gpu1 still has access.
        TEST_NV_CHECK_GOTO(test_cpu_chunk_mapping_access(split_chunks[i], gpu1), done);
    }

    // Test CPU chunk merging.
    merged_chunk = uvm_cpu_chunk_merge(split_chunks);
    TEST_CHECK_GOTO(uvm_cpu_chunk_get_size(merged_chunk) == size, done_free);
    TEST_CHECK_GOTO(merged_chunk == chunk, done_free);
    TEST_CHECK_GOTO(nv_kref_read(&chunk->refcount) == 1, done_free);

    // The logical chunks were all unmapped from gpu0 but remained mapped on
    // gpu1, so the merged chunk should have no gpu0 mapping and still be
    // accessible from gpu1 without mapping it again.
    TEST_CHECK_GOTO(uvm_cpu_chunk_get_gpu_phys_addr(merged_chunk, gpu0) == 0, done_free);
    TEST_NV_CHECK_GOTO(test_cpu_chunk_mapping_access(merged_chunk, gpu1), done_free);

    // Unmap gpu1 so we start with a fully unmapped physical chunk.
    uvm_cpu_chunk_unmap_gpu(chunk, gpu1);

    // Split the physical chunk.
    TEST_NV_CHECK_GOTO(uvm_cpu_chunk_split(chunk, split_chunks), done_free);

    // Now map everything.
    for (i = 0; i < num_split_chunks; i++) {
        TEST_NV_CHECK_GOTO(uvm_cpu_chunk_map_gpu(split_chunks[i], gpu0), done);
        TEST_NV_CHECK_GOTO(uvm_cpu_chunk_map_gpu(split_chunks[i], gpu1), done);
    }

    // Test CPU chunk merging with everything mapped.
    merged_chunk = uvm_cpu_chunk_merge(split_chunks);

    // At this point, all split chunks have been merged.
    num_split_chunks = 0;

    TEST_CHECK_GOTO(uvm_cpu_chunk_get_size(merged_chunk) == size, done_free);
    TEST_CHECK_GOTO(merged_chunk == chunk, done_free);

    // Since all logical chunks were mapped, the entire merged chunk should
    // be accessible without needing to map it.
    TEST_NV_CHECK_GOTO(test_cpu_chunk_mapping_access(merged_chunk, gpu0), done_free);
    TEST_NV_CHECK_GOTO(test_cpu_chunk_mapping_access(merged_chunk, gpu1), done_free);

done:
    for (i = 0; i < num_split_chunks; i++)
        uvm_cpu_chunk_free(split_chunks[i]);

done_free:
    uvm_kvfree(split_chunks);

    return status;
}

static NV_STATUS test_cpu_chunk_split_and_merge_2(uvm_gpu_t *gpu0, uvm_gpu_t *gpu1)
{
    uvm_chunk_sizes_mask_t alloc_sizes = uvm_cpu_chunk_get_allocation_sizes();
    uvm_chunk_size_t size;

    size = uvm_chunk_find_next_size(alloc_sizes, PAGE_SIZE);
    for_each_chunk_size_from(size, alloc_sizes) {
        uvm_cpu_chunk_t *chunk;
        NV_STATUS status;

        // It is possible that the allocation fails due to lack of large pages
        // rather than an API issue, which will result in a false negative.
        // However, that should be very rare.
        TEST_NV_CHECK_RET(test_cpu_chunk_alloc(size, UVM_CPU_CHUNK_ALLOC_FLAGS_NONE, NUMA_NO_NODE, &chunk));
        status = do_test_cpu_chunk_split_and_merge_2(chunk, gpu0, gpu1);
        uvm_cpu_chunk_free(chunk);

        if (status != NV_OK)
            return status;
    }

    return NV_OK;
}

static NV_STATUS test_cpu_chunk_dirty_split(uvm_cpu_chunk_t *chunk)
{
    uvm_chunk_size_t size = uvm_cpu_chunk_get_size(chunk);
@ -1072,7 +1220,9 @@ done:
    return status;
}

NV_STATUS do_test_cpu_chunk_free(uvm_cpu_chunk_t *chunk, uvm_va_space_t *va_space, uvm_processor_mask_t *test_gpus)
NV_STATUS do_test_cpu_chunk_free(uvm_cpu_chunk_t *chunk,
                                 uvm_va_space_t *va_space,
                                 const uvm_processor_mask_t *test_gpus)
{
    NV_STATUS status = NV_OK;
    uvm_cpu_chunk_t **split_chunks;
@ -1099,7 +1249,7 @@ NV_STATUS do_test_cpu_chunk_free(uvm_cpu_chunk_t *chunk, uvm_va_spac
    chunk = NULL;

    // Map every other chunk.
    // The call to uvm_cpu_chunk_unmap_parent_gpu_phys() is here in case this
    // The call to uvm_cpu_chunk_unmap_gpu() is here in case this
    // is part of a double split (see below). In that case, the parent chunk
    // would be either mapped or unmapped.
    //
@ -1111,7 +1261,7 @@ NV_STATUS do_test_cpu_chunk_free(uvm_cpu_chunk_t *chunk, uvm_va_spac
            if (i & (1 << uvm_id_gpu_index(gpu->id)))
                TEST_NV_CHECK_GOTO(uvm_cpu_chunk_map_gpu(split_chunks[i], gpu), done);
            else
                uvm_cpu_chunk_unmap_parent_gpu_phys(split_chunks[i], gpu->parent);
                uvm_cpu_chunk_unmap_gpu(split_chunks[i], gpu);
        }
    }

@ -1147,9 +1297,9 @@ NV_STATUS do_test_cpu_chunk_free(uvm_cpu_chunk_t *chunk, uvm_va_spac
            TEST_CHECK_GOTO(uvm_cpu_chunk_get_size(split_chunks[j]) == split_size, done);
            for_each_va_space_gpu_in_mask(gpu, va_space, test_gpus) {
                if (j & (1 << uvm_id_gpu_index(gpu->id)))
                    TEST_CHECK_GOTO(uvm_cpu_chunk_get_parent_gpu_phys_addr(split_chunks[j], gpu->parent), done);
                    TEST_CHECK_GOTO(uvm_cpu_chunk_get_gpu_phys_addr(split_chunks[j], gpu), done);
                else
                    TEST_CHECK_GOTO(!uvm_cpu_chunk_get_parent_gpu_phys_addr(split_chunks[j], gpu->parent), done);
                    TEST_CHECK_GOTO(!uvm_cpu_chunk_get_gpu_phys_addr(split_chunks[j], gpu), done);
            }
        }
    }
@ -1168,7 +1318,8 @@ done_free:
    return status;
}

NV_STATUS test_cpu_chunk_free(uvm_va_space_t *va_space, uvm_processor_mask_t *test_gpus)
NV_STATUS test_cpu_chunk_free(uvm_va_space_t *va_space,
                              const uvm_processor_mask_t *test_gpus)
{
    uvm_cpu_chunk_t *chunk;
    uvm_chunk_sizes_mask_t alloc_sizes = uvm_cpu_chunk_get_allocation_sizes();
@ -1204,6 +1355,50 @@ static NV_STATUS test_cpu_chunk_numa_alloc(uvm_va_space_t *va_space)
    return NV_OK;
}

static uvm_gpu_t *find_first_parent_gpu(const uvm_processor_mask_t *test_gpus,
                                        uvm_va_space_t *va_space)
{
    return uvm_processor_mask_find_first_va_space_gpu(test_gpus, va_space);
}

static uvm_gpu_t *find_next_parent_gpu(const uvm_processor_mask_t *test_gpus,
                                       uvm_va_space_t *va_space,
                                       uvm_gpu_t *gpu)
{
    uvm_gpu_t *next_gpu = gpu;

    while (next_gpu) {
        next_gpu = uvm_processor_mask_find_next_va_space_gpu(test_gpus, va_space, next_gpu);
        if (!next_gpu || next_gpu->parent != gpu->parent)
            break;
    }

    return next_gpu;
}

static void find_shared_gpu_pair(const uvm_processor_mask_t *test_gpus,
                                 uvm_va_space_t *va_space,
                                 uvm_gpu_t **out_gpu0,
                                 uvm_gpu_t **out_gpu1)
{
    uvm_gpu_t *gpu0 = uvm_processor_mask_find_first_va_space_gpu(test_gpus, va_space);
    uvm_gpu_t *gpu1 = uvm_processor_mask_find_next_va_space_gpu(test_gpus, va_space, gpu0);

    while (gpu1) {
        if (gpu0->parent == gpu1->parent) {
            *out_gpu0 = gpu0;
            *out_gpu1 = gpu1;
            return;
        }

        gpu0 = gpu1;
        gpu1 = uvm_processor_mask_find_next_va_space_gpu(test_gpus, va_space, gpu0);
    }

    *out_gpu0 = NULL;
    *out_gpu1 = NULL;
}

NV_STATUS uvm_test_cpu_chunk_api(UVM_TEST_CPU_CHUNK_API_PARAMS *params, struct file *filp)
{
    uvm_va_space_t *va_space = uvm_va_space_get(filp);
@ -1228,13 +1423,29 @@ NV_STATUS uvm_test_cpu_chunk_api(UVM_TEST_CPU_CHUNK_API_PARAMS *params, struct f
    TEST_NV_CHECK_GOTO(test_cpu_chunk_free(va_space, test_gpus), done);
    TEST_NV_CHECK_GOTO(test_cpu_chunk_numa_alloc(va_space), done);

    if (uvm_processor_mask_get_gpu_count(test_gpus) >= 3) {
        uvm_gpu_t *gpu2, *gpu3;
    if (uvm_processor_mask_get_gpu_count(test_gpus) >= 2) {
        uvm_gpu_t *gpu2, *gpu3 = NULL;

        gpu = uvm_processor_mask_find_first_va_space_gpu(test_gpus, va_space);
        gpu2 = uvm_processor_mask_find_next_va_space_gpu(test_gpus, va_space, gpu);
        gpu3 = uvm_processor_mask_find_next_va_space_gpu(test_gpus, va_space, gpu2);
        TEST_NV_CHECK_GOTO(test_cpu_chunk_mapping_array(gpu, gpu2, gpu3), done);
        // Look for a pair of GPUs that don't share a common parent.
        gpu = find_first_parent_gpu(test_gpus, va_space);
        gpu2 = find_next_parent_gpu(test_gpus, va_space, gpu);
        if (gpu2) {
            TEST_NV_CHECK_GOTO(test_cpu_chunk_split_and_merge_2(gpu, gpu2), done);

            // Look for a third physical GPU.
            gpu3 = find_next_parent_gpu(test_gpus, va_space, gpu2);

            if (gpu3)
                TEST_NV_CHECK_GOTO(test_cpu_chunk_mapping_array(gpu, gpu2, gpu3), done);
        }

        // Look for a pair of GPUs that share a common parent.
        find_shared_gpu_pair(test_gpus, va_space, &gpu, &gpu2);
        if (gpu) {
            // Test MIG instances within the same parent GPU.
            TEST_NV_CHECK_GOTO(test_cpu_chunk_split_and_merge_2(gpu, gpu2), done);
            TEST_NV_CHECK_GOTO(test_cpu_chunk_mig(gpu, gpu2), done);
        }
    }

done:
@ -1,5 +1,5 @@
/*******************************************************************************
    Copyright (c) 2023 NVIDIA Corporation
    Copyright (c) 2023-2024 NVIDIA Corporation

    Permission is hereby granted, free of charge, to any person obtaining a copy
    of this software and associated documentation files (the "Software"), to
@ -30,6 +30,8 @@ const uvm_processor_mask_t g_uvm_processor_mask_empty = { };

NV_STATUS uvm_processor_mask_cache_init(void)
{
    BUILD_BUG_ON((8 * sizeof(((uvm_sub_processor_mask_t *)0)->bitmap)) < UVM_PARENT_ID_MAX_SUB_PROCESSORS);

    g_uvm_processor_mask_cache = NV_KMEM_CACHE_CREATE("uvm_processor_mask_t", uvm_processor_mask_t);
    if (!g_uvm_processor_mask_cache)
        return NV_ERR_NO_MEMORY;
@ -1,5 +1,5 @@
/*******************************************************************************
    Copyright (c) 2016-2023 NVIDIA Corporation
    Copyright (c) 2016-2024 NVIDIA Corporation

    Permission is hereby granted, free of charge, to any person obtaining a copy
    of this software and associated documentation files (the "Software"), to
@ -277,8 +277,6 @@ typedef uvm_processor_id_t uvm_gpu_id_t;
#define UVM_PARENT_ID_MAX_GPUS NV_MAX_DEVICES
#define UVM_PARENT_ID_MAX_PROCESSORS (UVM_PARENT_ID_MAX_GPUS + 1)

#define UVM_PARENT_ID_MAX_SUB_PROCESSORS 8

#define UVM_ID_MAX_GPUS (UVM_PARENT_ID_MAX_GPUS * UVM_PARENT_ID_MAX_SUB_PROCESSORS)
#define UVM_ID_MAX_PROCESSORS (UVM_ID_MAX_GPUS + 1)
#define UVM_MAX_UNIQUE_GPU_PAIRS SUM_FROM_0_TO_N(UVM_ID_MAX_GPUS - 1)
@ -292,6 +290,9 @@ typedef uvm_processor_id_t uvm_gpu_id_t;

#define UVM_ID_CHECK_BOUNDS(id) UVM_ASSERT_MSG(id.val <= UVM_ID_MAX_PROCESSORS, "id %u\n", id.val)

#define UVM_SUB_PROCESSOR_INDEX_CHECK_BOUNDS(sub_index) \
    UVM_ASSERT_MSG((sub_index) < UVM_PARENT_ID_MAX_SUB_PROCESSORS, "sub_index %u\n", (sub_index))

static int uvm_parent_id_cmp(uvm_parent_processor_id_t id1, uvm_parent_processor_id_t id2)
{
    UVM_PARENT_ID_CHECK_BOUNDS(id1);
@ -493,11 +494,16 @@ static uvm_gpu_id_t uvm_gpu_id_from_parent_gpu_id(const uvm_parent_gpu_id_t id)
static uvm_gpu_id_t uvm_gpu_id_from_sub_processor_index(NvU32 index, NvU32 sub_index)
{
    UVM_ASSERT(index < UVM_PARENT_ID_MAX_GPUS);
    UVM_ASSERT(sub_index < UVM_PARENT_ID_MAX_SUB_PROCESSORS);
    UVM_SUB_PROCESSOR_INDEX_CHECK_BOUNDS(sub_index);

    return uvm_gpu_id_from_index(index * UVM_PARENT_ID_MAX_SUB_PROCESSORS + sub_index);
}

static uvm_gpu_id_t uvm_gpu_id_from_sub_processor(uvm_parent_gpu_id_t id, NvU32 sub_index)
{
    return uvm_gpu_id_from_sub_processor_index(uvm_parent_id_gpu_index(id), sub_index);
}
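
// [Editor's illustration, not part of the driver source.] The packing above
// means a GPU id encodes (parent index * UVM_PARENT_ID_MAX_SUB_PROCESSORS +
// sub_index), so both the parent GPU id and the MIG sub-processor index can
// be recovered from it. The accessors used here are assumed to be the ones
// defined elsewhere in this header.
static inline void example_gpu_id_round_trip(uvm_parent_gpu_id_t parent_id, NvU32 sub_index)
{
    uvm_gpu_id_t id = uvm_gpu_id_from_sub_processor(parent_id, sub_index);

    // The sub-processor index is the remainder of the packing, and the parent
    // index is recovered by dividing it back out.
    UVM_ASSERT(uvm_id_sub_processor_index(id) == sub_index);
    UVM_ASSERT(uvm_parent_id_gpu_index(uvm_parent_gpu_id_from_gpu_id(id)) == uvm_parent_id_gpu_index(parent_id));
}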

static uvm_parent_gpu_id_t uvm_parent_gpu_id_from_gpu_id(const uvm_gpu_id_t id)
{
    UVM_ASSERT(UVM_ID_IS_GPU(id));
@ -525,6 +531,71 @@ UVM_PROCESSOR_MASK(uvm_processor_mask_t, \
extern const uvm_processor_mask_t g_uvm_processor_mask_cpu;
extern const uvm_processor_mask_t g_uvm_processor_mask_empty;

// This is similar to uvm_parent_processor_mask_t and uvm_processor_mask_t
// but defined as a NvU8 in order to save memory since DECLARE_BITMAP() uses
// unsigned long. It also means we need to define our own bitops.
// Note that these are not atomic operations.
typedef struct
{
    NvU8 bitmap;
} uvm_sub_processor_mask_t;

static bool uvm_sub_processor_mask_test(const uvm_sub_processor_mask_t *mask, NvU32 sub_index)
{
    UVM_SUB_PROCESSOR_INDEX_CHECK_BOUNDS(sub_index);

    return mask->bitmap & (1 << sub_index);
}

static void uvm_sub_processor_mask_set(uvm_sub_processor_mask_t *mask, NvU32 sub_index)
{
    UVM_SUB_PROCESSOR_INDEX_CHECK_BOUNDS(sub_index);

    mask->bitmap |= 1 << sub_index;
}

static void uvm_sub_processor_mask_clear(uvm_sub_processor_mask_t *mask, NvU32 sub_index)
{
    UVM_SUB_PROCESSOR_INDEX_CHECK_BOUNDS(sub_index);

    mask->bitmap &= ~(1 << sub_index);
}

static bool uvm_sub_processor_mask_test_and_set(uvm_sub_processor_mask_t *mask, NvU32 sub_index)
{
    bool result = uvm_sub_processor_mask_test(mask, sub_index);

    if (!result)
        uvm_sub_processor_mask_set(mask, sub_index);

    return result;
}

static bool uvm_sub_processor_mask_test_and_clear(uvm_sub_processor_mask_t *mask, NvU32 sub_index)
{
    bool result = uvm_sub_processor_mask_test(mask, sub_index);

    if (result)
        uvm_sub_processor_mask_clear(mask, sub_index);

    return result;
}

static void uvm_sub_processor_mask_zero(uvm_sub_processor_mask_t *mask)
{
    mask->bitmap = 0;
}

static bool uvm_sub_processor_mask_empty(const uvm_sub_processor_mask_t *mask)
{
    return mask->bitmap == 0;
}

static NvU32 uvm_sub_processor_mask_get_count(const uvm_sub_processor_mask_t *mask)
{
    return hweight8(mask->bitmap);
}

// Like uvm_processor_mask_subset() but ignores the CPU in the subset mask.
// Returns whether the GPUs in subset are a subset of the GPUs in mask.
bool uvm_processor_mask_gpu_subset(const uvm_processor_mask_t *subset,
@ -571,8 +642,28 @@ void uvm_parent_gpus_from_processor_mask(uvm_parent_processor_mask_t *parent_mas
     i = uvm_gpu_id_next(i))

// Helper to iterate over all sub processor indexes.
#define for_each_sub_processor_index(i) \
    for (i = 0; i < UVM_PARENT_ID_MAX_SUB_PROCESSORS; i++)
#define for_each_sub_processor_index(sub_index) \
    for ((sub_index) = 0; (sub_index) < UVM_PARENT_ID_MAX_SUB_PROCESSORS; (sub_index)++)

static NvU32 uvm_sub_processor_mask_find_first_index(const uvm_sub_processor_mask_t *mask)
{
    unsigned long bitmap = mask->bitmap;

    return find_first_bit(&bitmap, UVM_PARENT_ID_MAX_SUB_PROCESSORS);
}

static NvU32 uvm_sub_processor_mask_find_next_index(const uvm_sub_processor_mask_t *mask, NvU32 min_index)
{
    unsigned long bitmap = mask->bitmap;

    return find_next_bit(&bitmap, UVM_PARENT_ID_MAX_SUB_PROCESSORS, min_index);
}

// Helper to iterate over all sub processor indexes in a given mask.
#define for_each_sub_processor_index_in_mask(sub_index, sub_mask) \
    for ((sub_index) = uvm_sub_processor_mask_find_first_index((sub_mask)); \
         (sub_index) < UVM_PARENT_ID_MAX_SUB_PROCESSORS; \
         (sub_index) = uvm_sub_processor_mask_find_next_index((sub_mask), (sub_index) + 1))
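
// [Editor's illustration, not part of the driver source.] Intended use of the
// iterator above: walk every sub-processor (MIG instance) index recorded in a
// uvm_sub_processor_mask_t. The count it produces matches
// uvm_sub_processor_mask_get_count().
static inline NvU32 example_count_sub_processor_indexes(const uvm_sub_processor_mask_t *sub_mask)
{
    NvU32 sub_index;
    NvU32 count = 0;

    for_each_sub_processor_index_in_mask(sub_index, sub_mask)
        count++;

    return count;
}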

// Helper to iterate over all valid processor ids.
#define for_each_id(i) for (i = UVM_ID_CPU; UVM_ID_IS_VALID(i); i = uvm_id_next(i))

@ -1,5 +1,5 @@
/*******************************************************************************
    Copyright (c) 2015-2022 NVidia Corporation
    Copyright (c) 2015-2024 NVidia Corporation

    Permission is hereby granted, free of charge, to any person obtaining a copy
    of this software and associated documentation files (the "Software"), to
@ -191,7 +191,7 @@ typedef struct
    NvU32 read_duplication; // Out (UVM_TEST_READ_DUPLICATION_POLICY)
    NvProcessorUuid preferred_location; // Out
    NvS32 preferred_cpu_nid; // Out
    NvProcessorUuid accessed_by[UVM_MAX_PROCESSORS_V2]; // Out
    NvProcessorUuid accessed_by[UVM_MAX_PROCESSORS]; // Out
    NvU32 accessed_by_count; // Out
    NvU32 type; // Out (UVM_TEST_VA_RANGE_TYPE)
    union
@ -624,7 +624,7 @@ typedef struct

    // Array of processors which have a resident copy of the page containing
    // lookup_address.
    NvProcessorUuid resident_on[UVM_MAX_PROCESSORS_V2]; // Out
    NvProcessorUuid resident_on[UVM_MAX_PROCESSORS]; // Out
    NvU32 resident_on_count; // Out

    // If the memory is resident on the CPU, the NUMA node on which the page
@ -635,24 +635,24 @@ typedef struct
    // system-page-sized portion of this allocation which contains
    // lookup_address is guaranteed to be resident on the corresponding
    // processor.
    NvU32 resident_physical_size[UVM_MAX_PROCESSORS_V2]; // Out
    NvU32 resident_physical_size[UVM_MAX_PROCESSORS]; // Out

    // The physical address of the physical allocation backing lookup_address.
    NvU64 resident_physical_address[UVM_MAX_PROCESSORS_V2] NV_ALIGN_BYTES(8); // Out
    NvU64 resident_physical_address[UVM_MAX_PROCESSORS] NV_ALIGN_BYTES(8); // Out

    // Array of processors which have a virtual mapping covering lookup_address.
    NvProcessorUuid mapped_on[UVM_MAX_PROCESSORS_V2]; // Out
    NvU32 mapping_type[UVM_MAX_PROCESSORS_V2]; // Out
    NvU64 mapping_physical_address[UVM_MAX_PROCESSORS_V2] NV_ALIGN_BYTES(8); // Out
    NvProcessorUuid mapped_on[UVM_MAX_PROCESSORS]; // Out
    NvU32 mapping_type[UVM_MAX_PROCESSORS]; // Out
    NvU64 mapping_physical_address[UVM_MAX_PROCESSORS] NV_ALIGN_BYTES(8); // Out
    NvU32 mapped_on_count; // Out

    // The size of the virtual mapping covering lookup_address on each
    // mapped_on processor.
    NvU32 page_size[UVM_MAX_PROCESSORS_V2]; // Out
    NvU32 page_size[UVM_MAX_PROCESSORS]; // Out

    // Array of processors which have physical memory populated that would back
    // lookup_address if it was resident.
    NvProcessorUuid populated_on[UVM_MAX_PROCESSORS_V2]; // Out
    NvProcessorUuid populated_on[UVM_MAX_PROCESSORS]; // Out
    NvU32 populated_on_count; // Out

    NV_STATUS rmStatus; // Out
@ -30,18 +30,18 @@ void uvm_tlb_batch_begin(uvm_page_tree_t *tree, uvm_tlb_batch_t *batch)
    batch->tree = tree;
}

static NvU32 smallest_page_size(NvU32 page_sizes)
static NvU64 smallest_page_size(NvU64 page_sizes)
{
    UVM_ASSERT(page_sizes != 0);

    return 1u << __ffs(page_sizes);
    return 1ULL << __ffs(page_sizes);
}

static NvU32 biggest_page_size(NvU32 page_sizes)
static NvU64 biggest_page_size(NvU64 page_sizes)
{
    UVM_ASSERT(page_sizes != 0);

    return 1u << __fls(page_sizes);
    return 1ULL << __fls(page_sizes);
}
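
// [Editor's illustration, not part of the driver source.] Why the NvU32 ->
// NvU64 change above matters: once a page-size mask may carry bits at or
// above bit 31, the shift that rebuilds the size from __fls() must be done in
// 64 bits. The mask value below is hypothetical and only demonstrates the
// width issue on a 64-bit kernel, mirroring the helpers above.
static inline void example_page_size_width(void)
{
    NvU64 page_sizes = (1ULL << 39) | (1ULL << 21);

    // __fls() returns 39 here; forming the result as 1ULL keeps the full
    // 64-bit page size, which a 32-bit shift could not represent.
    NvU64 biggest = 1ULL << __fls(page_sizes);

    UVM_ASSERT(biggest == (1ULL << 39));
}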

static void tlb_batch_flush_invalidate_per_va(uvm_tlb_batch_t *batch, uvm_push_t *push)
@ -53,8 +53,8 @@ static void tlb_batch_flush_invalidate_per_va(uvm_tlb_batch_t *batch, uvm_push_t

    for (i = 0; i < batch->count; ++i) {
        uvm_tlb_batch_range_t *entry = &batch->ranges[i];
        NvU32 min_page_size = smallest_page_size(entry->page_sizes);
        NvU32 max_page_size = biggest_page_size(entry->page_sizes);
        NvU64 min_page_size = smallest_page_size(entry->page_sizes);
        NvU64 max_page_size = biggest_page_size(entry->page_sizes);

        // Use the depth of the max page size as it's the broadest
        NvU32 depth = tree->hal->page_table_depth(max_page_size);
@ -113,7 +113,7 @@ void uvm_tlb_batch_end(uvm_tlb_batch_t *batch, uvm_push_t *push, uvm_membar_t tl
        tlb_batch_flush_invalidate_per_va(batch, push);
}

void uvm_tlb_batch_invalidate(uvm_tlb_batch_t *batch, NvU64 start, NvU64 size, NvU32 page_sizes, uvm_membar_t tlb_membar)
void uvm_tlb_batch_invalidate(uvm_tlb_batch_t *batch, NvU64 start, NvU64 size, NvU64 page_sizes, uvm_membar_t tlb_membar)
{
    uvm_tlb_batch_range_t *new_entry;

@ -41,7 +41,7 @@ typedef struct
    NvU64 size;

    // Min and max page size ored together
    NvU32 page_sizes;
    NvU64 page_sizes;
} uvm_tlb_batch_range_t;

struct uvm_tlb_batch_struct
@ -63,7 +63,7 @@ struct uvm_tlb_batch_struct
    NvU32 count;

    // Biggest page size across all queued up invalidates
    NvU32 biggest_page_size;
    NvU64 biggest_page_size;

    // Max membar across all queued up invalidates
    uvm_membar_t membar;
@ -81,7 +81,7 @@ void uvm_tlb_batch_begin(uvm_page_tree_t *tree, uvm_tlb_batch_t *batch);
// If the membar parameter is not UVM_MEMBAR_NONE, the specified membar will
// be performed logically after the TLB invalidate such that all physical memory
// accesses using the old translations are ordered to the scope of the membar.
void uvm_tlb_batch_invalidate(uvm_tlb_batch_t *batch, NvU64 start, NvU64 size, NvU32 page_sizes, uvm_membar_t tlb_membar);
void uvm_tlb_batch_invalidate(uvm_tlb_batch_t *batch, NvU64 start, NvU64 size, NvU64 page_sizes, uvm_membar_t tlb_membar);

// End a TLB invalidate batch
//
@ -97,8 +97,12 @@ void uvm_tlb_batch_end(uvm_tlb_batch_t *batch, uvm_push_t *push, uvm_membar_t tl
// Helper for invalidating a single range immediately.
//
// Internally begins and ends a TLB batch.
static void uvm_tlb_batch_single_invalidate(uvm_page_tree_t *tree, uvm_push_t *push,
                                            NvU64 start, NvU64 size, NvU32 page_sizes, uvm_membar_t tlb_membar)
static void uvm_tlb_batch_single_invalidate(uvm_page_tree_t *tree,
                                            uvm_push_t *push,
                                            NvU64 start,
                                            NvU64 size,
                                            NvU64 page_sizes,
                                            uvm_membar_t tlb_membar)
{
    uvm_tlb_batch_t batch;

|
||||
/*******************************************************************************
|
||||
Copyright (c) 2016-2023 NVIDIA Corporation
|
||||
Copyright (c) 2016-2024 NVIDIA Corporation
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to
|
||||
@ -57,20 +57,12 @@ typedef struct
|
||||
struct list_head queue_nodes[UvmEventNumTypesAll];
|
||||
|
||||
struct page **queue_buffer_pages;
|
||||
union
|
||||
{
|
||||
UvmEventEntry_V1 *queue_v1;
|
||||
UvmEventEntry_V2 *queue_v2;
|
||||
};
|
||||
void *queue_buffer;
|
||||
NvU32 queue_buffer_count;
|
||||
NvU32 notification_threshold;
|
||||
|
||||
struct page **control_buffer_pages;
|
||||
union
|
||||
{
|
||||
UvmToolsEventControlData_V1 *control_v1;
|
||||
UvmToolsEventControlData_V2 *control_v2;
|
||||
};
|
||||
UvmToolsEventControlData *control;
|
||||
|
||||
wait_queue_head_t wait_queue;
|
||||
bool is_wakeup_get_valid;
|
||||
@ -398,16 +390,12 @@ static void destroy_event_tracker(uvm_tools_event_tracker_t *event_tracker)
|
||||
|
||||
if (event_tracker->is_queue) {
|
||||
uvm_tools_queue_t *queue = &event_tracker->queue;
|
||||
NvU64 buffer_size, control_size;
|
||||
NvU64 buffer_size;
|
||||
|
||||
if (event_tracker->version == UvmToolsEventQueueVersion_V1) {
|
||||
if (event_tracker->version == UvmToolsEventQueueVersion_V1)
|
||||
buffer_size = queue->queue_buffer_count * sizeof(UvmEventEntry_V1);
|
||||
control_size = sizeof(UvmToolsEventControlData_V1);
|
||||
}
|
||||
else {
|
||||
else
|
||||
buffer_size = queue->queue_buffer_count * sizeof(UvmEventEntry_V2);
|
||||
control_size = sizeof(UvmToolsEventControlData_V2);
|
||||
}
|
||||
|
||||
remove_event_tracker(va_space,
|
||||
queue->queue_nodes,
|
||||
@ -415,16 +403,16 @@ static void destroy_event_tracker(uvm_tools_event_tracker_t *event_tracker)
|
||||
queue->subscribed_queues,
|
||||
&queue->subscribed_queues);
|
||||
|
||||
if (queue->queue_v2 != NULL) {
|
||||
if (queue->queue_buffer != NULL) {
|
||||
unmap_user_pages(queue->queue_buffer_pages,
|
||||
queue->queue_v2,
|
||||
queue->queue_buffer,
|
||||
buffer_size);
|
||||
}
|
||||
|
||||
if (queue->control_v2 != NULL) {
|
||||
if (queue->control != NULL) {
|
||||
unmap_user_pages(queue->control_buffer_pages,
|
||||
queue->control_v2,
|
||||
control_size);
|
||||
queue->control,
|
||||
sizeof(UvmToolsEventControlData));
|
||||
}
|
||||
}
|
||||
else {
|
||||
@ -456,9 +444,9 @@ static void destroy_event_tracker(uvm_tools_event_tracker_t *event_tracker)
|
||||
kmem_cache_free(g_tools_event_tracker_cache, event_tracker);
|
||||
}
|
||||
|
||||
static void enqueue_event_v1(const UvmEventEntry_V1 *entry, uvm_tools_queue_t *queue)
|
||||
static void enqueue_event(const void *entry, size_t entry_size, NvU8 eventType, uvm_tools_queue_t *queue)
|
||||
{
|
||||
UvmToolsEventControlData_V1 *ctrl = queue->control_v1;
|
||||
UvmToolsEventControlData *ctrl = queue->control;
|
||||
uvm_tools_queue_snapshot_t sn;
|
||||
NvU32 queue_size = queue->queue_buffer_count;
|
||||
NvU32 queue_mask = queue_size - 1;
|
||||
@ -481,11 +469,11 @@ static void enqueue_event_v1(const UvmEventEntry_V1 *entry, uvm_tools_queue_t *q
|
||||
|
||||
// one free element means that the queue is full
|
||||
if (((queue_size + sn.get_behind - sn.put_behind) & queue_mask) == 1) {
|
||||
atomic64_inc((atomic64_t *)&ctrl->dropped + entry->eventData.eventType);
|
||||
atomic64_inc((atomic64_t *)&ctrl->dropped + eventType);
|
||||
goto unlock;
|
||||
}
|
||||
|
||||
memcpy(queue->queue_v1 + sn.put_behind, entry, sizeof(*entry));
|
||||
memcpy((char *)queue->queue_buffer + sn.put_behind * entry_size, entry, entry_size);
|
||||
|
||||
sn.put_behind = sn.put_ahead;
|
||||
|
||||
@ -509,79 +497,45 @@ unlock:
|
||||
uvm_spin_unlock(&queue->lock);
|
||||
}
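For context, a minimal standalone sketch of the "one free element means that the queue is full" check that enqueue_event() performs against the user-shared get_behind/put_behind indices. It assumes, as the driver does, that the queue size is a power of two; this is illustrative only and not part of the diff:

    #include <stdbool.h>
    #include <stdint.h>

    static bool ring_is_full(uint32_t queue_size, uint32_t get_behind, uint32_t put_behind)
    {
        uint32_t mask = queue_size - 1;                          /* queue_size is a power of two */
        uint32_t free_slots = (queue_size + get_behind - put_behind) & mask;

        /* One slot is kept in reserve: filling it would make put_behind equal
         * get_behind, which is indistinguishable from an empty queue. */
        return free_slots == 1;
    }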
static void enqueue_event_v1(const UvmEventEntry_V1 *entry, uvm_tools_queue_t *queue)
|
||||
{
|
||||
enqueue_event(entry, sizeof(*entry), entry->eventData.eventType, queue);
|
||||
}
|
||||
|
||||
static void enqueue_event_v2(const UvmEventEntry_V2 *entry, uvm_tools_queue_t *queue)
|
||||
{
|
||||
UvmToolsEventControlData_V2 *ctrl = queue->control_v2;
|
||||
uvm_tools_queue_snapshot_t sn;
|
||||
NvU32 queue_size = queue->queue_buffer_count;
|
||||
NvU32 queue_mask = queue_size - 1;
|
||||
enqueue_event(entry, sizeof(*entry), entry->eventData.eventType, queue);
|
||||
}
|
||||
|
||||
// Prevent processor speculation prior to accessing user-mapped memory to
|
||||
// avoid leaking information from side-channel attacks. There are many
|
||||
// possible paths leading to this point and it would be difficult and error-
|
||||
// prone to audit all of them to determine whether user mode could guide
|
||||
// this access to kernel memory under speculative execution, so to be on the
|
||||
// safe side we'll just always block speculation.
|
||||
nv_speculation_barrier();
|
||||
static void uvm_tools_record_event(struct list_head *head,
|
||||
const void *entry,
|
||||
size_t entry_size,
|
||||
NvU8 eventType)
|
||||
{
|
||||
uvm_tools_queue_t *queue;
|
||||
|
||||
uvm_spin_lock(&queue->lock);
|
||||
UVM_ASSERT(eventType < UvmEventNumTypesAll);
|
||||
|
||||
// ctrl is mapped into user space with read and write permissions,
|
||||
// so its values cannot be trusted.
|
||||
sn.get_behind = atomic_read((atomic_t *)&ctrl->get_behind) & queue_mask;
|
||||
sn.put_behind = atomic_read((atomic_t *)&ctrl->put_behind) & queue_mask;
|
||||
sn.put_ahead = (sn.put_behind + 1) & queue_mask;
|
||||
|
||||
// one free element means that the queue is full
|
||||
if (((queue_size + sn.get_behind - sn.put_behind) & queue_mask) == 1) {
|
||||
atomic64_inc((atomic64_t *)&ctrl->dropped + entry->eventData.eventType);
|
||||
goto unlock;
|
||||
}
|
||||
|
||||
memcpy(queue->queue_v2 + sn.put_behind, entry, sizeof(*entry));
|
||||
|
||||
sn.put_behind = sn.put_ahead;
|
||||
// put_ahead and put_behind will always be the same outside of queue->lock
|
||||
// this allows the user-space consumer to choose either a 2 or 4 pointer synchronization approach
|
||||
atomic_set((atomic_t *)&ctrl->put_ahead, sn.put_behind);
|
||||
atomic_set((atomic_t *)&ctrl->put_behind, sn.put_behind);
|
||||
|
||||
sn.get_ahead = atomic_read((atomic_t *)&ctrl->get_ahead);
|
||||
// if the queue needs to be woken up, only signal if we haven't signaled before for this value of get_ahead
|
||||
if (queue_needs_wakeup(queue, &sn) && !(queue->is_wakeup_get_valid && queue->wakeup_get == sn.get_ahead)) {
|
||||
queue->is_wakeup_get_valid = true;
|
||||
queue->wakeup_get = sn.get_ahead;
|
||||
wake_up_all(&queue->wait_queue);
|
||||
}
|
||||
|
||||
unlock:
|
||||
uvm_spin_unlock(&queue->lock);
|
||||
list_for_each_entry(queue, head + eventType, queue_nodes[eventType])
|
||||
enqueue_event(entry, entry_size, eventType, queue);
|
||||
}
|
||||
|
||||
static void uvm_tools_record_event_v1(uvm_va_space_t *va_space, const UvmEventEntry_V1 *entry)
|
||||
{
|
||||
NvU8 eventType = entry->eventData.eventType;
|
||||
uvm_tools_queue_t *queue;
|
||||
|
||||
UVM_ASSERT(eventType < UvmEventNumTypesAll);
|
||||
|
||||
uvm_assert_rwsem_locked(&va_space->tools.lock);
|
||||
|
||||
list_for_each_entry(queue, va_space->tools.queues_v1 + eventType, queue_nodes[eventType])
|
||||
enqueue_event_v1(entry, queue);
|
||||
uvm_tools_record_event(va_space->tools.queues_v1, entry, sizeof(*entry), eventType);
|
||||
}
|
||||
|
||||
static void uvm_tools_record_event_v2(uvm_va_space_t *va_space, const UvmEventEntry_V2 *entry)
|
||||
{
|
||||
NvU8 eventType = entry->eventData.eventType;
|
||||
uvm_tools_queue_t *queue;
|
||||
|
||||
UVM_ASSERT(eventType < UvmEventNumTypesAll);
|
||||
|
||||
uvm_assert_rwsem_locked(&va_space->tools.lock);
|
||||
|
||||
list_for_each_entry(queue, va_space->tools.queues_v2 + eventType, queue_nodes[eventType])
|
||||
enqueue_event_v2(entry, queue);
|
||||
uvm_tools_record_event(va_space->tools.queues_v2, entry, sizeof(*entry), eventType);
|
||||
}
|
||||
|
||||
static bool counter_matches_processor(UvmCounterName counter, const NvProcessorUuid *processor)
|
||||
@ -751,7 +705,7 @@ static unsigned uvm_tools_poll(struct file *filp, poll_table *wait)
|
||||
int flags = 0;
|
||||
uvm_tools_queue_snapshot_t sn;
|
||||
uvm_tools_event_tracker_t *event_tracker;
|
||||
UvmToolsEventControlData_V2 *ctrl;
|
||||
UvmToolsEventControlData *ctrl;
|
||||
|
||||
if (uvm_global_get_status() != NV_OK)
|
||||
return POLLERR;
|
||||
@ -763,7 +717,7 @@ static unsigned uvm_tools_poll(struct file *filp, poll_table *wait)
|
||||
uvm_spin_lock(&event_tracker->queue.lock);
|
||||
|
||||
event_tracker->queue.is_wakeup_get_valid = false;
|
||||
ctrl = event_tracker->queue.control_v2;
|
||||
ctrl = event_tracker->queue.control;
|
||||
sn.get_ahead = atomic_read((atomic_t *)&ctrl->get_ahead);
|
||||
sn.put_behind = atomic_read((atomic_t *)&ctrl->put_behind);
|
||||
|
||||
@ -878,6 +832,24 @@ static void record_gpu_fault_instance(uvm_gpu_t *gpu,
|
||||
}
|
||||
}
|
||||
|
||||
static void record_cpu_fault(UvmEventCpuFaultInfo *info, uvm_perf_event_data_t *event_data)
|
||||
{
|
||||
info->eventType = UvmEventTypeCpuFault;
|
||||
if (event_data->fault.cpu.is_write)
|
||||
info->accessType = UvmEventMemoryAccessTypeWrite;
|
||||
else
|
||||
info->accessType = UvmEventMemoryAccessTypeRead;
|
||||
|
||||
info->address = event_data->fault.cpu.fault_va;
|
||||
info->timeStamp = NV_GETTIME();
|
||||
// assume that current owns va_space
|
||||
info->pid = uvm_get_stale_process_id();
|
||||
info->threadId = uvm_get_stale_thread_id();
|
||||
info->pc = event_data->fault.cpu.pc;
|
||||
// TODO: Bug 4515381: set info->nid when we decide if it's NUMA node ID or
|
||||
// CPU ID.
|
||||
}
|
||||
|
||||
static void uvm_tools_record_fault(uvm_perf_event_t event_id, uvm_perf_event_data_t *event_data)
|
||||
{
|
||||
uvm_va_space_t *va_space = event_data->fault.space;
|
||||
@ -895,41 +867,17 @@ static void uvm_tools_record_fault(uvm_perf_event_t event_id, uvm_perf_event_dat
|
||||
if (UVM_ID_IS_CPU(event_data->fault.proc_id)) {
|
||||
if (tools_is_event_enabled_version(va_space, UvmEventTypeCpuFault, UvmToolsEventQueueVersion_V1)) {
|
||||
UvmEventEntry_V1 entry;
|
||||
UvmEventCpuFaultInfo_V1 *info = &entry.eventData.cpuFault;
|
||||
memset(&entry, 0, sizeof(entry));
|
||||
|
||||
info->eventType = UvmEventTypeCpuFault;
|
||||
if (event_data->fault.cpu.is_write)
|
||||
info->accessType = UvmEventMemoryAccessTypeWrite;
|
||||
else
|
||||
info->accessType = UvmEventMemoryAccessTypeRead;
|
||||
|
||||
info->address = event_data->fault.cpu.fault_va;
|
||||
info->timeStamp = NV_GETTIME();
|
||||
// assume that current owns va_space
|
||||
info->pid = uvm_get_stale_process_id();
|
||||
info->threadId = uvm_get_stale_thread_id();
|
||||
info->pc = event_data->fault.cpu.pc;
|
||||
record_cpu_fault(&entry.eventData.cpuFault, event_data);
|
||||
|
||||
uvm_tools_record_event_v1(va_space, &entry);
|
||||
}
|
||||
if (tools_is_event_enabled_version(va_space, UvmEventTypeCpuFault, UvmToolsEventQueueVersion_V2)) {
|
||||
UvmEventEntry_V2 entry;
|
||||
UvmEventCpuFaultInfo_V2 *info = &entry.eventData.cpuFault;
|
||||
memset(&entry, 0, sizeof(entry));
|
||||
|
||||
info->eventType = UvmEventTypeCpuFault;
|
||||
if (event_data->fault.cpu.is_write)
|
||||
info->accessType = UvmEventMemoryAccessTypeWrite;
|
||||
else
|
||||
info->accessType = UvmEventMemoryAccessTypeRead;
|
||||
|
||||
info->address = event_data->fault.cpu.fault_va;
|
||||
info->timeStamp = NV_GETTIME();
|
||||
// assume that current owns va_space
|
||||
info->pid = uvm_get_stale_process_id();
|
||||
info->threadId = uvm_get_stale_thread_id();
|
||||
info->pc = event_data->fault.cpu.pc;
|
||||
record_cpu_fault(&entry.eventData.cpuFault, event_data);
|
||||
|
||||
uvm_tools_record_event_v2(va_space, &entry);
|
||||
}
|
||||
@ -1834,7 +1782,7 @@ void uvm_tools_record_thrashing(uvm_va_space_t *va_space,
|
||||
info->size = region_size;
|
||||
info->timeStamp = NV_GETTIME();
|
||||
|
||||
BUILD_BUG_ON(UVM_MAX_PROCESSORS_V2 < UVM_ID_MAX_PROCESSORS);
|
||||
BUILD_BUG_ON(UVM_MAX_PROCESSORS < UVM_ID_MAX_PROCESSORS);
|
||||
bitmap_copy((long unsigned *)&info->processors, processors->bitmap, UVM_ID_MAX_PROCESSORS);
|
||||
|
||||
uvm_tools_record_event_v2(va_space, &entry);
|
||||
@ -2151,7 +2099,7 @@ NV_STATUS uvm_api_tools_init_event_tracker(UVM_TOOLS_INIT_EVENT_TRACKER_PARAMS *
|
||||
event_tracker->is_queue = params->queueBufferSize != 0;
|
||||
if (event_tracker->is_queue) {
|
||||
uvm_tools_queue_t *queue = &event_tracker->queue;
|
||||
NvU64 buffer_size, control_size;
|
||||
NvU64 buffer_size;
|
||||
|
||||
uvm_spin_lock_init(&queue->lock, UVM_LOCK_ORDER_LEAF);
|
||||
init_waitqueue_head(&queue->wait_queue);
|
||||
@ -2170,25 +2118,21 @@ NV_STATUS uvm_api_tools_init_event_tracker(UVM_TOOLS_INIT_EVENT_TRACKER_PARAMS *
|
||||
goto fail;
|
||||
}
|
||||
|
||||
if (event_tracker->version == UvmToolsEventQueueVersion_V1) {
|
||||
if (event_tracker->version == UvmToolsEventQueueVersion_V1)
|
||||
buffer_size = queue->queue_buffer_count * sizeof(UvmEventEntry_V1);
|
||||
control_size = sizeof(UvmToolsEventControlData_V1);
|
||||
}
|
||||
else {
|
||||
else
|
||||
buffer_size = queue->queue_buffer_count * sizeof(UvmEventEntry_V2);
|
||||
control_size = sizeof(UvmToolsEventControlData_V2);
|
||||
}
|
||||
|
||||
status = map_user_pages(params->queueBuffer,
|
||||
buffer_size,
|
||||
(void **)&queue->queue_v2,
|
||||
&queue->queue_buffer,
|
||||
&queue->queue_buffer_pages);
|
||||
if (status != NV_OK)
|
||||
goto fail;
|
||||
|
||||
status = map_user_pages(params->controlBuffer,
|
||||
control_size,
|
||||
(void **)&queue->control_v2,
|
||||
sizeof(UvmToolsEventControlData),
|
||||
(void **)&queue->control,
|
||||
&queue->control_buffer_pages);
|
||||
|
||||
if (status != NV_OK)
|
||||
@ -2224,6 +2168,7 @@ NV_STATUS uvm_api_tools_set_notification_threshold(UVM_TOOLS_SET_NOTIFICATION_TH
|
||||
{
|
||||
uvm_tools_queue_snapshot_t sn;
|
||||
uvm_tools_event_tracker_t *event_tracker = tools_event_tracker(filp);
|
||||
UvmToolsEventControlData *ctrl;
|
||||
|
||||
if (!tracker_is_queue(event_tracker))
|
||||
return NV_ERR_INVALID_ARGUMENT;
|
||||
@ -2232,18 +2177,9 @@ NV_STATUS uvm_api_tools_set_notification_threshold(UVM_TOOLS_SET_NOTIFICATION_TH
|
||||
|
||||
event_tracker->queue.notification_threshold = params->notificationThreshold;
|
||||
|
||||
if (event_tracker->version == UvmToolsEventQueueVersion_V1) {
|
||||
UvmToolsEventControlData_V1 *ctrl = event_tracker->queue.control_v1;
|
||||
|
||||
sn.put_behind = atomic_read((atomic_t *)&ctrl->put_behind);
|
||||
sn.get_ahead = atomic_read((atomic_t *)&ctrl->get_ahead);
|
||||
}
|
||||
else {
|
||||
UvmToolsEventControlData_V2 *ctrl = event_tracker->queue.control_v2;
|
||||
|
||||
sn.put_behind = atomic_read((atomic_t *)&ctrl->put_behind);
|
||||
sn.get_ahead = atomic_read((atomic_t *)&ctrl->get_ahead);
|
||||
}
|
||||
ctrl = event_tracker->queue.control;
|
||||
sn.put_behind = atomic_read((atomic_t *)&ctrl->put_behind);
|
||||
sn.get_ahead = atomic_read((atomic_t *)&ctrl->get_ahead);
|
||||
|
||||
if (queue_needs_wakeup(&event_tracker->queue, &sn))
|
||||
wake_up_all(&event_tracker->queue.wait_queue);
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*******************************************************************************
|
||||
Copyright (c) 2017-2021 NVIDIA Corporation
|
||||
Copyright (c) 2017-2024 NVIDIA Corporation
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to
|
||||
@ -104,3 +104,248 @@ void uvm_hal_turing_host_set_gpfifo_entry(NvU64 *fifo_entry,
|
||||
*fifo_entry = fifo_entry_value;
|
||||
}
|
||||
|
||||
void uvm_hal_turing_host_tlb_invalidate_all(uvm_push_t *push,
|
||||
uvm_gpu_phys_address_t pdb,
|
||||
NvU32 depth,
|
||||
uvm_membar_t membar)
|
||||
{
|
||||
NvU32 aperture_value;
|
||||
NvU32 page_table_level;
|
||||
NvU32 pdb_lo;
|
||||
NvU32 pdb_hi;
|
||||
NvU32 ack_value = 0;
|
||||
NvU32 sysmembar_value = 0;
|
||||
|
||||
UVM_ASSERT_MSG(pdb.aperture == UVM_APERTURE_VID || pdb.aperture == UVM_APERTURE_SYS, "aperture: %u", pdb.aperture);
|
||||
|
||||
if (pdb.aperture == UVM_APERTURE_VID)
|
||||
aperture_value = HWCONST(C46F, MEM_OP_C, TLB_INVALIDATE_PDB_APERTURE, VID_MEM);
|
||||
else
|
||||
aperture_value = HWCONST(C46F, MEM_OP_C, TLB_INVALIDATE_PDB_APERTURE, SYS_MEM_COHERENT);
|
||||
|
||||
UVM_ASSERT_MSG(IS_ALIGNED(pdb.address, 1 << 12), "pdb 0x%llx\n", pdb.address);
|
||||
pdb.address >>= 12;
|
||||
|
||||
pdb_lo = pdb.address & HWMASK(C46F, MEM_OP_C, TLB_INVALIDATE_PDB_ADDR_LO);
|
||||
pdb_hi = pdb.address >> HWSIZE(C46F, MEM_OP_C, TLB_INVALIDATE_PDB_ADDR_LO);
|
||||
|
||||
// PDE3 is the highest level on Pascal-Turing, see the comment in
|
||||
// uvm_pascal_mmu.c for details.
|
||||
UVM_ASSERT_MSG(depth < NVC46F_MEM_OP_C_TLB_INVALIDATE_PAGE_TABLE_LEVEL_UP_TO_PDE3, "depth %u", depth);
|
||||
page_table_level = NVC46F_MEM_OP_C_TLB_INVALIDATE_PAGE_TABLE_LEVEL_UP_TO_PDE3 - depth;
|
||||
|
||||
if (membar != UVM_MEMBAR_NONE) {
|
||||
// If a GPU or SYS membar is needed, ACK_TYPE needs to be set to
|
||||
// GLOBALLY to make sure all the pending accesses can be picked up by
|
||||
// the membar.
|
||||
ack_value = HWCONST(C46F, MEM_OP_C, TLB_INVALIDATE_ACK_TYPE, GLOBALLY);
|
||||
}
|
||||
|
||||
if (membar == UVM_MEMBAR_SYS)
|
||||
sysmembar_value = HWCONST(C46F, MEM_OP_A, TLB_INVALIDATE_SYSMEMBAR, EN);
|
||||
else
|
||||
sysmembar_value = HWCONST(C46F, MEM_OP_A, TLB_INVALIDATE_SYSMEMBAR, DIS);
|
||||
|
||||
NV_PUSH_4U(C46F, MEM_OP_A, sysmembar_value,
|
||||
MEM_OP_B, 0,
|
||||
MEM_OP_C, HWCONST(C46F, MEM_OP_C, TLB_INVALIDATE_PDB, ONE) |
|
||||
HWVALUE(C46F, MEM_OP_C, TLB_INVALIDATE_PDB_ADDR_LO, pdb_lo) |
|
||||
HWCONST(C46F, MEM_OP_C, TLB_INVALIDATE_GPC, ENABLE) |
|
||||
HWCONST(C46F, MEM_OP_C, TLB_INVALIDATE_REPLAY, NONE) |
|
||||
HWVALUE(C46F, MEM_OP_C, TLB_INVALIDATE_PAGE_TABLE_LEVEL, page_table_level) |
|
||||
aperture_value |
|
||||
ack_value,
|
||||
MEM_OP_D, HWCONST(C46F, MEM_OP_D, OPERATION, MMU_TLB_INVALIDATE) |
|
||||
HWVALUE(C46F, MEM_OP_D, TLB_INVALIDATE_PDB_ADDR_HI, pdb_hi));
|
||||
|
||||
// GPU membar still requires an explicit membar method.
|
||||
if (membar == UVM_MEMBAR_GPU)
|
||||
uvm_push_get_gpu(push)->parent->host_hal->membar_gpu(push);
|
||||
}
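A small sketch of the depth-to-level conversion used above, assuming the NVC46F definitions from the driver's class headers: UVM's depth counts down from the root (PDE3 on Pascal-Turing), while the MEM_OP_C field counts levels up from the leaf, so the value pushed is a simple reflection.

    static NvU32 depth_to_page_table_level(NvU32 depth)
    {
        /* Valid depths are strictly below UP_TO_PDE3, exactly as asserted above. */
        UVM_ASSERT(depth < NVC46F_MEM_OP_C_TLB_INVALIDATE_PAGE_TABLE_LEVEL_UP_TO_PDE3);
        return NVC46F_MEM_OP_C_TLB_INVALIDATE_PAGE_TABLE_LEVEL_UP_TO_PDE3 - depth;
    }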
void uvm_hal_turing_host_tlb_invalidate_va(uvm_push_t *push,
|
||||
uvm_gpu_phys_address_t pdb,
|
||||
NvU32 depth,
|
||||
NvU64 base,
|
||||
NvU64 size,
|
||||
NvU64 page_size,
|
||||
uvm_membar_t membar)
|
||||
{
|
||||
NvU32 aperture_value;
|
||||
NvU32 page_table_level;
|
||||
NvU32 pdb_lo;
|
||||
NvU32 pdb_hi;
|
||||
NvU32 ack_value = 0;
|
||||
NvU32 sysmembar_value = 0;
|
||||
NvU32 va_lo;
|
||||
NvU32 va_hi;
|
||||
NvU64 end;
|
||||
NvU64 actual_base;
|
||||
NvU64 actual_size;
|
||||
NvU64 actual_end;
|
||||
NvU32 log2_invalidation_size;
|
||||
uvm_gpu_t *gpu = uvm_push_get_gpu(push);
|
||||
|
||||
UVM_ASSERT_MSG(IS_ALIGNED(page_size, 1 << 12), "page_size 0x%llx\n", page_size);
|
||||
UVM_ASSERT_MSG(IS_ALIGNED(base, page_size), "base 0x%llx page_size 0x%llx\n", base, page_size);
|
||||
UVM_ASSERT_MSG(IS_ALIGNED(size, page_size), "size 0x%llx page_size 0x%llx\n", size, page_size);
|
||||
UVM_ASSERT_MSG(size > 0, "size 0x%llx\n", size);
|
||||
|
||||
// The invalidation size must be a power-of-two number of pages containing
|
||||
// the passed interval
|
||||
end = base + size - 1;
|
||||
log2_invalidation_size = __fls((unsigned long)(end ^ base)) + 1;
|
||||
|
||||
if (log2_invalidation_size == 64) {
|
||||
// Invalidate everything
|
||||
gpu->parent->host_hal->tlb_invalidate_all(push, pdb, depth, membar);
|
||||
return;
|
||||
}
|
||||
|
||||
// The hardware aligns the target address down to the invalidation size.
|
||||
actual_size = 1ULL << log2_invalidation_size;
|
||||
actual_base = UVM_ALIGN_DOWN(base, actual_size);
|
||||
actual_end = actual_base + actual_size - 1;
|
||||
UVM_ASSERT(actual_end >= end);
|
||||
|
||||
// The invalidation size field expects log2(invalidation size in 4K), not
|
||||
// log2(invalidation size in bytes)
|
||||
log2_invalidation_size -= 12;
|
||||
|
||||
// Address to invalidate, as a multiple of 4K.
|
||||
base >>= 12;
|
||||
va_lo = base & HWMASK(C46F, MEM_OP_A, TLB_INVALIDATE_TARGET_ADDR_LO);
|
||||
va_hi = base >> HWSIZE(C46F, MEM_OP_A, TLB_INVALIDATE_TARGET_ADDR_LO);
|
||||
|
||||
UVM_ASSERT_MSG(pdb.aperture == UVM_APERTURE_VID || pdb.aperture == UVM_APERTURE_SYS, "aperture: %u", pdb.aperture);
|
||||
|
||||
if (pdb.aperture == UVM_APERTURE_VID)
|
||||
aperture_value = HWCONST(C46F, MEM_OP_C, TLB_INVALIDATE_PDB_APERTURE, VID_MEM);
|
||||
else
|
||||
aperture_value = HWCONST(C46F, MEM_OP_C, TLB_INVALIDATE_PDB_APERTURE, SYS_MEM_COHERENT);
|
||||
|
||||
UVM_ASSERT_MSG(IS_ALIGNED(pdb.address, 1 << 12), "pdb 0x%llx\n", pdb.address);
|
||||
pdb.address >>= 12;
|
||||
|
||||
pdb_lo = pdb.address & HWMASK(C46F, MEM_OP_C, TLB_INVALIDATE_PDB_ADDR_LO);
|
||||
pdb_hi = pdb.address >> HWSIZE(C46F, MEM_OP_C, TLB_INVALIDATE_PDB_ADDR_LO);
|
||||
|
||||
// PDE3 is the highest level on Pascal-Turing, see the comment in
|
||||
// uvm_pascal_mmu.c for details.
|
||||
UVM_ASSERT_MSG(depth < NVC46F_MEM_OP_C_TLB_INVALIDATE_PAGE_TABLE_LEVEL_UP_TO_PDE3, "depth %u", depth);
|
||||
page_table_level = NVC46F_MEM_OP_C_TLB_INVALIDATE_PAGE_TABLE_LEVEL_UP_TO_PDE3 - depth;
|
||||
|
||||
if (membar != UVM_MEMBAR_NONE) {
|
||||
// If a GPU or SYS membar is needed, ACK_TYPE needs to be set to
|
||||
// GLOBALLY to make sure all the pending accesses can be picked up by
|
||||
// the membar.
|
||||
ack_value = HWCONST(C46F, MEM_OP_C, TLB_INVALIDATE_ACK_TYPE, GLOBALLY);
|
||||
}
|
||||
|
||||
if (membar == UVM_MEMBAR_SYS)
|
||||
sysmembar_value = HWCONST(C46F, MEM_OP_A, TLB_INVALIDATE_SYSMEMBAR, EN);
|
||||
else
|
||||
sysmembar_value = HWCONST(C46F, MEM_OP_A, TLB_INVALIDATE_SYSMEMBAR, DIS);
|
||||
|
||||
NV_PUSH_4U(C46F, MEM_OP_A, HWVALUE(C46F, MEM_OP_A, TLB_INVALIDATE_INVALIDATION_SIZE, log2_invalidation_size) |
|
||||
sysmembar_value |
|
||||
HWVALUE(C46F, MEM_OP_A, TLB_INVALIDATE_TARGET_ADDR_LO, va_lo),
|
||||
MEM_OP_B, HWVALUE(C46F, MEM_OP_B, TLB_INVALIDATE_TARGET_ADDR_HI, va_hi),
|
||||
MEM_OP_C, HWCONST(C46F, MEM_OP_C, TLB_INVALIDATE_PDB, ONE) |
|
||||
HWVALUE(C46F, MEM_OP_C, TLB_INVALIDATE_PDB_ADDR_LO, pdb_lo) |
|
||||
HWCONST(C46F, MEM_OP_C, TLB_INVALIDATE_GPC, ENABLE) |
|
||||
HWCONST(C46F, MEM_OP_C, TLB_INVALIDATE_REPLAY, NONE) |
|
||||
HWVALUE(C46F, MEM_OP_C, TLB_INVALIDATE_PAGE_TABLE_LEVEL, page_table_level) |
|
||||
aperture_value |
|
||||
ack_value,
|
||||
MEM_OP_D, HWCONST(C46F, MEM_OP_D, OPERATION, MMU_TLB_INVALIDATE_TARGETED) |
|
||||
HWVALUE(C46F, MEM_OP_D, TLB_INVALIDATE_PDB_ADDR_HI, pdb_hi));
|
||||
|
||||
// GPU membar still requires an explicit membar method.
|
||||
if (membar == UVM_MEMBAR_GPU)
|
||||
gpu->parent->host_hal->membar_gpu(push);
|
||||
}
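A standalone worked example (hypothetical addresses, not taken from the diff) of how the power-of-two invalidation window is derived in uvm_hal_turing_host_tlb_invalidate_va() above; __builtin_clzll stands in for the kernel's __fls():

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        uint64_t base = 0x7f0001000ULL;      /* example VA, 4K aligned */
        uint64_t size = 0x6000ULL;           /* example length, 4K multiple */
        uint64_t end  = base + size - 1;

        /* The highest bit in which base and end differ decides the window size. */
        unsigned log2_size = 64 - __builtin_clzll(base ^ end);   /* == __fls(base ^ end) + 1 */
        uint64_t window = 1ULL << log2_size;                     /* 0x8000 (32K) here */
        uint64_t window_base = base & ~(window - 1);             /* hardware aligns down */

        printf("window of 0x%llx bytes at 0x%llx covers the requested range\n",
               (unsigned long long)window, (unsigned long long)window_base);
        return 0;
    }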
void uvm_hal_turing_host_tlb_invalidate_test(uvm_push_t *push,
|
||||
uvm_gpu_phys_address_t pdb,
|
||||
UVM_TEST_INVALIDATE_TLB_PARAMS *params)
|
||||
{
|
||||
NvU32 ack_value = 0;
|
||||
NvU32 sysmembar_value = 0;
|
||||
NvU32 invalidate_gpc_value = 0;
|
||||
NvU32 aperture_value = 0;
|
||||
NvU32 pdb_lo = 0;
|
||||
NvU32 pdb_hi = 0;
|
||||
NvU32 page_table_level = 0;
|
||||
|
||||
UVM_ASSERT_MSG(pdb.aperture == UVM_APERTURE_VID || pdb.aperture == UVM_APERTURE_SYS, "aperture: %u", pdb.aperture);
|
||||
if (pdb.aperture == UVM_APERTURE_VID)
|
||||
aperture_value = HWCONST(C46F, MEM_OP_C, TLB_INVALIDATE_PDB_APERTURE, VID_MEM);
|
||||
else
|
||||
aperture_value = HWCONST(C46F, MEM_OP_C, TLB_INVALIDATE_PDB_APERTURE, SYS_MEM_COHERENT);
|
||||
|
||||
UVM_ASSERT_MSG(IS_ALIGNED(pdb.address, 1 << 12), "pdb 0x%llx\n", pdb.address);
|
||||
pdb.address >>= 12;
|
||||
|
||||
pdb_lo = pdb.address & HWMASK(C46F, MEM_OP_C, TLB_INVALIDATE_PDB_ADDR_LO);
|
||||
pdb_hi = pdb.address >> HWSIZE(C46F, MEM_OP_C, TLB_INVALIDATE_PDB_ADDR_LO);
|
||||
|
||||
if (params->page_table_level != UvmInvalidatePageTableLevelAll) {
|
||||
// PDE3 is the highest level on Pascal-Turing, see the comment in
|
||||
// uvm_pascal_mmu.c for details.
|
||||
page_table_level = min((NvU32)UvmInvalidatePageTableLevelPde3, params->page_table_level) - 1;
|
||||
}
|
||||
|
||||
if (params->membar != UvmInvalidateTlbMemBarNone) {
|
||||
// If a GPU or SYS membar is needed, ack_value needs to be set to
|
||||
// GLOBALLY to make sure all the pending accesses can be picked up by
|
||||
// the membar.
|
||||
ack_value = HWCONST(C46F, MEM_OP_C, TLB_INVALIDATE_ACK_TYPE, GLOBALLY);
|
||||
}
|
||||
|
||||
if (params->membar == UvmInvalidateTlbMemBarSys)
|
||||
sysmembar_value = HWCONST(C46F, MEM_OP_A, TLB_INVALIDATE_SYSMEMBAR, EN);
|
||||
else
|
||||
sysmembar_value = HWCONST(C46F, MEM_OP_A, TLB_INVALIDATE_SYSMEMBAR, DIS);
|
||||
|
||||
if (params->disable_gpc_invalidate)
|
||||
invalidate_gpc_value = HWCONST(C46F, MEM_OP_C, TLB_INVALIDATE_GPC, DISABLE);
|
||||
else
|
||||
invalidate_gpc_value = HWCONST(C46F, MEM_OP_C, TLB_INVALIDATE_GPC, ENABLE);
|
||||
|
||||
if (params->target_va_mode == UvmTargetVaModeTargeted) {
|
||||
NvU64 va = params->va >> 12;
|
||||
|
||||
NvU32 va_lo = va & HWMASK(C46F, MEM_OP_A, TLB_INVALIDATE_TARGET_ADDR_LO);
|
||||
NvU32 va_hi = va >> HWSIZE(C46F, MEM_OP_A, TLB_INVALIDATE_TARGET_ADDR_LO);
|
||||
NV_PUSH_4U(C46F, MEM_OP_A, sysmembar_value |
|
||||
HWVALUE(C46F, MEM_OP_A, TLB_INVALIDATE_TARGET_ADDR_LO, va_lo),
|
||||
MEM_OP_B, HWVALUE(C46F, MEM_OP_B, TLB_INVALIDATE_TARGET_ADDR_HI, va_hi),
|
||||
MEM_OP_C, HWCONST(C46F, MEM_OP_C, TLB_INVALIDATE_REPLAY, NONE) |
|
||||
HWVALUE(C46F, MEM_OP_C, TLB_INVALIDATE_PAGE_TABLE_LEVEL, page_table_level) |
|
||||
HWCONST(C46F, MEM_OP_C, TLB_INVALIDATE_PDB, ONE) |
|
||||
HWVALUE(C46F, MEM_OP_C, TLB_INVALIDATE_PDB_ADDR_LO, pdb_lo) |
|
||||
invalidate_gpc_value |
|
||||
aperture_value |
|
||||
ack_value,
|
||||
MEM_OP_D, HWCONST(C46F, MEM_OP_D, OPERATION, MMU_TLB_INVALIDATE_TARGETED) |
|
||||
HWVALUE(C46F, MEM_OP_D, TLB_INVALIDATE_PDB_ADDR_HI, pdb_hi));
|
||||
}
|
||||
else {
|
||||
NV_PUSH_4U(C46F, MEM_OP_A, sysmembar_value,
|
||||
MEM_OP_B, 0,
|
||||
MEM_OP_C, HWCONST(C46F, MEM_OP_C, TLB_INVALIDATE_REPLAY, NONE) |
|
||||
HWVALUE(C46F, MEM_OP_C, TLB_INVALIDATE_PAGE_TABLE_LEVEL, page_table_level) |
|
||||
HWCONST(C46F, MEM_OP_C, TLB_INVALIDATE_PDB, ONE) |
|
||||
HWVALUE(C46F, MEM_OP_C, TLB_INVALIDATE_PDB_ADDR_LO, pdb_lo) |
|
||||
invalidate_gpc_value |
|
||||
aperture_value |
|
||||
ack_value,
|
||||
MEM_OP_D, HWCONST(C46F, MEM_OP_D, OPERATION, MMU_TLB_INVALIDATE) |
|
||||
HWVALUE(C46F, MEM_OP_D, TLB_INVALIDATE_PDB_ADDR_HI, pdb_hi));
|
||||
}
|
||||
|
||||
// GPU membar still requires an explicit membar method.
|
||||
if (params->membar == UvmInvalidateTlbMemBarLocal)
|
||||
uvm_push_get_gpu(push)->parent->host_hal->membar_gpu(push);
|
||||
}
|
||||
|
@ -138,7 +138,7 @@ static NvU64 poisoned_pte_turing(void)
|
||||
|
||||
static uvm_mmu_mode_hal_t turing_mmu_mode_hal;
|
||||
|
||||
uvm_mmu_mode_hal_t *uvm_hal_mmu_mode_turing(NvU32 big_page_size)
|
||||
uvm_mmu_mode_hal_t *uvm_hal_mmu_mode_turing(NvU64 big_page_size)
|
||||
{
|
||||
static bool initialized = false;
|
||||
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*******************************************************************************
|
||||
Copyright (c) 2013-2023 NVidia Corporation
|
||||
Copyright (c) 2013-2024 NVidia Corporation
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to
|
||||
@ -52,19 +52,18 @@ typedef enum
|
||||
|
||||
typedef unsigned long long UvmStream;
|
||||
|
||||
// The maximum number of sub-processors per parent GPU.
|
||||
#define UVM_PARENT_ID_MAX_SUB_PROCESSORS 8
|
||||
|
||||
// The maximum number of GPUs changed when multiple MIG instances per
|
||||
// uvm_parent_gpu_t were added. See UvmEventQueueCreate().
|
||||
// uvm_parent_gpu_t were added. The old version is kept as a convenience
|
||||
// for code that needs to maintain forward compatibility.
|
||||
#define UVM_MAX_GPUS_V1 NV_MAX_DEVICES
|
||||
#define UVM_MAX_PROCESSORS_V1 (UVM_MAX_GPUS_V1 + 1)
|
||||
#define UVM_MAX_GPUS_V2 (NV_MAX_DEVICES * NV_MAX_SUBDEVICES)
|
||||
#define UVM_MAX_PROCESSORS_V2 (UVM_MAX_GPUS_V2 + 1)
|
||||
#define UVM_MAX_GPUS (NV_MAX_DEVICES * UVM_PARENT_ID_MAX_SUB_PROCESSORS)
|
||||
#define UVM_MAX_PROCESSORS (UVM_MAX_GPUS + 1)
|
||||
|
||||
// For backward compatibility:
|
||||
// TODO: Bug 4465348: remove these after replacing old references.
|
||||
#define UVM_MAX_GPUS UVM_MAX_GPUS_V1
|
||||
#define UVM_MAX_PROCESSORS UVM_MAX_PROCESSORS_V1
|
||||
|
||||
#define UVM_PROCESSOR_MASK_SIZE ((UVM_MAX_PROCESSORS_V2 + (sizeof(NvU64) * 8) - 1) / (sizeof(NvU64) * 8))
|
||||
#define UVM_PROCESSOR_MASK_SIZE ((UVM_MAX_PROCESSORS + (sizeof(NvU64) * 8) - 1) / (sizeof(NvU64) * 8))
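Worked arithmetic for the mask size, as a sketch that assumes NV_MAX_DEVICES is 32 (a value taken from nvlimits.h, not stated in this diff): 32 devices times 8 sub-processors plus the CPU gives 257 processor IDs, which the ceiling division packs into 5 NvU64 words.

    #include <assert.h>

    #define EX_NV_MAX_DEVICES  32   /* assumption for the example only */
    #define EX_SUB_PROCESSORS  8    /* UVM_PARENT_ID_MAX_SUB_PROCESSORS above */
    #define EX_MAX_PROCESSORS  (EX_NV_MAX_DEVICES * EX_SUB_PROCESSORS + 1)   /* 257 */
    #define EX_MASK_WORDS      ((EX_MAX_PROCESSORS + 63) / 64)               /* ceil(257 / 64) */

    static_assert(EX_MASK_WORDS == 5, "257 processor IDs need five 64-bit words");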
#define UVM_INIT_FLAGS_DISABLE_HMM ((NvU64)0x1)
|
||||
#define UVM_INIT_FLAGS_MULTI_PROCESS_SHARING_MODE ((NvU64)0x2)
|
||||
@ -423,29 +422,7 @@ typedef struct
|
||||
NvU32 pid; // process id causing the fault
|
||||
NvU32 threadId; // thread id causing the fault
|
||||
NvU64 pc; // address of the instruction causing the fault
|
||||
} UvmEventCpuFaultInfo_V1;
|
||||
|
||||
typedef struct
|
||||
{
|
||||
//
|
||||
// eventType has to be 1st argument of this structure. Setting eventType to
|
||||
// UvmEventTypeMemoryViolation helps to identify event data in a queue.
|
||||
//
|
||||
NvU8 eventType;
|
||||
NvU8 accessType; // read/write violation (UvmEventMemoryAccessType)
|
||||
//
|
||||
// This structure is shared between UVM kernel and tools.
|
||||
// Manually padding the structure so that compiler options like pragma pack
|
||||
// or malign-double will have no effect on the field offsets.
|
||||
//
|
||||
NvU16 padding16Bits;
|
||||
NvS32 nid; // NUMA node ID of faulting CPU
|
||||
NvU64 address; // faulting address
|
||||
NvU64 timeStamp; // cpu time when the fault occurred
|
||||
NvU32 pid; // process id causing the fault
|
||||
NvU32 threadId; // thread id causing the fault
|
||||
NvU64 pc; // address of the instruction causing the fault
|
||||
} UvmEventCpuFaultInfo_V2;
|
||||
} UvmEventCpuFaultInfo;
|
||||
|
||||
typedef enum
|
||||
{
|
||||
@ -721,13 +698,7 @@ typedef struct
|
||||
//
|
||||
NvU8 eventType;
|
||||
NvU8 faultType; // type of gpu fault, refer UvmEventFaultType
|
||||
NvU8 accessType; // memory access type, refer UvmEventMemoryAccessType
|
||||
//
|
||||
// This structure is shared between UVM kernel and tools.
|
||||
// Manually padding the structure so that compiler options like pragma pack
|
||||
// or malign-double will have no effect on the field offsets
|
||||
//
|
||||
NvU8 padding8Bits_1;
|
||||
NvU16 gpuIndex; // GPU that experienced the fault
|
||||
union
|
||||
{
|
||||
NvU16 gpcId; // If this is a replayable fault, this field contains
|
||||
@ -759,14 +730,13 @@ typedef struct
|
||||
// UvmEventFaultClientTypeGpc indicates replayable
|
||||
// fault, while UvmEventFaultClientTypeHub indicates
|
||||
// non-replayable fault.
|
||||
|
||||
NvU8 accessType; // memory access type, refer UvmEventMemoryAccessType
|
||||
//
|
||||
// This structure is shared between UVM kernel and tools.
|
||||
// Manually padding the structure so that compiler options like pragma pack
|
||||
// or malign-double will have no effect on the field offsets
|
||||
//
|
||||
NvU8 padding8Bits_2;
|
||||
NvU16 gpuIndex; // GPU that experienced the fault
|
||||
NvU16 padding16bits;
|
||||
} UvmEventGpuFaultInfo_V2;
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
@ -1108,8 +1078,8 @@ typedef struct
|
||||
// or malign-double will have no effect on the field offsets
|
||||
//
|
||||
NvU8 padding8bits;
|
||||
NvU16 padding16bits[2];
|
||||
NvU16 processorIndex; // index of the cpu/gpu that was throttled
|
||||
NvU32 padding32bits;
|
||||
NvU64 address; // address of the page whose servicing is being
|
||||
// throttled
|
||||
NvU64 timeStamp; // cpu start time stamp for the throttling operation
|
||||
@ -1150,8 +1120,8 @@ typedef struct
|
||||
// or malign-double will have no effect on the field offsets
|
||||
//
|
||||
NvU8 padding8bits;
|
||||
NvU16 padding16bits[2];
|
||||
NvU16 processorIndex; // index of the cpu/gpu that was throttled
|
||||
NvU32 padding32bits;
|
||||
NvU64 address; // address of the page whose servicing is being
|
||||
// throttled
|
||||
NvU64 timeStamp; // cpu end time stamp for the throttling operation
|
||||
@ -1409,7 +1379,7 @@ typedef struct
|
||||
NvU8 eventType;
|
||||
UvmEventMigrationInfo_Lite migration_Lite;
|
||||
|
||||
UvmEventCpuFaultInfo_V1 cpuFault;
|
||||
UvmEventCpuFaultInfo cpuFault;
|
||||
UvmEventMigrationInfo_V1 migration;
|
||||
UvmEventGpuFaultInfo_V1 gpuFault;
|
||||
UvmEventGpuFaultReplayInfo_V1 gpuFaultReplay;
|
||||
@ -1443,7 +1413,7 @@ typedef struct
|
||||
NvU8 eventType;
|
||||
UvmEventMigrationInfo_Lite migration_Lite;
|
||||
|
||||
UvmEventCpuFaultInfo_V2 cpuFault;
|
||||
UvmEventCpuFaultInfo cpuFault;
|
||||
UvmEventMigrationInfo_V2 migration;
|
||||
UvmEventGpuFaultInfo_V2 gpuFault;
|
||||
UvmEventGpuFaultReplayInfo_V2 gpuFaultReplay;
|
||||
@ -1510,19 +1480,7 @@ typedef enum {
|
||||
UvmToolsEventQueueVersion_V2 = 2,
|
||||
} UvmToolsEventQueueVersion;
|
||||
|
||||
typedef struct UvmEventControlData_V1_tag {
|
||||
// entries between get_ahead and get_behind are currently being read
|
||||
volatile NvU32 get_ahead;
|
||||
volatile NvU32 get_behind;
|
||||
// entries between put_ahead and put_behind are currently being written
|
||||
volatile NvU32 put_ahead;
|
||||
volatile NvU32 put_behind;
|
||||
|
||||
// counter of dropped events
|
||||
NvU64 dropped[UvmEventNumTypesAll];
|
||||
} UvmToolsEventControlData_V1;
|
||||
|
||||
typedef struct UvmEventControlData_V2_tag {
|
||||
typedef struct UvmEventControlData_tag {
|
||||
// entries between get_ahead and get_behind are currently being read
|
||||
volatile NvU32 get_ahead;
|
||||
volatile NvU32 get_behind;
|
||||
@ -1531,19 +1489,12 @@ typedef struct UvmEventControlData_V2_tag {
|
||||
volatile NvU32 put_ahead;
|
||||
volatile NvU32 put_behind;
|
||||
|
||||
// The version values are limited to UvmToolsEventQueueVersion and
|
||||
// initialized by UvmToolsCreateEventQueue().
|
||||
NvU32 version;
|
||||
NvU32 padding32Bits;
|
||||
|
||||
// counter of dropped events
|
||||
NvU64 dropped[UvmEventNumTypesAll];
|
||||
} UvmToolsEventControlData_V2;
|
||||
} UvmToolsEventControlData;
|
||||
|
||||
// For backward compatibility:
|
||||
// TODO: Bug 4465348: remove these after replacing old references.
|
||||
typedef UvmToolsEventControlData_V1 UvmToolsEventControlData;
|
||||
typedef UvmEventEntry_V1 UvmEventEntry;
|
||||
// TODO: Bug 4465348: remove this after replacing old references.
|
||||
typedef UvmToolsEventControlData UvmToolsEventControlData_V1;
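A sketch of how a hypothetical tools consumer could use the version field now that a single control struct replaces the V1/V2 pair; it relies only on the types declared in this header and is not part of the change itself.

    static size_t event_entry_size(const UvmToolsEventControlData *ctrl)
    {
        /* version is initialized by UvmToolsCreateEventQueue(), per the comment
         * in the struct above, so the consumer can branch on it after mapping. */
        if (ctrl->version == UvmToolsEventQueueVersion_V1)
            return sizeof(UvmEventEntry_V1);
        return sizeof(UvmEventEntry_V2);
    }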
//------------------------------------------------------------------------------
|
||||
// UVM Tools forward types (handles) definitions
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*******************************************************************************
|
||||
Copyright (c) 2015-2023 NVIDIA Corporation
|
||||
Copyright (c) 2015-2024 NVIDIA Corporation
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to
|
||||
@ -1328,12 +1328,12 @@ error_block_free:
|
||||
|
||||
static void cpu_chunk_remove_sysmem_gpu_mapping(uvm_cpu_chunk_t *chunk, uvm_gpu_t *gpu)
|
||||
{
|
||||
NvU64 gpu_mapping_addr = uvm_cpu_chunk_get_parent_gpu_phys_addr(chunk, gpu->parent);
|
||||
NvU64 gpu_mapping_addr = uvm_cpu_chunk_get_gpu_phys_addr(chunk, gpu);
|
||||
if (gpu_mapping_addr == 0)
|
||||
return;
|
||||
|
||||
uvm_pmm_sysmem_mappings_remove_gpu_mapping(&gpu->pmm_reverse_sysmem_mappings, gpu_mapping_addr);
|
||||
uvm_cpu_chunk_unmap_parent_gpu_phys(chunk, gpu->parent);
|
||||
uvm_cpu_chunk_unmap_gpu(chunk, gpu);
|
||||
}
|
||||
|
||||
static NV_STATUS cpu_chunk_add_sysmem_gpu_mapping(uvm_cpu_chunk_t *chunk,
|
||||
@ -1356,17 +1356,14 @@ static NV_STATUS cpu_chunk_add_sysmem_gpu_mapping(uvm_cpu_chunk_t *chunk,
|
||||
|
||||
chunk_size = uvm_cpu_chunk_get_size(chunk);
|
||||
|
||||
// TODO: Bug 3744779: Handle benign assertion in
|
||||
// pmm_sysmem_mappings_remove_gpu_mapping() in case of a
|
||||
// failure.
|
||||
status = uvm_pmm_sysmem_mappings_add_gpu_mapping(&gpu->pmm_reverse_sysmem_mappings,
|
||||
uvm_cpu_chunk_get_parent_gpu_phys_addr(chunk, gpu->parent),
|
||||
uvm_cpu_chunk_get_gpu_phys_addr(chunk, gpu),
|
||||
uvm_va_block_cpu_page_address(block, page_index),
|
||||
chunk_size,
|
||||
block,
|
||||
UVM_ID_CPU);
|
||||
if (status != NV_OK)
|
||||
cpu_chunk_remove_sysmem_gpu_mapping(chunk, gpu);
|
||||
uvm_cpu_chunk_unmap_gpu(chunk, gpu);
|
||||
|
||||
return status;
|
||||
}
|
||||
@ -1395,10 +1392,10 @@ static NV_STATUS block_gpu_map_phys_all_cpu_pages(uvm_va_block_t *block, uvm_gpu
|
||||
|
||||
for_each_possible_uvm_node(nid) {
|
||||
for_each_cpu_chunk_in_block(chunk, page_index, block, nid) {
|
||||
UVM_ASSERT_MSG(uvm_cpu_chunk_get_parent_gpu_phys_addr(chunk, gpu->parent) == 0,
|
||||
UVM_ASSERT_MSG(uvm_cpu_chunk_get_gpu_phys_addr(chunk, gpu) == 0,
|
||||
"GPU%u DMA address 0x%llx\n",
|
||||
uvm_id_value(gpu->id),
|
||||
uvm_cpu_chunk_get_parent_gpu_phys_addr(chunk, gpu->parent));
|
||||
uvm_cpu_chunk_get_gpu_phys_addr(chunk, gpu));
|
||||
|
||||
status = cpu_chunk_add_sysmem_gpu_mapping(chunk, block, page_index, gpu);
|
||||
if (status != NV_OK)
|
||||
@ -1561,8 +1558,7 @@ NV_STATUS uvm_va_block_gpu_state_alloc(uvm_va_block_t *va_block)
|
||||
}
|
||||
|
||||
void uvm_va_block_unmap_cpu_chunk_on_gpus(uvm_va_block_t *block,
|
||||
uvm_cpu_chunk_t *chunk,
|
||||
uvm_page_index_t page_index)
|
||||
uvm_cpu_chunk_t *chunk)
|
||||
{
|
||||
uvm_gpu_id_t id;
|
||||
|
||||
@ -1601,7 +1597,7 @@ NV_STATUS uvm_va_block_map_cpu_chunk_on_gpus(uvm_va_block_t *block,
|
||||
return NV_OK;
|
||||
|
||||
error:
|
||||
uvm_va_block_unmap_cpu_chunk_on_gpus(block, chunk, page_index);
|
||||
uvm_va_block_unmap_cpu_chunk_on_gpus(block, chunk);
|
||||
return status;
|
||||
}
|
||||
|
||||
@ -1620,7 +1616,7 @@ void uvm_va_block_remove_cpu_chunks(uvm_va_block_t *va_block, uvm_va_block_regio
|
||||
uvm_page_mask_region_clear(&va_block->cpu.pte_bits[UVM_PTE_BITS_CPU_WRITE], chunk_region);
|
||||
uvm_va_block_cpu_clear_resident_region(va_block, nid, chunk_region);
|
||||
uvm_cpu_chunk_remove_from_block(va_block, nid, page_index);
|
||||
uvm_va_block_unmap_cpu_chunk_on_gpus(va_block, chunk, page_index);
|
||||
uvm_va_block_unmap_cpu_chunk_on_gpus(va_block, chunk);
|
||||
uvm_cpu_chunk_free(chunk);
|
||||
}
|
||||
}
|
||||
@ -2308,7 +2304,7 @@ static bool block_gpu_supports_2m(uvm_va_block_t *block, uvm_gpu_t *gpu)
|
||||
return uvm_mmu_page_size_supported(&gpu_va_space->page_tables, UVM_PAGE_SIZE_2M);
|
||||
}
|
||||
|
||||
NvU32 uvm_va_block_gpu_big_page_size(uvm_va_block_t *va_block, uvm_gpu_t *gpu)
|
||||
NvU64 uvm_va_block_gpu_big_page_size(uvm_va_block_t *va_block, uvm_gpu_t *gpu)
|
||||
{
|
||||
uvm_gpu_va_space_t *gpu_va_space;
|
||||
|
||||
@ -2316,7 +2312,7 @@ NvU32 uvm_va_block_gpu_big_page_size(uvm_va_block_t *va_block, uvm_gpu_t *gpu)
|
||||
return gpu_va_space->page_tables.big_page_size;
|
||||
}
|
||||
|
||||
static uvm_va_block_region_t range_big_page_region_all(NvU64 start, NvU64 end, NvU32 big_page_size)
|
||||
static uvm_va_block_region_t range_big_page_region_all(NvU64 start, NvU64 end, NvU64 big_page_size)
|
||||
{
|
||||
NvU64 first_addr = UVM_ALIGN_UP(start, big_page_size);
|
||||
NvU64 outer_addr = UVM_ALIGN_DOWN(end + 1, big_page_size);
|
||||
@ -2330,20 +2326,20 @@ static uvm_va_block_region_t range_big_page_region_all(NvU64 start, NvU64 end, N
|
||||
return uvm_va_block_region((first_addr - start) / PAGE_SIZE, (outer_addr - start) / PAGE_SIZE);
|
||||
}
|
||||
|
||||
static size_t range_num_big_pages(NvU64 start, NvU64 end, NvU32 big_page_size)
|
||||
static size_t range_num_big_pages(NvU64 start, NvU64 end, NvU64 big_page_size)
|
||||
{
|
||||
uvm_va_block_region_t region = range_big_page_region_all(start, end, big_page_size);
|
||||
return (size_t)uvm_div_pow2_64(uvm_va_block_region_size(region), big_page_size);
|
||||
}
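A worked example (hypothetical block bounds) of the big-page region math above, using plain alignment arithmetic in place of UVM_ALIGN_UP/UVM_ALIGN_DOWN:

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        uint64_t start = 0x2000, end = 0x41fff;        /* block [0x2000, 0x42000) */
        uint64_t big = 0x10000, page = 0x1000;         /* 64K big pages, 4K pages */
        uint64_t first_addr = (start + big - 1) & ~(big - 1);   /* ALIGN_UP   -> 0x10000 */
        uint64_t outer_addr = (end + 1) & ~(big - 1);           /* ALIGN_DOWN -> 0x40000 */

        /* The region is expressed in 4K page indices relative to the block start. */
        printf("big-page region [%llu, %llu), %llu big pages\n",
               (unsigned long long)((first_addr - start) / page),    /* 14 */
               (unsigned long long)((outer_addr - start) / page),    /* 62 */
               (unsigned long long)((outer_addr - first_addr) / big)); /* 3 */
        return 0;
    }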
uvm_va_block_region_t uvm_va_block_big_page_region_all(uvm_va_block_t *va_block, NvU32 big_page_size)
|
||||
uvm_va_block_region_t uvm_va_block_big_page_region_all(uvm_va_block_t *va_block, NvU64 big_page_size)
|
||||
{
|
||||
return range_big_page_region_all(va_block->start, va_block->end, big_page_size);
|
||||
}
|
||||
|
||||
uvm_va_block_region_t uvm_va_block_big_page_region_subset(uvm_va_block_t *va_block,
|
||||
uvm_va_block_region_t region,
|
||||
NvU32 big_page_size)
|
||||
NvU64 big_page_size)
|
||||
{
|
||||
NvU64 start = uvm_va_block_region_start(va_block, region);
|
||||
NvU64 end = uvm_va_block_region_end(va_block, region);
|
||||
@ -2361,12 +2357,12 @@ uvm_va_block_region_t uvm_va_block_big_page_region_subset(uvm_va_block_t *va_blo
|
||||
return big_region;
|
||||
}
|
||||
|
||||
size_t uvm_va_block_num_big_pages(uvm_va_block_t *va_block, NvU32 big_page_size)
|
||||
size_t uvm_va_block_num_big_pages(uvm_va_block_t *va_block, NvU64 big_page_size)
|
||||
{
|
||||
return range_num_big_pages(va_block->start, va_block->end, big_page_size);
|
||||
}
|
||||
|
||||
NvU64 uvm_va_block_big_page_addr(uvm_va_block_t *va_block, size_t big_page_index, NvU32 big_page_size)
|
||||
NvU64 uvm_va_block_big_page_addr(uvm_va_block_t *va_block, size_t big_page_index, NvU64 big_page_size)
|
||||
{
|
||||
NvU64 addr = UVM_ALIGN_UP(va_block->start, big_page_size) + (big_page_index * big_page_size);
|
||||
UVM_ASSERT(addr >= va_block->start);
|
||||
@ -2374,7 +2370,7 @@ NvU64 uvm_va_block_big_page_addr(uvm_va_block_t *va_block, size_t big_page_index
|
||||
return addr;
|
||||
}
|
||||
|
||||
uvm_va_block_region_t uvm_va_block_big_page_region(uvm_va_block_t *va_block, size_t big_page_index, NvU32 big_page_size)
|
||||
uvm_va_block_region_t uvm_va_block_big_page_region(uvm_va_block_t *va_block, size_t big_page_index, NvU64 big_page_size)
|
||||
{
|
||||
NvU64 page_addr = uvm_va_block_big_page_addr(va_block, big_page_index, big_page_size);
|
||||
|
||||
@ -2390,7 +2386,7 @@ uvm_va_block_region_t uvm_va_block_big_page_region(uvm_va_block_t *va_block, siz
|
||||
// uvm_va_block_gpu_state_t::big_ptes) corresponding to page_index. If
|
||||
// page_index cannot be covered by a big PTE due to alignment or block size,
|
||||
// MAX_BIG_PAGES_PER_UVM_VA_BLOCK is returned.
|
||||
size_t uvm_va_block_big_page_index(uvm_va_block_t *va_block, uvm_page_index_t page_index, NvU32 big_page_size)
|
||||
size_t uvm_va_block_big_page_index(uvm_va_block_t *va_block, uvm_page_index_t page_index, NvU64 big_page_size)
|
||||
{
|
||||
uvm_va_block_region_t big_region_all = uvm_va_block_big_page_region_all(va_block, big_page_size);
|
||||
size_t big_index;
|
||||
@ -2415,7 +2411,7 @@ static void uvm_page_mask_init_from_big_ptes(uvm_va_block_t *block,
|
||||
{
|
||||
uvm_va_block_region_t big_region;
|
||||
size_t big_page_index;
|
||||
NvU32 big_page_size = uvm_va_block_gpu_big_page_size(block, gpu);
|
||||
NvU64 big_page_size = uvm_va_block_gpu_big_page_size(block, gpu);
|
||||
|
||||
uvm_page_mask_zero(mask_out);
|
||||
|
||||
@ -2425,7 +2421,7 @@ static void uvm_page_mask_init_from_big_ptes(uvm_va_block_t *block,
|
||||
}
|
||||
}
|
||||
|
||||
NvU32 uvm_va_block_page_size_cpu(uvm_va_block_t *va_block, uvm_page_index_t page_index)
|
||||
NvU64 uvm_va_block_page_size_cpu(uvm_va_block_t *va_block, uvm_page_index_t page_index)
|
||||
{
|
||||
if (!uvm_page_mask_test(&va_block->cpu.pte_bits[UVM_PTE_BITS_CPU_READ], page_index))
|
||||
return 0;
|
||||
@ -2439,7 +2435,7 @@ NvU32 uvm_va_block_page_size_cpu(uvm_va_block_t *va_block, uvm_page_index_t page
|
||||
return PAGE_SIZE;
|
||||
}
|
||||
|
||||
NvU32 uvm_va_block_page_size_gpu(uvm_va_block_t *va_block, uvm_gpu_id_t gpu_id, uvm_page_index_t page_index)
|
||||
NvU64 uvm_va_block_page_size_gpu(uvm_va_block_t *va_block, uvm_gpu_id_t gpu_id, uvm_page_index_t page_index)
|
||||
{
|
||||
uvm_va_block_gpu_state_t *gpu_state = uvm_va_block_gpu_state_get(va_block, gpu_id);
|
||||
size_t big_page_size, big_page_index;
|
||||
@ -2467,7 +2463,7 @@ NvU32 uvm_va_block_page_size_gpu(uvm_va_block_t *va_block, uvm_gpu_id_t gpu_id,
|
||||
// resident. Note that this is different from uvm_va_block_page_size_* because
|
||||
// those return the size of the PTE which maps the page index, which may be
|
||||
// smaller than the physical allocation.
|
||||
static NvU32 block_phys_page_size(uvm_va_block_t *block, block_phys_page_t page)
|
||||
static NvU64 block_phys_page_size(uvm_va_block_t *block, block_phys_page_t page)
|
||||
{
|
||||
uvm_va_block_gpu_state_t *gpu_state;
|
||||
uvm_chunk_size_t chunk_size;
|
||||
@ -2480,7 +2476,7 @@ static NvU32 block_phys_page_size(uvm_va_block_t *block, block_phys_page_t page)
|
||||
return 0;
|
||||
|
||||
UVM_ASSERT(uvm_processor_mask_test(&block->resident, UVM_ID_CPU));
|
||||
return (NvU32)uvm_cpu_chunk_get_size(chunk);
|
||||
return uvm_cpu_chunk_get_size(chunk);
|
||||
}
|
||||
|
||||
gpu_state = uvm_va_block_gpu_state_get(block, page.processor);
|
||||
@ -2489,10 +2485,10 @@ static NvU32 block_phys_page_size(uvm_va_block_t *block, block_phys_page_t page)
|
||||
|
||||
UVM_ASSERT(uvm_processor_mask_test(&block->resident, page.processor));
|
||||
block_gpu_chunk_index(block, block_get_gpu(block, page.processor), page.page_index, &chunk_size);
|
||||
return (NvU32)chunk_size;
|
||||
return chunk_size;
|
||||
}
|
||||
|
||||
NvU32 uvm_va_block_get_physical_size(uvm_va_block_t *block,
|
||||
NvU64 uvm_va_block_get_physical_size(uvm_va_block_t *block,
|
||||
uvm_processor_id_t processor,
|
||||
uvm_page_index_t page_index)
|
||||
{
|
||||
@ -3344,7 +3340,7 @@ static uvm_gpu_phys_address_t block_phys_page_address(uvm_va_block_t *block,
|
||||
|
||||
if (UVM_ID_IS_CPU(block_page.processor)) {
|
||||
uvm_cpu_chunk_t *chunk = uvm_cpu_chunk_get_chunk_for_page(block, block_page.nid, block_page.page_index);
|
||||
NvU64 dma_addr = uvm_cpu_chunk_get_parent_gpu_phys_addr(chunk, gpu->parent);
|
||||
NvU64 dma_addr = uvm_cpu_chunk_get_gpu_phys_addr(chunk, gpu);
|
||||
uvm_va_block_region_t chunk_region = uvm_va_block_chunk_region(block,
|
||||
uvm_cpu_chunk_get_size(chunk),
|
||||
block_page.page_index);
|
||||
@ -5387,7 +5383,7 @@ static bool block_check_gpu_chunks(uvm_va_block_t *block, uvm_gpu_id_t id)
|
||||
|
||||
if (chunk) {
|
||||
if (chunk_size != uvm_gpu_chunk_get_size(chunk)) {
|
||||
UVM_ERR_PRINT("chunk size mismatch: calc %u, actual %u. VA block [0x%llx, 0x%llx) GPU: %u page_index: %u chunk index: %zu\n",
|
||||
UVM_ERR_PRINT("chunk size mismatch: calc %u, actual %u. VA block [0x%llx, 0x%llx) GPU: %u page_index: %u chunk index: %lu\n",
|
||||
chunk_size,
|
||||
uvm_gpu_chunk_get_size(chunk),
|
||||
block->start,
|
||||
@ -5399,7 +5395,7 @@ static bool block_check_gpu_chunks(uvm_va_block_t *block, uvm_gpu_id_t id)
|
||||
}
|
||||
|
||||
if (chunk->state != UVM_PMM_GPU_CHUNK_STATE_ALLOCATED) {
|
||||
UVM_ERR_PRINT("Invalid chunk state %s. VA block [0x%llx, 0x%llx) GPU: %u page_index: %u chunk index: %zu chunk_size: %u\n",
|
||||
UVM_ERR_PRINT("Invalid chunk state %s. VA block [0x%llx, 0x%llx) GPU: %u page_index: %u chunk index: %lu chunk_size: llu\n",
|
||||
uvm_pmm_gpu_chunk_state_string(chunk->state),
|
||||
block->start,
|
||||
block->end + 1,
|
||||
@ -5718,7 +5714,7 @@ static bool block_check_mappings_ptes(uvm_va_block_t *block, uvm_va_block_contex
|
||||
uvm_pte_bits_gpu_t pte_bit;
|
||||
uvm_processor_id_t resident_id;
|
||||
uvm_prot_t prot;
|
||||
NvU32 big_page_size;
|
||||
NvU64 big_page_size;
|
||||
size_t num_big_pages, big_page_index;
|
||||
uvm_va_block_region_t big_region, chunk_region;
|
||||
uvm_gpu_chunk_t *chunk;
|
||||
@ -6170,7 +6166,7 @@ static void block_gpu_pte_big_split_write_4k(uvm_va_block_t *block,
|
||||
size_t big_page_index;
|
||||
uvm_processor_id_t curr_resident_id;
|
||||
uvm_prot_t curr_prot;
|
||||
NvU32 big_page_size = uvm_va_block_gpu_big_page_size(block, gpu);
|
||||
NvU64 big_page_size = uvm_va_block_gpu_big_page_size(block, gpu);
|
||||
|
||||
if (UVM_ID_IS_INVALID(resident_id))
|
||||
UVM_ASSERT(new_prot == UVM_PROT_NONE);
|
||||
@ -6252,7 +6248,7 @@ static void block_gpu_pte_clear_big(uvm_va_block_t *block,
|
||||
{
|
||||
uvm_va_block_gpu_state_t *gpu_state = uvm_va_block_gpu_state_get(block, gpu->id);
|
||||
uvm_gpu_va_space_t *gpu_va_space = uvm_va_block_get_gpu_va_space(block, gpu);
|
||||
NvU32 big_page_size = gpu_va_space->page_tables.big_page_size;
|
||||
NvU64 big_page_size = gpu_va_space->page_tables.big_page_size;
|
||||
uvm_gpu_phys_address_t pte_addr;
|
||||
NvU32 pte_size = uvm_mmu_pte_size(&gpu_va_space->page_tables, big_page_size);
|
||||
size_t big_page_index;
|
||||
@ -6298,7 +6294,7 @@ static void block_gpu_pte_write_big(uvm_va_block_t *block,
|
||||
uvm_va_block_gpu_state_t *gpu_state = uvm_va_block_gpu_state_get(block, gpu->id);
|
||||
uvm_gpu_va_space_t *gpu_va_space = uvm_va_block_get_gpu_va_space(block, gpu);
|
||||
uvm_page_tree_t *tree = &gpu_va_space->page_tables;
|
||||
NvU32 big_page_size = tree->big_page_size;
|
||||
NvU64 big_page_size = tree->big_page_size;
|
||||
NvU32 pte_size = uvm_mmu_pte_size(tree, big_page_size);
|
||||
size_t big_page_index;
|
||||
uvm_va_block_region_t contig_region = {0};
|
||||
@ -6376,7 +6372,7 @@ static void block_gpu_pte_merge_big_and_end(uvm_va_block_t *block,
|
||||
{
|
||||
uvm_va_block_gpu_state_t *gpu_state = uvm_va_block_gpu_state_get(block, gpu->id);
|
||||
uvm_page_tree_t *tree = &uvm_va_block_get_gpu_va_space(block, gpu)->page_tables;
|
||||
NvU32 big_page_size = tree->big_page_size;
|
||||
NvU64 big_page_size = tree->big_page_size;
|
||||
NvU64 unmapped_pte_val = tree->hal->unmapped_pte(big_page_size);
|
||||
size_t big_page_index;
|
||||
DECLARE_BITMAP(dummy_big_ptes, MAX_BIG_PAGES_PER_UVM_VA_BLOCK);
|
||||
@ -6937,7 +6933,7 @@ static void block_gpu_split_big(uvm_va_block_t *block,
|
||||
uvm_page_tree_t *tree = &uvm_va_block_get_gpu_va_space(block, gpu)->page_tables;
|
||||
uvm_pte_batch_t *pte_batch = &block_context->mapping.pte_batch;
|
||||
uvm_tlb_batch_t *tlb_batch = &block_context->mapping.tlb_batch;
|
||||
NvU32 big_page_size = tree->big_page_size;
|
||||
NvU64 big_page_size = tree->big_page_size;
|
||||
uvm_va_block_region_t big_region;
|
||||
uvm_processor_id_t resident_id;
|
||||
size_t big_page_index;
|
||||
@ -7039,7 +7035,7 @@ static void block_gpu_map_big_and_4k(uvm_va_block_t *block,
|
||||
DECLARE_BITMAP(big_ptes_mask, MAX_BIG_PAGES_PER_UVM_VA_BLOCK);
|
||||
uvm_va_block_region_t big_region;
|
||||
size_t big_page_index;
|
||||
NvU32 big_page_size = tree->big_page_size;
|
||||
NvU64 big_page_size = tree->big_page_size;
|
||||
uvm_membar_t tlb_membar = block_pte_op_membar(pte_op, gpu, resident_id);
|
||||
|
||||
UVM_ASSERT(!gpu_state->pte_is_2m);
|
||||
@ -7341,7 +7337,7 @@ static void block_gpu_unmap_big_and_4k(uvm_va_block_t *block,
|
||||
DECLARE_BITMAP(big_ptes_split, MAX_BIG_PAGES_PER_UVM_VA_BLOCK);
|
||||
DECLARE_BITMAP(big_ptes_before_or_after, MAX_BIG_PAGES_PER_UVM_VA_BLOCK);
|
||||
DECLARE_BITMAP(big_ptes_mask, MAX_BIG_PAGES_PER_UVM_VA_BLOCK);
|
||||
NvU32 big_page_size = tree->big_page_size;
|
||||
NvU64 big_page_size = tree->big_page_size;
|
||||
NvU64 unmapped_pte_val = tree->hal->unmapped_pte(big_page_size);
|
||||
|
||||
UVM_ASSERT(!gpu_state->pte_is_2m);
|
||||
@ -7487,7 +7483,7 @@ static void block_gpu_compute_new_pte_state(uvm_va_block_t *block,
|
||||
{
|
||||
uvm_va_block_gpu_state_t *gpu_state = uvm_va_block_gpu_state_get(block, gpu->id);
|
||||
uvm_va_block_region_t big_region_all, big_page_region, region;
|
||||
NvU32 big_page_size;
|
||||
NvU64 big_page_size;
|
||||
uvm_page_index_t page_index;
|
||||
size_t big_page_index;
|
||||
DECLARE_BITMAP(big_ptes_not_covered, MAX_BIG_PAGES_PER_UVM_VA_BLOCK);
|
||||
@ -7640,7 +7636,7 @@ static void block_gpu_compute_new_pte_state(uvm_va_block_t *block,
|
||||
// happens, the pending tracker is added to the block's tracker.
|
||||
static NV_STATUS block_alloc_pt_range_with_retry(uvm_va_block_t *va_block,
|
||||
uvm_gpu_t *gpu,
|
||||
NvU32 page_size,
|
||||
NvU64 page_size,
|
||||
uvm_page_table_range_t *page_table_range,
|
||||
uvm_tracker_t *pending_tracker)
|
||||
{
|
||||
@ -7763,13 +7759,13 @@ allocated:
|
||||
// sizes. See block_alloc_pt_range_with_retry.
|
||||
static NV_STATUS block_alloc_ptes_with_retry(uvm_va_block_t *va_block,
|
||||
uvm_gpu_t *gpu,
|
||||
NvU32 page_sizes,
|
||||
NvU64 page_sizes,
|
||||
uvm_tracker_t *pending_tracker)
|
||||
{
|
||||
uvm_va_block_gpu_state_t *gpu_state = uvm_va_block_gpu_state_get(va_block, gpu->id);
|
||||
uvm_gpu_va_space_t *gpu_va_space = uvm_va_block_get_gpu_va_space(va_block, gpu);
|
||||
uvm_page_table_range_t *range;
|
||||
NvU32 page_size;
|
||||
NvU64 page_size;
|
||||
NV_STATUS status, final_status = NV_OK;
|
||||
|
||||
UVM_ASSERT(gpu_state);
|
||||
@ -7821,7 +7817,7 @@ static NV_STATUS block_alloc_ptes_new_state(uvm_va_block_t *va_block,
|
||||
uvm_va_block_new_pte_state_t *new_pte_state,
|
||||
uvm_tracker_t *pending_tracker)
|
||||
{
|
||||
NvU32 page_sizes = 0;
|
||||
NvU64 page_sizes = 0;
|
||||
|
||||
if (new_pte_state->pte_is_2m) {
|
||||
page_sizes |= UVM_PAGE_SIZE_2M;
|
||||
@ -7853,8 +7849,8 @@ static NV_STATUS block_pre_populate_pde1_gpu(uvm_va_block_t *block,
|
||||
uvm_gpu_va_space_t *gpu_va_space,
|
||||
uvm_tracker_t *pending_tracker)
|
||||
{
|
||||
NvU32 page_sizes;
|
||||
NvU32 big_page_size;
|
||||
NvU64 page_sizes;
|
||||
NvU64 big_page_size;
|
||||
uvm_gpu_t *gpu;
|
||||
uvm_va_block_gpu_state_t *gpu_state;
|
||||
|
||||
@ -9509,7 +9505,6 @@ static void block_kill(uvm_va_block_t *block)
|
||||
// Free CPU pages
|
||||
for_each_possible_uvm_node(nid) {
|
||||
uvm_va_block_cpu_node_state_t *node_state = block_node_state_get(block, nid);
|
||||
size_t index = node_to_index(nid);
|
||||
|
||||
for_each_cpu_chunk_in_block_safe(chunk, page_index, next_page_index, block, nid) {
|
||||
// be conservative.
|
||||
@ -9524,9 +9519,20 @@ static void block_kill(uvm_va_block_t *block)
|
||||
|
||||
UVM_ASSERT(uvm_page_mask_empty(&node_state->allocated));
|
||||
UVM_ASSERT(node_state->chunks == 0);
|
||||
kmem_cache_free(g_uvm_va_block_cpu_node_state_cache, block->cpu.node_state[index]);
|
||||
}
|
||||
|
||||
// While a per-NUMA node_state array is in use, all of its elements are
|
||||
// expected to be valid. Therefore the teardown of these elements must occur
|
||||
// as a single "transaction". This teardown must take place after freeing
|
||||
// the CPU pages (see the "Free CPU pages" loop above). This is because as
|
||||
// part of removing chunks from VA blocks, the per-page allocated bitmap is
|
||||
// recomputed using the per-NUMA node_state array elements.
|
||||
for_each_possible_uvm_node(nid) {
|
||||
uvm_va_block_cpu_node_state_t *node_state;
|
||||
|
||||
node_state = block_node_state_get(block, nid);
|
||||
kmem_cache_free(g_uvm_va_block_cpu_node_state_cache, node_state);
|
||||
}
|
||||
uvm_kvfree((void *)block->cpu.node_state);
|
||||
block->cpu.node_state = NULL;
|
||||
|
||||
@ -9642,8 +9648,8 @@ static NV_STATUS block_split_presplit_ptes_gpu(uvm_va_block_t *existing, uvm_va_
|
||||
uvm_va_block_gpu_state_t *existing_gpu_state = uvm_va_block_gpu_state_get(existing, gpu->id);
|
||||
uvm_va_space_t *va_space = uvm_va_block_get_va_space(existing);
|
||||
uvm_va_block_context_t *block_context = uvm_va_space_block_context(va_space, NULL);
|
||||
NvU32 big_page_size = uvm_va_block_gpu_big_page_size(existing, gpu);
|
||||
NvU32 alloc_sizes;
|
||||
NvU64 big_page_size = uvm_va_block_gpu_big_page_size(existing, gpu);
|
||||
NvU64 alloc_sizes;
|
||||
DECLARE_BITMAP(new_big_ptes, MAX_BIG_PAGES_PER_UVM_VA_BLOCK);
|
||||
uvm_page_index_t new_start_page_index = uvm_va_block_cpu_page_index(existing, new->start);
|
||||
size_t big_page_index;
|
||||
@ -9986,7 +9992,7 @@ static NV_STATUS block_split_cpu_chunk_one(uvm_va_block_t *block, uvm_page_index
|
||||
gpu = block_get_gpu(block, id);
|
||||
|
||||
// If the parent chunk has not been mapped, there is nothing to split.
|
||||
gpu_mapping_addr = uvm_cpu_chunk_get_parent_gpu_phys_addr(chunk, gpu->parent);
|
||||
gpu_mapping_addr = uvm_cpu_chunk_get_gpu_phys_addr(chunk, gpu);
|
||||
if (gpu_mapping_addr == 0)
|
||||
continue;
|
||||
|
||||
@ -10008,7 +10014,7 @@ static NV_STATUS block_split_cpu_chunk_one(uvm_va_block_t *block, uvm_page_index
|
||||
merge:
|
||||
for_each_gpu_id_in_mask(id, gpu_split_mask) {
|
||||
gpu = block_get_gpu(block, id);
|
||||
gpu_mapping_addr = uvm_cpu_chunk_get_parent_gpu_phys_addr(chunk, gpu->parent);
|
||||
gpu_mapping_addr = uvm_cpu_chunk_get_gpu_phys_addr(chunk, gpu);
|
||||
uvm_pmm_sysmem_mappings_merge_gpu_mappings(&gpu->pmm_reverse_sysmem_mappings,
|
||||
gpu_mapping_addr,
|
||||
chunk_size);
|
||||
@ -10194,7 +10200,7 @@ static void block_merge_cpu_chunks_one(uvm_va_block_t *block, uvm_page_index_t p
|
||||
continue;
|
||||
|
||||
gpu = block_get_gpu(block, id);
|
||||
gpu_mapping_addr = uvm_cpu_chunk_get_parent_gpu_phys_addr(chunk, gpu->parent);
|
||||
gpu_mapping_addr = uvm_cpu_chunk_get_gpu_phys_addr(chunk, gpu);
|
||||
if (gpu_mapping_addr == 0)
|
||||
continue;
|
||||
|
||||
@ -10646,8 +10652,7 @@ static void block_split_gpu(uvm_va_block_t *existing, uvm_va_block_t *new, uvm_g
|
||||
for_each_possible_uvm_node(nid) {
|
||||
for_each_cpu_chunk_in_block(cpu_chunk, page_index, new, nid) {
|
||||
uvm_pmm_sysmem_mappings_reparent_gpu_mapping(&gpu->pmm_reverse_sysmem_mappings,
|
||||
uvm_cpu_chunk_get_parent_gpu_phys_addr(cpu_chunk,
|
||||
gpu->parent),
|
||||
uvm_cpu_chunk_get_gpu_phys_addr(cpu_chunk, gpu),
|
||||
new);
|
||||
}
|
||||
}
|
||||
@ -10685,7 +10690,7 @@ static void block_split_gpu(uvm_va_block_t *existing, uvm_va_block_t *new, uvm_g
|
||||
gpu_va_space = uvm_gpu_va_space_get(va_space, gpu);
|
||||
if (gpu_va_space) {
|
||||
if (existing_gpu_state->page_table_range_big.table) {
|
||||
NvU32 big_page_size = uvm_va_block_gpu_big_page_size(existing, gpu);
|
||||
NvU64 big_page_size = uvm_va_block_gpu_big_page_size(existing, gpu);
|
||||
|
||||
// existing's end has not been adjusted yet
|
||||
existing_pages_big = range_num_big_pages(existing->start, new->start - 1, big_page_size);
|
||||
@ -13614,7 +13619,7 @@ NV_STATUS uvm_test_va_residency_info(UVM_TEST_VA_RESIDENCY_INFO_PARAMS *params,
|
||||
for_each_id_in_mask(id, &block->mapped) {
|
||||
uvm_processor_id_t processor_to_map;
|
||||
block_phys_page_t block_page;
|
||||
NvU32 page_size = uvm_va_block_page_size_processor(block, id, page_index);
|
||||
NvU64 page_size = uvm_va_block_page_size_processor(block, id, page_index);
|
||||
int nid = NUMA_NO_NODE;
|
||||
|
||||
if (page_size == 0)
|
||||
@ -13650,7 +13655,7 @@ NV_STATUS uvm_test_va_residency_info(UVM_TEST_VA_RESIDENCY_INFO_PARAMS *params,
|
||||
if (uvm_processor_mask_test(resident_on_mask, UVM_ID_CPU)) {
|
||||
if (uvm_pmm_sysmem_mappings_indirect_supported()) {
|
||||
for_each_gpu_id(id) {
|
||||
NvU32 page_size = uvm_va_block_page_size_processor(block, id, page_index);
|
||||
NvU64 page_size = uvm_va_block_page_size_processor(block, id, page_index);
|
||||
uvm_reverse_map_t sysmem_page;
|
||||
uvm_cpu_chunk_t *chunk = uvm_cpu_chunk_get_chunk_for_page_resident(block, page_index);
|
||||
size_t num_pages;
|
||||
@ -13665,8 +13670,7 @@ NV_STATUS uvm_test_va_residency_info(UVM_TEST_VA_RESIDENCY_INFO_PARAMS *params,
|
||||
continue;
|
||||
|
||||
num_pages = uvm_pmm_sysmem_mappings_dma_to_virt(&gpu->pmm_reverse_sysmem_mappings,
|
||||
uvm_cpu_chunk_get_parent_gpu_phys_addr(chunk,
|
||||
gpu->parent),
|
||||
uvm_cpu_chunk_get_gpu_phys_addr(chunk, gpu),
|
||||
uvm_cpu_chunk_get_size(chunk),
|
||||
&sysmem_page,
|
||||
1);
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*******************************************************************************
|
||||
Copyright (c) 2015-2023 NVIDIA Corporation
|
||||
Copyright (c) 2015-2024 NVIDIA Corporation
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to
|
||||
@ -111,8 +111,6 @@ typedef struct
|
||||
// Pages that have been evicted to sysmem
|
||||
uvm_page_mask_t evicted;
|
||||
|
||||
NvU64 *cpu_chunks_dma_addrs;
|
||||
|
||||
// Array of naturally-aligned chunks. Each chunk has the largest possible
|
||||
// size which can fit within the block, so they are not uniform size.
|
||||
//
|
||||
@ -2155,8 +2153,7 @@ NV_STATUS uvm_va_block_map_cpu_chunk_on_gpus(uvm_va_block_t *va_block,
|
||||
// Physically unmap a CPU chunk from all registered GPUs.
|
||||
// Locking: The va_block lock must be held.
|
||||
void uvm_va_block_unmap_cpu_chunk_on_gpus(uvm_va_block_t *va_block,
|
||||
uvm_cpu_chunk_t *chunk,
|
||||
uvm_page_index_t page_index);
|
||||
uvm_cpu_chunk_t *chunk);
|
||||
|
||||
// Remove any CPU chunks in the given region.
|
||||
// Locking: The va_block lock must be held.
|
||||
@ -2166,19 +2163,19 @@ void uvm_va_block_remove_cpu_chunks(uvm_va_block_t *va_block, uvm_va_block_regio
|
||||
// specified processor in the block. Returns 0 if the address is not resident on
|
||||
// the specified processor.
|
||||
// Locking: The va_block lock must be held.
|
||||
NvU32 uvm_va_block_get_physical_size(uvm_va_block_t *block,
|
||||
NvU64 uvm_va_block_get_physical_size(uvm_va_block_t *block,
|
||||
uvm_processor_id_t processor,
|
||||
uvm_page_index_t page_index);
|
||||
|
||||
// Get CPU page size or 0 if it is not mapped
|
||||
NvU32 uvm_va_block_page_size_cpu(uvm_va_block_t *va_block,
|
||||
NvU64 uvm_va_block_page_size_cpu(uvm_va_block_t *va_block,
|
||||
uvm_page_index_t page_index);
|
||||
|
||||
// Get GPU page size or 0 if it is not mapped on the given GPU
|
||||
NvU32 uvm_va_block_page_size_gpu(uvm_va_block_t *va_block, uvm_gpu_id_t gpu_id, uvm_page_index_t page_index);
|
||||
NvU64 uvm_va_block_page_size_gpu(uvm_va_block_t *va_block, uvm_gpu_id_t gpu_id, uvm_page_index_t page_index);
|
||||
|
||||
// Get page size or 0 if it is not mapped on the given processor
|
||||
static NvU32 uvm_va_block_page_size_processor(uvm_va_block_t *va_block,
|
||||
static NvU64 uvm_va_block_page_size_processor(uvm_va_block_t *va_block,
|
||||
uvm_processor_id_t processor_id,
|
||||
uvm_page_index_t page_index)
|
||||
{
|
||||
@ -2189,10 +2186,10 @@ static NvU32 uvm_va_block_page_size_processor(uvm_va_block_t *va_block,
|
||||
}
|
||||
|
||||
// Returns the big page size for the GPU VA space of the block
|
||||
NvU32 uvm_va_block_gpu_big_page_size(uvm_va_block_t *va_block, uvm_gpu_t *gpu);
|
||||
NvU64 uvm_va_block_gpu_big_page_size(uvm_va_block_t *va_block, uvm_gpu_t *gpu);
|
||||
|
||||
// Returns the number of big pages in the VA block for the given size
|
||||
size_t uvm_va_block_num_big_pages(uvm_va_block_t *va_block, NvU32 big_page_size);
|
||||
size_t uvm_va_block_num_big_pages(uvm_va_block_t *va_block, NvU64 big_page_size);
|
||||
|
||||
// Returns the number of big pages in the VA block for the big page size on the
|
||||
// given GPU
|
||||
@ -2202,29 +2199,29 @@ static size_t uvm_va_block_gpu_num_big_pages(uvm_va_block_t *va_block, uvm_gpu_t
|
||||
}
|
||||
|
||||
// Returns the start address of the given big page index and big page size
|
||||
NvU64 uvm_va_block_big_page_addr(uvm_va_block_t *va_block, size_t big_page_index, NvU32 big_page_size);
|
||||
NvU64 uvm_va_block_big_page_addr(uvm_va_block_t *va_block, size_t big_page_index, NvU64 big_page_size);
|
||||
|
||||
// Returns the region [start, end] of the given big page index and big page size
|
||||
uvm_va_block_region_t uvm_va_block_big_page_region(uvm_va_block_t *va_block,
|
||||
size_t big_page_index,
|
||||
NvU32 big_page_size);
|
||||
NvU64 big_page_size);
|
||||
|
||||
// Returns the largest sub-region region of [start, end] which can fit big
|
||||
// pages. If the region cannot fit any big pages, an invalid region (0, 0) is
|
||||
// returned.
|
||||
uvm_va_block_region_t uvm_va_block_big_page_region_all(uvm_va_block_t *va_block, NvU32 big_page_size);
|
||||
uvm_va_block_region_t uvm_va_block_big_page_region_all(uvm_va_block_t *va_block, NvU64 big_page_size);
|
||||
|
||||
// Returns the largest sub-region region of 'region' which can fit big pages.
|
||||
// If the region cannot fit any big pages, an invalid region (0, 0) is returned.
|
||||
uvm_va_block_region_t uvm_va_block_big_page_region_subset(uvm_va_block_t *va_block,
|
||||
uvm_va_block_region_t region,
|
||||
NvU32 big_page_size);
|
||||
NvU64 big_page_size);
|
||||
|
||||
// Returns the big page index (the bit index within
|
||||
// uvm_va_block_gpu_state_t::big_ptes) corresponding to page_index. If
|
||||
// page_index cannot be covered by a big PTE due to alignment or block size,
|
||||
// MAX_BIG_PAGES_PER_UVM_VA_BLOCK is returned.
|
||||
size_t uvm_va_block_big_page_index(uvm_va_block_t *va_block, uvm_page_index_t page_index, NvU32 big_page_size);
|
||||
size_t uvm_va_block_big_page_index(uvm_va_block_t *va_block, uvm_page_index_t page_index, NvU64 big_page_size);
|
||||
|
||||
// Returns the new residency for a page that faulted or triggered access counter
|
||||
// notifications. The read_duplicate output parameter indicates if the page
|
||||
|
@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2015-2023 NVIDIA Corporation
Copyright (c) 2015-2024 NVIDIA Corporation

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@ -1853,7 +1853,7 @@ NV_STATUS uvm_api_alloc_semaphore_pool(UVM_ALLOC_SEMAPHORE_POOL_PARAMS *params,

if (uvm_api_range_invalid(params->base, params->length))
return NV_ERR_INVALID_ADDRESS;
if (params->gpuAttributesCount > UVM_MAX_GPUS_V2)
if (params->gpuAttributesCount > UVM_MAX_GPUS)
return NV_ERR_INVALID_ARGUMENT;

if (g_uvm_global.conf_computing_enabled && params->gpuAttributesCount == 0)

@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2015-2022 NVIDIA Corporation
Copyright (c) 2015-2024 NVIDIA Corporation

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@ -188,8 +188,7 @@ typedef struct
// GPU which owns the allocation. For sysmem, this is the GPU that the
// sysmem was originally allocated under. For the allocation to remain valid
// we need to prevent the GPU from going away, similarly to P2P mapped
// memory.
// Similarly for EGM memory.
// memory and to EGM memory.
//
// This field is not used for sparse mappings as they don't have an
// allocation and, hence, owning GPU.
@ -212,6 +211,7 @@ typedef struct
// EGM memory. If true is_sysmem also has to be true and owning_gpu
// has to be valid.
bool is_egm;

// GPU page tables mapping the allocation
uvm_page_table_range_vec_t pt_range_vec;

@ -199,7 +199,7 @@ void uvm_hal_volta_host_tlb_invalidate_va(uvm_push_t *push,
NvU32 depth,
NvU64 base,
NvU64 size,
NvU32 page_size,
NvU64 page_size,
uvm_membar_t membar)
{
NvU32 aperture_value;
@ -216,9 +216,9 @@ void uvm_hal_volta_host_tlb_invalidate_va(uvm_push_t *push,
NvU32 log2_invalidation_size;
uvm_gpu_t *gpu = uvm_push_get_gpu(push);

UVM_ASSERT_MSG(IS_ALIGNED(page_size, 1 << 12), "page_size 0x%x\n", page_size);
UVM_ASSERT_MSG(IS_ALIGNED(base, page_size), "base 0x%llx page_size 0x%x\n", base, page_size);
UVM_ASSERT_MSG(IS_ALIGNED(size, page_size), "size 0x%llx page_size 0x%x\n", size, page_size);
UVM_ASSERT_MSG(IS_ALIGNED(page_size, 1 << 12), "page_size 0x%llx\n", page_size);
UVM_ASSERT_MSG(IS_ALIGNED(base, page_size), "base 0x%llx page_size 0x%llx\n", base, page_size);
UVM_ASSERT_MSG(IS_ALIGNED(size, page_size), "size 0x%llx page_size 0x%llx\n", size, page_size);
UVM_ASSERT_MSG(size > 0, "size 0x%llx\n", size);

// The invalidation size must be a power-of-two number of pages containing

@ -42,7 +42,7 @@ static NvU32 entries_per_index_volta(NvU32 depth)
return 1;
}

static NvLength entry_offset_volta(NvU32 depth, NvU32 page_size)
static NvLength entry_offset_volta(NvU32 depth, NvU64 page_size)
{
UVM_ASSERT(depth < 5);
if (page_size == UVM_PAGE_SIZE_4K && depth == 3)
@ -252,7 +252,7 @@ static NvU64 make_pte_volta(uvm_aperture_t aperture, NvU64 address, uvm_prot_t p

static uvm_mmu_mode_hal_t volta_mmu_mode_hal;

uvm_mmu_mode_hal_t *uvm_hal_mmu_mode_volta(NvU32 big_page_size)
uvm_mmu_mode_hal_t *uvm_hal_mmu_mode_volta(NvU64 big_page_size)
{
static bool initialized = false;

@ -159,14 +159,7 @@ static int lkca_aead_internal(struct crypto_aead *aead,
}

if (rc != 0) {
if (enc) {
pr_info("aead.c: Encryption failed with error %i\n", rc);
} else {
pr_info("aead.c: Decryption failed with error %i\n", rc);
if (rc == -EBADMSG) {
pr_info("aead.c: Authentication tag mismatch!\n");
}
}
pr_info("Encryption FAILED\n");
}

*data_out_size = data_in_size;

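The hunk above drops lkca_aead_internal()'s per-direction failure messages, which called out -EBADMSG as an authentication-tag mismatch, in favour of a single generic print. For reference, -EBADMSG returned by crypto_aead_decrypt() in the kernel crypto API specifically means the tag did not verify. A minimal sketch of that check follows; demo_aead_decrypt() is a hypothetical wrapper, not the driver's code, and the AEAD request setup (key, IV, scatterlists) is assumed to have been done elsewhere.

/*
 * Sketch only: interpret the return value of crypto_aead_decrypt() the way
 * the removed branch above did. The request is assumed to be fully set up.
 */
#include <linux/errno.h>
#include <linux/printk.h>
#include <crypto/aead.h>

static int demo_aead_decrypt(struct aead_request *req)
{
    int rc = crypto_aead_decrypt(req);

    if (rc == -EBADMSG)
        pr_info("demo: authentication tag mismatch\n"); /* tampered data or wrong key/IV */
    else if (rc != 0)
        pr_info("demo: decryption failed with error %i\n", rc);

    return rc;
}
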
@ -39,7 +39,9 @@
#define RSA_PSS_PADDING_ZEROS_SIZE_BYTE (8)
#define RSA_PSS_TRAILER_FIELD (0xbc)
#define SHIFT_RIGHT_AND_GET_BYTE(val, x) ((val >> x) & 0xFF)
#ifndef BITS_TO_BYTES
#define BITS_TO_BYTES(b) (b >> 3)
#endif

static const unsigned char zeroes[RSA_PSS_PADDING_ZEROS_SIZE_BYTE] = { 0 };

@ -66,6 +66,9 @@ static NvBool battery_present = NV_FALSE;
|
||||
#define ACPI_VIDEO_CLASS "video"
|
||||
#endif
|
||||
|
||||
/* Maximum size of ACPI _DSM method's 4th argument */
|
||||
#define NV_MAX_ACPI_DSM_PARAM_SIZE 1024
|
||||
|
||||
// Used for NVPCF event handling
|
||||
static acpi_handle nvpcf_handle = NULL;
|
||||
static acpi_handle nvpcf_device_handle = NULL;
|
||||
@ -73,21 +76,6 @@ static nv_acpi_t *nvpcf_nv_acpi_object = NULL;
|
||||
|
||||
#define ACPI_NVPCF_EVENT_CHANGE 0xC0
|
||||
|
||||
static int nv_acpi_get_device_handle(nv_state_t *nv, acpi_handle *dev_handle)
|
||||
{
|
||||
nv_linux_state_t *nvl = NV_GET_NVL_FROM_NV_STATE(nv);
|
||||
|
||||
#if defined(DEVICE_ACPI_HANDLE)
|
||||
*dev_handle = DEVICE_ACPI_HANDLE(nvl->dev);
|
||||
return NV_TRUE;
|
||||
#elif defined (ACPI_HANDLE)
|
||||
*dev_handle = ACPI_HANDLE(nvl->dev);
|
||||
return NV_TRUE;
|
||||
#else
|
||||
return NV_FALSE;
|
||||
#endif
|
||||
}
|
||||
|
||||
/*
|
||||
* This callback will be invoked by the acpi_notifier_call_chain()
|
||||
*/
|
||||
@ -174,7 +162,7 @@ static void nv_acpi_nvpcf_event(acpi_handle handle, u32 event_type, void *data)
|
||||
}
|
||||
else
|
||||
{
|
||||
nv_printf(NV_DBG_INFO,"NVRM: %s: NVPCF event 0x%x is not supported\n", event_type, __FUNCTION__);
|
||||
nv_printf(NV_DBG_INFO,"NVRM: %s: NVPCF event 0x%x is not supported\n", __FUNCTION__, event_type);
|
||||
}
|
||||
}
|
||||
|
||||
@ -267,11 +255,10 @@ static void nv_acpi_notify_event(acpi_handle handle, u32 event_type, void *data)
|
||||
|
||||
void nv_acpi_register_notifier(nv_linux_state_t *nvl)
|
||||
{
|
||||
acpi_handle dev_handle = NULL;
|
||||
acpi_handle dev_handle = ACPI_HANDLE(nvl->dev);
|
||||
|
||||
/* Install the ACPI notifier corresponding to dGPU ACPI device. */
|
||||
if ((nvl->nv_acpi_object == NULL) &&
|
||||
nv_acpi_get_device_handle(NV_STATE_PTR(nvl), &dev_handle) &&
|
||||
(dev_handle != NULL))
|
||||
{
|
||||
nvl->nv_acpi_object = nv_install_notifier(dev_handle, nv_acpi_notify_event, nvl);
|
||||
@ -657,64 +644,36 @@ static NV_STATUS nv_acpi_nvif_method(
|
||||
return NV_OK;
|
||||
}
|
||||
|
||||
#define MAX_INPUT_PARAM_SIZE 1024
|
||||
/*
|
||||
* This function executes a _DSM ACPI method.
|
||||
*/
|
||||
NV_STATUS NV_API_CALL nv_acpi_dsm_method(
|
||||
nv_state_t *nv,
|
||||
NvU8 *pAcpiDsmGuid,
|
||||
NvU32 acpiDsmRev,
|
||||
NvBool acpiNvpcfDsmFunction,
|
||||
NvU32 acpiDsmSubFunction,
|
||||
void *pInParams,
|
||||
NvU16 inParamSize,
|
||||
NvU32 *outStatus,
|
||||
void *pOutData,
|
||||
NvU16 *pSize
|
||||
static NV_STATUS nv_acpi_evaluate_dsm_method(
|
||||
acpi_handle dev_handle,
|
||||
NvU8 *pathname,
|
||||
NvU8 *pAcpiDsmGuid,
|
||||
NvU32 acpiDsmRev,
|
||||
NvU32 acpiDsmSubFunction,
|
||||
void *arg3,
|
||||
NvU16 arg3Size,
|
||||
NvBool bArg3Integer,
|
||||
NvU32 *outStatus,
|
||||
void *pOutData,
|
||||
NvU16 *pSize
|
||||
)
|
||||
{
|
||||
NV_STATUS status = NV_ERR_OPERATING_SYSTEM;
|
||||
acpi_status acpi_status;
|
||||
NV_STATUS rmStatus = NV_OK;
|
||||
acpi_status status;
|
||||
struct acpi_object_list input;
|
||||
union acpi_object *dsm = NULL;
|
||||
struct acpi_buffer output = { ACPI_ALLOCATE_BUFFER, NULL };
|
||||
union acpi_object dsm_params[4];
|
||||
NvU8 *argument3 = NULL;
|
||||
NvU32 data_size;
|
||||
acpi_handle dev_handle = NULL;
|
||||
|
||||
if (!nv_acpi_get_device_handle(nv, &dev_handle))
|
||||
return NV_ERR_NOT_SUPPORTED;
|
||||
|
||||
if (!dev_handle)
|
||||
return NV_ERR_INVALID_ARGUMENT;
|
||||
|
||||
if ((!pInParams) || (inParamSize > MAX_INPUT_PARAM_SIZE) || (!pOutData) || (!pSize))
|
||||
{
|
||||
nv_printf(NV_DBG_INFO,
|
||||
"NVRM: %s: invalid argument(s)!\n", __FUNCTION__);
|
||||
return NV_ERR_INVALID_ARGUMENT;
|
||||
}
|
||||
|
||||
if (!NV_MAY_SLEEP())
|
||||
{
|
||||
#if defined(DEBUG)
|
||||
nv_printf(NV_DBG_INFO,
|
||||
"NVRM: %s: invalid argument(s)!\n", __FUNCTION__);
|
||||
nv_printf(NV_DBG_ERRORS, "NVRM: %s: invalid context!\n", __FUNCTION__);
|
||||
#endif
|
||||
return NV_ERR_NOT_SUPPORTED;
|
||||
}
|
||||
|
||||
status = os_alloc_mem((void **)&argument3, inParamSize);
|
||||
if (status != NV_OK)
|
||||
return status;
|
||||
|
||||
//
|
||||
// dsm_params[0].buffer.pointer and dsm_params[1].integer.value set in
|
||||
// switch below based on acpiDsmFunction
|
||||
//
|
||||
|
||||
dsm_params[0].buffer.type = ACPI_TYPE_BUFFER;
|
||||
dsm_params[0].buffer.length = 0x10;
|
||||
dsm_params[0].buffer.pointer = pAcpiDsmGuid;
|
||||
@ -725,35 +684,28 @@ NV_STATUS NV_API_CALL nv_acpi_dsm_method(
|
||||
dsm_params[2].integer.type = ACPI_TYPE_INTEGER;
|
||||
dsm_params[2].integer.value = acpiDsmSubFunction;
|
||||
|
||||
dsm_params[3].buffer.type = ACPI_TYPE_BUFFER;
|
||||
dsm_params[3].buffer.length = inParamSize;
|
||||
memcpy(argument3, pInParams, dsm_params[3].buffer.length);
|
||||
dsm_params[3].buffer.pointer = argument3;
|
||||
if (bArg3Integer)
|
||||
{
|
||||
dsm_params[3].integer.type = ACPI_TYPE_INTEGER;
|
||||
dsm_params[3].integer.value = *((NvU32 *)arg3);
|
||||
}
|
||||
else
|
||||
{
|
||||
dsm_params[3].buffer.type = ACPI_TYPE_BUFFER;
|
||||
dsm_params[3].buffer.length = arg3Size;
|
||||
dsm_params[3].buffer.pointer = arg3;
|
||||
}
|
||||
|
||||
// parameters for dsm calls (GUID, rev, subfunction, data)
|
||||
input.count = 4;
|
||||
input.pointer = dsm_params;
|
||||
|
||||
if (acpiNvpcfDsmFunction)
|
||||
{
|
||||
//
|
||||
// acpi_evaluate_object() can operate with either valid object pathname or
|
||||
// valid object handle. For NVPCF DSM function, use valid pathname as we do
|
||||
// not have device handle for NVPCF device
|
||||
//
|
||||
dev_handle = NULL;
|
||||
acpi_status = acpi_evaluate_object(dev_handle, "\\_SB.NPCF._DSM", &input, &output);
|
||||
}
|
||||
else
|
||||
{
|
||||
acpi_status = acpi_evaluate_object(dev_handle, "_DSM", &input, &output);
|
||||
}
|
||||
|
||||
if (ACPI_FAILURE(acpi_status))
|
||||
status = acpi_evaluate_object(dev_handle, pathname, &input, &output);
|
||||
if (ACPI_FAILURE(status))
|
||||
{
|
||||
nv_printf(NV_DBG_INFO,
|
||||
"NVRM: %s: failed to evaluate _DSM method!\n", __FUNCTION__);
|
||||
goto exit;
|
||||
return NV_ERR_OPERATING_SYSTEM;
|
||||
}
|
||||
|
||||
dsm = output.pointer;
|
||||
@ -767,20 +719,80 @@ NV_STATUS NV_API_CALL nv_acpi_dsm_method(
|
||||
dsm->buffer.pointer[0];
|
||||
}
|
||||
|
||||
status = nv_acpi_extract_object(dsm, pOutData, *pSize, &data_size);
|
||||
rmStatus = nv_acpi_extract_object(dsm, pOutData, *pSize, &data_size);
|
||||
*pSize = data_size;
|
||||
|
||||
kfree(output.pointer);
|
||||
}
|
||||
if (status != NV_OK)
|
||||
else
|
||||
{
|
||||
*pSize = 0;
|
||||
}
|
||||
|
||||
if (rmStatus != NV_OK)
|
||||
{
|
||||
nv_printf(NV_DBG_ERRORS,
|
||||
"NVRM: %s: DSM data invalid!\n", __FUNCTION__);
|
||||
}
|
||||
|
||||
exit:
|
||||
return rmStatus;
|
||||
}
|
||||
|
||||
/*
|
||||
* This function executes a _DSM ACPI method.
|
||||
*/
|
||||
NV_STATUS NV_API_CALL nv_acpi_dsm_method(
|
||||
nv_state_t *nv,
|
||||
NvU8 *pAcpiDsmGuid,
|
||||
NvU32 acpiDsmRev,
|
||||
NvBool acpiNvpcfDsmFunction,
|
||||
NvU32 acpiDsmSubFunction,
|
||||
void *pInParams,
|
||||
NvU16 inParamSize,
|
||||
NvU32 *outStatus,
|
||||
void *pOutData,
|
||||
NvU16 *pSize
|
||||
)
|
||||
{
|
||||
NV_STATUS rmStatus = NV_ERR_OPERATING_SYSTEM;
|
||||
NvU8 *argument3 = NULL;
|
||||
nv_linux_state_t *nvl = NV_GET_NVL_FROM_NV_STATE(nv);
|
||||
acpi_handle dev_handle = ACPI_HANDLE(nvl->dev);
|
||||
NvU8 *pathname = "_DSM";
|
||||
|
||||
if (!dev_handle)
|
||||
return NV_ERR_INVALID_ARGUMENT;
|
||||
|
||||
if ((!pInParams) || (inParamSize > NV_MAX_ACPI_DSM_PARAM_SIZE) || (!pOutData) || (!pSize))
|
||||
{
|
||||
nv_printf(NV_DBG_INFO,
|
||||
"NVRM: %s: invalid argument(s)!\n", __FUNCTION__);
|
||||
return NV_ERR_INVALID_ARGUMENT;
|
||||
}
|
||||
|
||||
rmStatus = os_alloc_mem((void **)&argument3, inParamSize);
|
||||
if (rmStatus != NV_OK)
|
||||
return rmStatus;
|
||||
|
||||
memcpy(argument3, pInParams, inParamSize);
|
||||
|
||||
if (acpiNvpcfDsmFunction)
|
||||
{
|
||||
//
|
||||
// acpi_evaluate_object() can operate with either valid object pathname or
|
||||
// valid object handle. For NVPCF DSM function, use valid pathname as we do
|
||||
// not have device handle for NVPCF device
|
||||
//
|
||||
dev_handle = NULL;
|
||||
pathname = "\\_SB.NPCF._DSM";
|
||||
}
|
||||
|
||||
rmStatus = nv_acpi_evaluate_dsm_method(dev_handle, pathname, pAcpiDsmGuid, acpiDsmRev,
|
||||
acpiDsmSubFunction, argument3, inParamSize,
|
||||
NV_FALSE, NULL, pOutData, pSize);
|
||||
|
||||
os_free_mem(argument3);
|
||||
return status;
|
||||
return rmStatus;
|
||||
}
|
||||
|
||||
/*
|
||||
@ -796,13 +808,11 @@ NV_STATUS NV_API_CALL nv_acpi_ddc_method(
|
||||
acpi_status status;
|
||||
union acpi_object *ddc = NULL;
|
||||
NvU32 i, largestEdidSize;
|
||||
acpi_handle dev_handle = NULL;
|
||||
nv_linux_state_t *nvl = NV_GET_NVL_FROM_NV_STATE(nv);
|
||||
acpi_handle dev_handle = ACPI_HANDLE(nvl->dev);
|
||||
acpi_handle lcd_dev_handle = NULL;
|
||||
acpi_handle handle = NULL;
|
||||
|
||||
if (!nv_acpi_get_device_handle(nv, &dev_handle))
|
||||
return NV_ERR_NOT_SUPPORTED;
|
||||
|
||||
if (!dev_handle)
|
||||
return NV_ERR_INVALID_ARGUMENT;
|
||||
|
||||
@ -836,7 +846,7 @@ NV_STATUS NV_API_CALL nv_acpi_ddc_method(
|
||||
case 0x0400:
|
||||
case 0xA420:
|
||||
lcd_dev_handle = handle;
|
||||
nv_printf(NV_DBG_INFO, "NVRM: %s Found LCD: %x\n",
|
||||
nv_printf(NV_DBG_INFO, "NVRM: %s Found LCD: %llx\n",
|
||||
__FUNCTION__, device_id);
|
||||
break;
|
||||
default:
|
||||
@ -915,12 +925,10 @@ NV_STATUS NV_API_CALL nv_acpi_rom_method(
|
||||
union acpi_object *rom;
|
||||
union acpi_object rom_arg[2];
|
||||
struct acpi_object_list input = { 2, rom_arg };
|
||||
acpi_handle dev_handle = NULL;
|
||||
nv_linux_state_t *nvl = NV_GET_NVL_FROM_NV_STATE(nv);
|
||||
acpi_handle dev_handle = ACPI_HANDLE(nvl->dev);
|
||||
uint32_t offset, length;
|
||||
|
||||
if (!nv_acpi_get_device_handle(nv, &dev_handle))
|
||||
return NV_ERR_NOT_SUPPORTED;
|
||||
|
||||
if (!dev_handle)
|
||||
return NV_ERR_INVALID_ARGUMENT;
|
||||
|
||||
@ -982,12 +990,10 @@ NV_STATUS NV_API_CALL nv_acpi_dod_method(
|
||||
acpi_status status;
|
||||
struct acpi_buffer output = { ACPI_ALLOCATE_BUFFER, NULL };
|
||||
union acpi_object *dod;
|
||||
acpi_handle dev_handle = NULL;
|
||||
nv_linux_state_t *nvl = NV_GET_NVL_FROM_NV_STATE(nv);
|
||||
acpi_handle dev_handle = ACPI_HANDLE(nvl->dev);
|
||||
NvU32 i, count = (*pSize / sizeof(NvU32));
|
||||
|
||||
if (!nv_acpi_get_device_handle(nv, &dev_handle))
|
||||
return NV_ERR_NOT_SUPPORTED;
|
||||
|
||||
if (!dev_handle)
|
||||
return NV_ERR_INVALID_ARGUMENT;
|
||||
|
||||
@ -1129,17 +1135,11 @@ NvBool nv_acpi_power_resource_method_present(
|
||||
struct pci_dev *pdev
|
||||
)
|
||||
{
|
||||
acpi_handle handle = NULL;
|
||||
acpi_handle handle = ACPI_HANDLE(&pdev->dev);
|
||||
struct acpi_buffer buf = { ACPI_ALLOCATE_BUFFER, NULL };
|
||||
union acpi_object *object_package, *object_reference;
|
||||
acpi_status status;
|
||||
|
||||
#if defined(DEVICE_ACPI_HANDLE)
|
||||
handle = DEVICE_ACPI_HANDLE(&pdev->dev);
|
||||
#elif defined (ACPI_HANDLE)
|
||||
handle = ACPI_HANDLE(&pdev->dev);
|
||||
#endif
|
||||
|
||||
if (!handle)
|
||||
return NV_FALSE;
|
||||
|
||||
@ -1198,7 +1198,8 @@ NV_STATUS NV_API_CALL nv_acpi_mux_method(
|
||||
union acpi_object *mux = NULL;
|
||||
union acpi_object mux_arg = { ACPI_TYPE_INTEGER };
|
||||
struct acpi_object_list input = { 1, &mux_arg };
|
||||
acpi_handle dev_handle = NULL;
|
||||
nv_linux_state_t *nvl = NV_GET_NVL_FROM_NV_STATE(nv);
|
||||
acpi_handle dev_handle = ACPI_HANDLE(nvl->dev);
|
||||
acpi_handle mux_dev_handle = NULL;
|
||||
acpi_handle handle = NULL;
|
||||
unsigned long long device_id = 0;
|
||||
@ -1216,9 +1217,6 @@ NV_STATUS NV_API_CALL nv_acpi_mux_method(
|
||||
__FUNCTION__, pMethodName);
|
||||
}
|
||||
|
||||
if (!nv_acpi_get_device_handle(nv, &dev_handle))
|
||||
return NV_ERR_NOT_SUPPORTED;
|
||||
|
||||
if (!dev_handle)
|
||||
return NV_ERR_INVALID_ARGUMENT;
|
||||
|
||||
@ -1384,6 +1382,34 @@ NvBool NV_API_CALL nv_acpi_is_battery_present(void)
|
||||
return NV_FALSE;
|
||||
}
|
||||
|
||||
NV_STATUS NV_API_CALL nv_acpi_d3cold_dsm_for_upstream_port(
|
||||
nv_state_t *nv,
|
||||
NvU8 *pAcpiDsmGuid,
|
||||
NvU32 acpiDsmRev,
|
||||
NvU32 acpiDsmSubFunction,
|
||||
NvU32 *data
|
||||
)
|
||||
{
|
||||
NV_STATUS rmStatus = NV_ERR_OPERATING_SYSTEM;
|
||||
nv_linux_state_t *nvl = NV_GET_NVL_FROM_NV_STATE(nv);
|
||||
acpi_handle dev_handle = ACPI_HANDLE(nvl->dev->parent);
|
||||
NvU32 outData = 0;
|
||||
NvU16 outDatasize = sizeof(NvU32);
|
||||
NvU16 inParamSize = sizeof(NvU32);
|
||||
|
||||
if (!dev_handle)
|
||||
return NV_ERR_INVALID_ARGUMENT;
|
||||
|
||||
rmStatus = nv_acpi_evaluate_dsm_method(dev_handle, "_DSM", pAcpiDsmGuid, acpiDsmRev,
|
||||
acpiDsmSubFunction, data, inParamSize, NV_TRUE,
|
||||
NULL, &outData, &outDatasize);
|
||||
|
||||
if (rmStatus == NV_OK)
|
||||
*data = outData;
|
||||
|
||||
return rmStatus;
|
||||
}
|
||||
|
||||
#else // NV_LINUX_ACPI_EVENTS_SUPPORTED
|
||||
|
||||
void NV_API_CALL nv_acpi_methods_init(NvU32 *handlePresent)
|
||||
@ -1426,6 +1452,17 @@ NV_STATUS NV_API_CALL nv_acpi_dsm_method(
|
||||
return NV_ERR_NOT_SUPPORTED;
|
||||
}
|
||||
|
||||
NV_STATUS NV_API_CALL nv_acpi_d3cold_dsm_for_upstream_port(
|
||||
nv_state_t *nv,
|
||||
NvU8 *pAcpiDsmGuid,
|
||||
NvU32 acpiDsmRev,
|
||||
NvU32 acpiDsmSubFunction,
|
||||
NvU32 *data
|
||||
)
|
||||
{
|
||||
return NV_ERR_NOT_SUPPORTED;
|
||||
}
|
||||
|
||||
NV_STATUS NV_API_CALL nv_acpi_ddc_method(
|
||||
nv_state_t *nv,
|
||||
void *pEdidBuffer,
|
||||
|
@ -1,5 +1,5 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-FileCopyrightText: Copyright (c) 2023-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person obtaining a
@ -24,6 +24,7 @@
#include "nv-linux.h"

extern int NVreg_ImexChannelCount;
extern int NVreg_CreateImexChannel0;

static int nv_caps_imex_open(struct inode *inode, struct file *file)
{
@ -104,6 +105,10 @@ int NV_API_CALL nv_caps_imex_init(void)
if (NVreg_ImexChannelCount == 0)
{
nv_printf(NV_DBG_INFO, "nv-caps-imex is disabled.\n");

// Disable channel creation as well
NVreg_CreateImexChannel0 = 0;

return 0;
}

@ -1,5 +1,5 @@
|
||||
/*
|
||||
* SPDX-FileCopyrightText: Copyright (c) 2019-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* SPDX-FileCopyrightText: Copyright (c) 2019-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
@ -26,6 +26,8 @@
|
||||
#include "nv-procfs.h"
|
||||
#include "nv-hash.h"
|
||||
|
||||
#include "nvmisc.h"
|
||||
|
||||
extern int NVreg_ModifyDeviceFiles;
|
||||
|
||||
/* sys_close() or __close_fd() */
|
||||
@ -49,7 +51,7 @@ typedef struct nv_cap_table_entry
|
||||
struct hlist_node hlist;
|
||||
} nv_cap_table_entry_t;
|
||||
|
||||
#define NV_CAP_NUM_ENTRIES(_table) (sizeof(_table) / sizeof(_table[0]))
|
||||
#define NV_CAP_NUM_ENTRIES(_table) (NV_ARRAY_ELEMENTS(_table))
|
||||
|
||||
static nv_cap_table_entry_t g_nv_cap_nvlink_table[] =
|
||||
{
|
||||
@ -361,18 +363,28 @@ static ssize_t nv_cap_procfs_write(struct file *file,
|
||||
nv_cap_file_private_t *private = NULL;
|
||||
unsigned long bytes_left;
|
||||
char *proc_buffer;
|
||||
int status;
|
||||
|
||||
status = nv_down_read_interruptible(&nv_system_pm_lock);
|
||||
if (status < 0)
|
||||
{
|
||||
nv_printf(NV_DBG_ERRORS, "nv-caps: failed to lock the nv_system_pm_lock!\n");
|
||||
return status;
|
||||
}
|
||||
|
||||
private = ((struct seq_file *)file->private_data)->private;
|
||||
bytes_left = (sizeof(private->buffer) - private->offset - 1);
|
||||
|
||||
if (count == 0)
|
||||
{
|
||||
return -EINVAL;
|
||||
count = -EINVAL;
|
||||
goto done;
|
||||
}
|
||||
|
||||
if ((bytes_left == 0) || (count > bytes_left))
|
||||
{
|
||||
return -ENOSPC;
|
||||
count = -ENOSPC;
|
||||
goto done;
|
||||
}
|
||||
|
||||
proc_buffer = &private->buffer[private->offset];
|
||||
@ -380,7 +392,8 @@ static ssize_t nv_cap_procfs_write(struct file *file,
|
||||
if (copy_from_user(proc_buffer, buffer, count))
|
||||
{
|
||||
nv_printf(NV_DBG_ERRORS, "nv-caps: failed to copy in proc data!\n");
|
||||
return -EFAULT;
|
||||
count = -EFAULT;
|
||||
goto done;
|
||||
}
|
||||
|
||||
private->offset += count;
|
||||
@ -388,17 +401,28 @@ static ssize_t nv_cap_procfs_write(struct file *file,
|
||||
|
||||
*pos = private->offset;
|
||||
|
||||
done:
|
||||
up_read(&nv_system_pm_lock);
|
||||
|
||||
return count;
|
||||
}
|
||||
|
||||
static int nv_cap_procfs_read(struct seq_file *s, void *v)
|
||||
{
|
||||
int status;
|
||||
nv_cap_file_private_t *private = s->private;
|
||||
|
||||
status = nv_down_read_interruptible(&nv_system_pm_lock);
|
||||
if (status < 0)
|
||||
{
|
||||
return status;
|
||||
}
|
||||
|
||||
seq_printf(s, "%s: %d\n", "DeviceFileMinor", private->minor);
|
||||
seq_printf(s, "%s: %d\n", "DeviceFileMode", private->permissions);
|
||||
seq_printf(s, "%s: %d\n", "DeviceFileModify", private->modify);
|
||||
|
||||
up_read(&nv_system_pm_lock);
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -423,14 +447,6 @@ static int nv_cap_procfs_open(struct inode *inode, struct file *file)
|
||||
if (rc < 0)
|
||||
{
|
||||
NV_KFREE(private, sizeof(nv_cap_file_private_t));
|
||||
return rc;
|
||||
}
|
||||
|
||||
rc = nv_down_read_interruptible(&nv_system_pm_lock);
|
||||
if (rc < 0)
|
||||
{
|
||||
single_release(inode, file);
|
||||
NV_KFREE(private, sizeof(nv_cap_file_private_t));
|
||||
}
|
||||
|
||||
return rc;
|
||||
@ -449,8 +465,6 @@ static int nv_cap_procfs_release(struct inode *inode, struct file *file)
|
||||
private = s->private;
|
||||
}
|
||||
|
||||
up_read(&nv_system_pm_lock);
|
||||
|
||||
single_release(inode, file);
|
||||
|
||||
if (private != NULL)
|
||||
|
@ -28,12 +28,21 @@
* teardown.
*/

#define NV_MEM_LOGGER_STACK_TRACE 0

#if defined(NV_STACK_TRACE_PRESENT) && defined(NV_MEM_LOGGER) && defined(DEBUG)
#define NV_MEM_LOGGER_STACK_TRACE 1
#endif

typedef struct {
struct rb_node rb_node;
void *addr;
NvU64 size;
NvU32 line;
const char *file;
#if NV_MEM_LOGGER_STACK_TRACE == 1
unsigned long stack_trace[32];
#endif
} nv_memdbg_node_t;

struct
@ -117,6 +126,12 @@ void nv_memdbg_add(void *addr, NvU64 size, const char *file, int line)
node->size = size;
node->file = file;
node->line = line;

#if NV_MEM_LOGGER_STACK_TRACE == 1
memset(node->stack_trace, '\0', sizeof(node->stack_trace));

stack_trace_save(node->stack_trace, NV_ARRAY_ELEMENTS(node->stack_trace), 0);
#endif
}

NV_SPIN_LOCK_IRQSAVE(&g_nv_memdbg.lock, flags);
@ -209,6 +224,10 @@ void nv_memdbg_exit(void)
node->size, node->addr);
}

#if NV_MEM_LOGGER_STACK_TRACE == 1
stack_trace_print(node->stack_trace, NV_ARRAY_ELEMENTS(node->stack_trace), 1);
#endif

rb_erase(&node->rb_node, &g_nv_memdbg.rb_root);
kfree(node);
}

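The NV_MEM_LOGGER_STACK_TRACE path added above records the allocating call stack with stack_trace_save() and dumps it when nv_memdbg_exit() reports a leak. A minimal sketch of that kernel API pair follows; it assumes a kernel that provides stack_trace_save()/stack_trace_print() in <linux/stacktrace.h> (5.2 and newer), and the demo_* names are hypothetical, not part of the driver.

/*
 * Sketch only: capture and later print a call stack with the kernel's
 * stacktrace helpers, the same pair the hunk above relies on.
 */
#include <linux/stacktrace.h>
#include <linux/printk.h>

#define DEMO_TRACE_DEPTH 32

static unsigned long demo_trace[DEMO_TRACE_DEPTH];
static unsigned int demo_trace_len;

static void demo_record_caller(void)
{
    /* Save up to DEMO_TRACE_DEPTH return addresses, skipping no frames. */
    demo_trace_len = stack_trace_save(demo_trace, DEMO_TRACE_DEPTH, 0);
}

static void demo_dump_caller(void)
{
    /* Print only the entries actually saved, indented by one space. */
    stack_trace_print(demo_trace, demo_trace_len, 1);
}
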
@ -1,5 +1,5 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 2020-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-FileCopyrightText: Copyright (c) 2020-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person obtaining a
@ -62,7 +62,7 @@ nvidia_nano_timer_callback(
nv_linux_state_t *nvl = nv_nstimer->nv_linux_state;
nvidia_stack_t *sp = NULL;

if (nv_kmem_cache_alloc_stack(&sp) != 0)
if (nv_kmem_cache_alloc_stack_atomic(&sp) != 0)
{
nv_printf(NV_DBG_ERRORS, "NVRM: no cache memory \n");
return;
@ -189,12 +189,6 @@ void NV_API_CALL nv_start_nano_timer(
NvU32 time_us;

time_us = (NvU32)(time_ns / 1000);

if (time_us == 0)
{
nv_printf(NV_DBG_WARNINGS, "NVRM: Timer value cannot be less than 1 usec.\n");
}

time_jiffies = usecs_to_jiffies(time_us);
mod_timer(&nv_nstimer->jiffy_timer, jiffies + time_jiffies);
#endif

@ -1,5 +1,5 @@
|
||||
/*
|
||||
* SPDX-FileCopyrightText: Copyright (c) 2011-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* SPDX-FileCopyrightText: Copyright (c) 2011-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
@ -31,6 +31,8 @@
|
||||
#include "nv-p2p.h"
|
||||
#include "rmp2pdefines.h"
|
||||
|
||||
#include "nvmisc.h"
|
||||
|
||||
typedef enum nv_p2p_page_table_type {
|
||||
NV_P2P_PAGE_TABLE_TYPE_NON_PERSISTENT = 0,
|
||||
NV_P2P_PAGE_TABLE_TYPE_PERSISTENT,
|
||||
@ -50,6 +52,7 @@ typedef struct nv_p2p_mem_info {
|
||||
struct semaphore lock;
|
||||
} dma_mapping_list;
|
||||
void *private;
|
||||
void *mig_info;
|
||||
} nv_p2p_mem_info_t;
|
||||
|
||||
// declared and created in nv.c
|
||||
@ -73,7 +76,7 @@ static struct nvidia_status_mapping {
|
||||
};
|
||||
|
||||
#define NVIDIA_STATUS_MAPPINGS \
|
||||
(sizeof(nvidia_status_mappings) / sizeof(struct nvidia_status_mapping))
|
||||
NV_ARRAY_ELEMENTS(nvidia_status_mappings)
|
||||
|
||||
static int nvidia_p2p_map_status(NV_STATUS status)
|
||||
{
|
||||
@ -314,7 +317,7 @@ static NV_STATUS nv_p2p_put_pages(
|
||||
* callback which can free it unlike non-persistent page_table.
|
||||
*/
|
||||
mem_info = container_of(*page_table, nv_p2p_mem_info_t, page_table);
|
||||
status = rm_p2p_put_pages_persistent(sp, mem_info->private, *page_table);
|
||||
status = rm_p2p_put_pages_persistent(sp, mem_info->private, *page_table, mem_info->mig_info);
|
||||
}
|
||||
else
|
||||
{
|
||||
@ -412,6 +415,17 @@ static int nv_p2p_get_pages(
|
||||
NvU8 uuid[NVIDIA_P2P_GPU_UUID_LEN] = {0};
|
||||
int rc;
|
||||
|
||||
if (!NV_IS_ALIGNED64(virtual_address, NVRM_P2P_PAGESIZE_BIG_64K) ||
|
||||
!NV_IS_ALIGNED64(length, NVRM_P2P_PAGESIZE_BIG_64K))
|
||||
{
|
||||
nv_printf(NV_DBG_ERRORS,
|
||||
"NVRM: Invalid argument in nv_p2p_get_pages,"
|
||||
"address or length are not aligned "
|
||||
"address=0x%llx, length=0x%llx\n",
|
||||
virtual_address, length);
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
rc = nv_kmem_cache_alloc_stack(&sp);
|
||||
if (rc != 0)
|
||||
{
|
||||
@ -495,7 +509,7 @@ static int nv_p2p_get_pages(
|
||||
status = rm_p2p_get_pages_persistent(sp, virtual_address, length,
|
||||
&mem_info->private,
|
||||
physical_addresses, &entries,
|
||||
*page_table, gpu_info);
|
||||
*page_table, gpu_info, &mem_info->mig_info);
|
||||
if (status != NV_OK)
|
||||
{
|
||||
goto failed;
|
||||
|
@ -37,6 +37,10 @@
|
||||
#include <linux/kernfs.h>
|
||||
#endif
|
||||
|
||||
#if !defined(NV_BUS_TYPE_HAS_IOMMU_OPS)
|
||||
#include <linux/iommu.h>
|
||||
#endif
|
||||
|
||||
static void
|
||||
nv_check_and_exclude_gpu(
|
||||
nvidia_stack_t *sp,
|
||||
@ -324,7 +328,7 @@ static NvU32 find_gpu_numa_nodes_in_srat(nv_linux_state_t *nvl)
|
||||
gi = (struct acpi_srat_generic_affinity *) subtable_header;
|
||||
gi_dbdf = *((NvU16 *)(&gi->device_handle[0])) << 16 |
|
||||
*((NvU16 *)(&gi->device_handle[2]));
|
||||
|
||||
|
||||
if (gi_dbdf == dev_dbdf) {
|
||||
numa_node = pxm_to_node(gi->proximity_domain);
|
||||
if (numa_node < MAX_NUMNODES) {
|
||||
@ -349,7 +353,6 @@ exit:
|
||||
acpi_put_table(table_header);
|
||||
return pxm_count;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
static void
|
||||
@ -375,6 +378,7 @@ nv_init_coherent_link_info
|
||||
return;
|
||||
|
||||
gi_found = find_gpu_numa_nodes_in_srat(nvl);
|
||||
|
||||
if (!gi_found &&
|
||||
(device_property_read_u64(nvl->dev, "nvidia,gpu-mem-pxm-start", &pxm_start) != 0 ||
|
||||
device_property_read_u64(nvl->dev, "nvidia,gpu-mem-pxm-count", &pxm_count) != 0))
|
||||
@ -530,35 +534,20 @@ nv_pci_probe
|
||||
if (pci_dev->is_virtfn)
|
||||
{
|
||||
#if defined(NV_VGPU_KVM_BUILD)
|
||||
nvl = pci_get_drvdata(pci_dev->physfn);
|
||||
if (!nvl)
|
||||
#if defined(NV_BUS_TYPE_HAS_IOMMU_OPS)
|
||||
if (pci_dev->dev.bus->iommu_ops == NULL)
|
||||
#else
|
||||
if ((pci_dev->dev.iommu != NULL) && (pci_dev->dev.iommu->iommu_dev != NULL) &&
|
||||
(pci_dev->dev.iommu->iommu_dev->ops == NULL))
|
||||
#endif
|
||||
{
|
||||
nv_printf(NV_DBG_ERRORS, "NVRM: Aborting probe for VF %04x:%02x:%02x.%x "
|
||||
"since PF is not bound to nvidia driver.\n",
|
||||
"since IOMMU is not present on the system.\n",
|
||||
NV_PCI_DOMAIN_NUMBER(pci_dev), NV_PCI_BUS_NUMBER(pci_dev),
|
||||
NV_PCI_SLOT_NUMBER(pci_dev), PCI_FUNC(pci_dev->devfn));
|
||||
goto failed;
|
||||
}
|
||||
|
||||
if (pci_dev->dev.bus->iommu_ops == NULL)
|
||||
{
|
||||
nv = NV_STATE_PTR(nvl);
|
||||
if (rm_is_iommu_needed_for_sriov(sp, nv))
|
||||
{
|
||||
nv_printf(NV_DBG_ERRORS, "NVRM: Aborting probe for VF %04x:%02x:%02x.%x "
|
||||
"since IOMMU is not present on the system.\n",
|
||||
NV_PCI_DOMAIN_NUMBER(pci_dev), NV_PCI_BUS_NUMBER(pci_dev),
|
||||
NV_PCI_SLOT_NUMBER(pci_dev), PCI_FUNC(pci_dev->devfn));
|
||||
goto failed;
|
||||
}
|
||||
}
|
||||
|
||||
if (nvidia_vgpu_vfio_probe(pci_dev) != NV_OK)
|
||||
{
|
||||
nv_printf(NV_DBG_ERRORS, "NVRM: Failed to register device to vGPU VFIO module");
|
||||
goto failed;
|
||||
}
|
||||
|
||||
nv_kmem_cache_free_stack(sp);
|
||||
return 0;
|
||||
#else
|
||||
@ -687,8 +676,8 @@ next_bar:
|
||||
// Invalid 32 or 64-bit BAR.
|
||||
nv_printf(NV_DBG_ERRORS,
|
||||
"NVRM: This PCI I/O region assigned to your NVIDIA device is invalid:\n"
|
||||
"NVRM: BAR%d is %dM @ 0x%llx (PCI:%04x:%02x:%02x.%x)\n", i,
|
||||
(NV_PCI_RESOURCE_SIZE(pci_dev, i) >> 20),
|
||||
"NVRM: BAR%d is %" NvU64_fmtu "M @ 0x%" NvU64_fmtx " (PCI:%04x:%02x:%02x.%x)\n", i,
|
||||
(NvU64)(NV_PCI_RESOURCE_SIZE(pci_dev, i) >> 20),
|
||||
(NvU64)NV_PCI_RESOURCE_START(pci_dev, i),
|
||||
NV_PCI_DOMAIN_NUMBER(pci_dev), NV_PCI_BUS_NUMBER(pci_dev),
|
||||
NV_PCI_SLOT_NUMBER(pci_dev), PCI_FUNC(pci_dev->devfn));
|
||||
@ -708,10 +697,10 @@ next_bar:
|
||||
nv_device_name))
|
||||
{
|
||||
nv_printf(NV_DBG_ERRORS,
|
||||
"NVRM: request_mem_region failed for %dM @ 0x%llx. This can\n"
|
||||
"NVRM: request_mem_region failed for %" NvU64_fmtu "M @ 0x%" NvU64_fmtx ". This can\n"
|
||||
"NVRM: occur when a driver such as rivatv is loaded and claims\n"
|
||||
"NVRM: ownership of the device's registers.\n",
|
||||
(NV_PCI_RESOURCE_SIZE(pci_dev, regs_bar_index) >> 20),
|
||||
(NvU64)(NV_PCI_RESOURCE_SIZE(pci_dev, regs_bar_index) >> 20),
|
||||
(NvU64)NV_PCI_RESOURCE_START(pci_dev, regs_bar_index));
|
||||
goto failed;
|
||||
}
|
||||
|
@ -197,28 +197,25 @@ nv_procfs_read_power(
{
nv_state_t *nv = s->private;
nvidia_stack_t *sp = NULL;
const char *vidmem_power_status;
const char *dynamic_power_status;
const char *gc6_support;
const char *gcoff_support;
nv_power_info_t power_info;

if (nv_kmem_cache_alloc_stack(&sp) != 0)
{
return 0;
}

dynamic_power_status = rm_get_dynamic_power_management_status(sp, nv);
seq_printf(s, "Runtime D3 status: %s\n", dynamic_power_status);

vidmem_power_status = rm_get_vidmem_power_status(sp, nv);
seq_printf(s, "Video Memory: %s\n\n", vidmem_power_status);
rm_get_power_info(sp, nv, &power_info);
seq_printf(s, "Runtime D3 status: %s\n", power_info.dynamic_power_status);
seq_printf(s, "Video Memory: %s\n\n", power_info.vidmem_power_status);

seq_printf(s, "GPU Hardware Support:\n");
gc6_support = rm_get_gpu_gcx_support(sp, nv, NV_TRUE);
seq_printf(s, " Video Memory Self Refresh: %s\n", gc6_support);
seq_printf(s, " Video Memory Self Refresh: %s\n", power_info.gc6_support);
seq_printf(s, " Video Memory Off: %s\n\n", power_info.gcoff_support);

gcoff_support = rm_get_gpu_gcx_support(sp, nv, NV_FALSE);
seq_printf(s, " Video Memory Off: %s\n", gcoff_support);
seq_printf(s, "S0ix Power Management:\n");
seq_printf(s, " Platform Support: %s\n",
nv_platform_supports_s0ix() ? "Supported" : "Not Supported");
seq_printf(s, " Status: %s\n", power_info.s0ix_status);

nv_kmem_cache_free_stack(sp);
return 0;

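The hunk above folds several separate rm_get_*() queries into a single rm_get_power_info() call that fills an nv_power_info_t, so the seq_file show callback only formats fields of one struct. A minimal sketch of that gather-once-then-print pattern follows; demo_power_info, demo_query_power() and demo_power_show() are hypothetical stand-ins, and the placeholder status strings are illustrative only.

/*
 * Sketch only: collect all power state into one struct, then print it in a
 * single seq_file show callback, mirroring the refactor above.
 */
#include <linux/seq_file.h>

struct demo_power_info {
    const char *dynamic_power_status;
    const char *vidmem_power_status;
    const char *gc6_support;
    const char *gcoff_support;
    const char *s0ix_status;
};

static void demo_query_power(struct demo_power_info *info)
{
    /* A real implementation would query the RM; placeholders used here. */
    info->dynamic_power_status = "Enabled (fine-grained)";
    info->vidmem_power_status  = "Active";
    info->gc6_support          = "Supported";
    info->gcoff_support        = "Supported";
    info->s0ix_status          = "Enabled";
}

static int demo_power_show(struct seq_file *s, void *v)
{
    struct demo_power_info info;

    demo_query_power(&info);
    seq_printf(s, "Runtime D3 status: %s\n", info.dynamic_power_status);
    seq_printf(s, "Video Memory: %s\n\n", info.vidmem_power_status);
    seq_printf(s, "GPU Hardware Support:\n");
    seq_printf(s, " Video Memory Self Refresh: %s\n", info.gc6_support);
    seq_printf(s, " Video Memory Off: %s\n\n", info.gcoff_support);
    seq_printf(s, "S0ix Power Management:\n");
    seq_printf(s, " Status: %s\n", info.s0ix_status);
    return 0;
}
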
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* SPDX-FileCopyrightText: Copyright (c) 2006-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* SPDX-FileCopyrightText: Copyright (c) 2006-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
@ -869,6 +869,8 @@
|
||||
* NVreg_ModifyDeviceFiles, NVreg_DeviceFileGID, NVreg_DeviceFileUID
|
||||
* and NVreg_DeviceFileMode will be honored by nvidia-modprobe.
|
||||
*
|
||||
* Also, refer to the NVreg_CreateImexChannel0 option.
|
||||
*
|
||||
* Possible values:
|
||||
* 0 - Disable IMEX using CUDA driver's fabric handles.
|
||||
* N - N IMEX channels will be enabled in the driver to facilitate N
|
||||
@ -878,6 +880,29 @@
|
||||
#define __NV_IMEX_CHANNEL_COUNT ImexChannelCount
|
||||
#define NV_REG_IMEX_CHANNEL_COUNT NV_REG_STRING(__NV_IMEX_CHANNEL_COUNT)
|
||||
|
||||
/*
|
||||
* Option: NVreg_CreateImexChannel0
|
||||
*
|
||||
* Description:
|
||||
*
|
||||
* This option allows users to specify whether the NVIDIA driver must create
|
||||
* the IMEX channel 0 by default. The channel will be created automatically
|
||||
* when an application (e.g. nvidia-smi, nvidia-persistenced) is run.
|
||||
*
|
||||
* Note that users are advised to enable this option only in trusted
|
||||
* environments where it is acceptable for applications to share the same
|
||||
* IMEX channel.
|
||||
*
|
||||
* For more details on IMEX channels, refer to the NVreg_ImexChannelCount
|
||||
* option.
|
||||
*
|
||||
* Possible values:
|
||||
* 0 - Do not create IMEX channel 0 (default).
|
||||
* 1 - Create IMEX channel 0.
|
||||
*/
|
||||
#define __NV_CREATE_IMEX_CHANNEL_0 CreateImexChannel0
|
||||
#define NV_CREATE_IMEX_CHANNEL_0 NV_REG_STRING(__CREATE_IMEX_CHANNEL_0)
|
||||
|
||||
#if defined(NV_DEFINE_REGISTRY_KEY_TABLE)
|
||||
|
||||
/*
|
||||
@ -927,6 +952,7 @@ NV_DEFINE_REG_STRING_ENTRY(__NV_EXCLUDED_GPUS, NULL);
|
||||
NV_DEFINE_REG_ENTRY(__NV_DMA_REMAP_PEER_MMIO, NV_DMA_REMAP_PEER_MMIO_ENABLE);
|
||||
NV_DEFINE_REG_STRING_ENTRY(__NV_RM_NVLINK_BW, NULL);
|
||||
NV_DEFINE_REG_ENTRY_GLOBAL(__NV_IMEX_CHANNEL_COUNT, 2048);
|
||||
NV_DEFINE_REG_ENTRY_GLOBAL(__NV_CREATE_IMEX_CHANNEL_0, 0);
|
||||
|
||||
/*
|
||||
*----------------registry database definition----------------------
|
||||
@ -974,6 +1000,7 @@ nv_parm_t nv_parms[] = {
|
||||
NV_DEFINE_PARAMS_TABLE_ENTRY(__NV_OPENRM_ENABLE_UNSUPPORTED_GPUS),
|
||||
NV_DEFINE_PARAMS_TABLE_ENTRY(__NV_DMA_REMAP_PEER_MMIO),
|
||||
NV_DEFINE_PARAMS_TABLE_ENTRY(__NV_IMEX_CHANNEL_COUNT),
|
||||
NV_DEFINE_PARAMS_TABLE_ENTRY(__NV_CREATE_IMEX_CHANNEL_0),
|
||||
{NULL, NULL}
|
||||
};
|
||||
|
||||
|
@ -514,7 +514,6 @@ NV_STATUS nv_alloc_system_pages(
|
||||
struct device *dev = at->dev;
|
||||
dma_addr_t bus_addr;
|
||||
|
||||
// Order should be zero except for EGM allocations.
|
||||
unsigned int alloc_page_size = PAGE_SIZE << at->order;
|
||||
unsigned int alloc_num_pages = NV_CEIL(at->num_pages * PAGE_SIZE, alloc_page_size);
|
||||
|
||||
@ -523,7 +522,7 @@ NV_STATUS nv_alloc_system_pages(
|
||||
unsigned int os_pages_in_page = alloc_page_size / PAGE_SIZE;
|
||||
|
||||
nv_printf(NV_DBG_MEMINFO,
|
||||
"NVRM: VM: %u: %u order0 pages, %u order\n", __FUNCTION__, at->num_pages, at->order);
|
||||
"NVRM: VM: %s: %u order0 pages, %u order\n", __FUNCTION__, at->num_pages, at->order);
|
||||
|
||||
gfp_mask = nv_compute_gfp_mask(nv, at);
|
||||
|
||||
@ -641,7 +640,6 @@ void nv_free_system_pages(
|
||||
unsigned int i;
|
||||
struct device *dev = at->dev;
|
||||
|
||||
// Order should be zero except for EGM allocations.
|
||||
unsigned int alloc_page_size = PAGE_SIZE << at->order;
|
||||
unsigned int os_pages_in_page = alloc_page_size / PAGE_SIZE;
|
||||
|
||||
|
@ -29,7 +29,7 @@
NvU64 NV_API_CALL nv_get_kern_phys_address(NvU64 address)
{
/* direct-mapped kernel address */
if (virt_addr_valid(address))
if (virt_addr_valid((void *)address))
return __pa(address);

nv_printf(NV_DBG_ERRORS,

@ -3131,6 +3131,7 @@ NV_STATUS NV_API_CALL
|
||||
nv_alias_pages(
|
||||
nv_state_t *nv,
|
||||
NvU32 page_cnt,
|
||||
NvU64 page_size,
|
||||
NvU32 contiguous,
|
||||
NvU32 cache_type,
|
||||
NvU64 guest_id,
|
||||
@ -3152,7 +3153,14 @@ nv_alias_pages(
|
||||
|
||||
at->cache_type = cache_type;
|
||||
if (contiguous)
|
||||
{
|
||||
at->flags.contig = NV_TRUE;
|
||||
at->order = get_order(at->num_pages * PAGE_SIZE);
|
||||
}
|
||||
else
|
||||
{
|
||||
at->order = get_order(page_size);
|
||||
}
|
||||
#if defined(NVCPU_AARCH64)
|
||||
if (at->cache_type != NV_MEMORY_CACHED)
|
||||
at->flags.aliased = NV_TRUE;
|
||||
@ -3160,8 +3168,6 @@ nv_alias_pages(
|
||||
|
||||
at->flags.guest = NV_TRUE;
|
||||
|
||||
at->order = get_order(at->num_pages * PAGE_SIZE);
|
||||
|
||||
for (i=0; i < at->num_pages; ++i)
|
||||
{
|
||||
page_ptr = at->page_table[i];
|
||||
@ -3271,7 +3277,7 @@ NV_STATUS NV_API_CALL nv_register_user_pages(
|
||||
nv_linux_state_t *nvl;
|
||||
nvidia_pte_t *page_ptr;
|
||||
|
||||
nv_printf(NV_DBG_MEMINFO, "NVRM: VM: nv_register_user_pages: 0x%x\n", page_count);
|
||||
nv_printf(NV_DBG_MEMINFO, "NVRM: VM: nv_register_user_pages: 0x%" NvU64_fmtx"\n", page_count);
|
||||
user_pages = *priv_data;
|
||||
nvl = NV_GET_NVL_FROM_NV_STATE(nv);
|
||||
|
||||
@ -3332,7 +3338,7 @@ void NV_API_CALL nv_unregister_user_pages(
|
||||
{
|
||||
nv_alloc_t *at = *priv_data;
|
||||
|
||||
nv_printf(NV_DBG_MEMINFO, "NVRM: VM: nv_unregister_user_pages: 0x%x\n", page_count);
|
||||
nv_printf(NV_DBG_MEMINFO, "NVRM: VM: nv_unregister_user_pages: 0x%" NvU64_fmtx "\n", page_count);
|
||||
|
||||
NV_PRINT_AT(NV_DBG_MEMINFO, at);
|
||||
|
||||
@ -6133,7 +6139,10 @@ void NV_API_CALL nv_get_screen_info(
|
||||
{
|
||||
NvU64 physAddr = screen_info.lfb_base;
|
||||
#if defined(VIDEO_CAPABILITY_64BIT_BASE)
|
||||
physAddr |= (NvU64)screen_info.ext_lfb_base << 32;
|
||||
if (screen_info.capabilities & VIDEO_CAPABILITY_64BIT_BASE)
|
||||
{
|
||||
physAddr |= (NvU64)screen_info.ext_lfb_base << 32;
|
||||
}
|
||||
#endif
|
||||
|
||||
/* Make sure base address is mapped to GPU BAR */
|
||||
|
@ -285,12 +285,15 @@ NV_STATUS nvGpuOpsFlushReplayableFaultBuffer(gpuFaultInfo *pFaultInfo,
NV_STATUS nvGpuOpsTogglePrefetchFaults(gpuFaultInfo *pFaultInfo,
NvBool bEnable);

// Interface used for CCSL
NV_STATUS nvGpuOpsKeyRotationChannelDisable(struct gpuChannel *channelList[],
NvU32 channelListCount);

// Interface used for CCSL
NV_STATUS nvGpuOpsCcslContextInit(struct ccslContext_t **ctx,
gpuChannelHandle channel);
NV_STATUS nvGpuOpsCcslContextClear(struct ccslContext_t *ctx);
NV_STATUS nvGpuOpsCcslContextUpdate(struct ccslContext_t *ctx);
NV_STATUS nvGpuOpsCcslContextUpdate(UvmCslContext *contextList[],
NvU32 contextListCount);
NV_STATUS nvGpuOpsCcslRotateIv(struct ccslContext_t *ctx,
NvU8 direction);
NV_STATUS nvGpuOpsCcslEncrypt(struct ccslContext_t *ctx,

@ -1,5 +1,5 @@
|
||||
/*
|
||||
* SPDX-FileCopyrightText: Copyright (c) 2013-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* SPDX-FileCopyrightText: Copyright (c) 2013-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
@ -1478,6 +1478,15 @@ NV_STATUS nvUvmInterfacePagingChannelPushStream(UvmGpuPagingChannelHandle channe
|
||||
}
|
||||
EXPORT_SYMBOL(nvUvmInterfacePagingChannelPushStream);
|
||||
|
||||
NV_STATUS nvUvmInterfaceKeyRotationChannelDisable(uvmGpuChannelHandle channelList[],
|
||||
NvU32 channeListCount)
|
||||
{
|
||||
nvidia_stack_t *sp = nvUvmGetSafeStack();
|
||||
|
||||
return rm_gpu_ops_key_rotation_channel_disable(sp, ((gpuChannelHandle *)channelList), channeListCount);
|
||||
}
|
||||
EXPORT_SYMBOL(nvUvmInterfaceKeyRotationChannelDisable);
|
||||
|
||||
NV_STATUS nvUvmInterfaceCslInitContext(UvmCslContext *uvmCslContext,
|
||||
uvmGpuChannelHandle channel)
|
||||
{
|
||||
@ -1516,12 +1525,13 @@ void nvUvmInterfaceDeinitCslContext(UvmCslContext *uvmCslContext)
|
||||
}
|
||||
EXPORT_SYMBOL(nvUvmInterfaceDeinitCslContext);
|
||||
|
||||
NV_STATUS nvUvmInterfaceCslUpdateContext(UvmCslContext *uvmCslContext)
|
||||
NV_STATUS nvUvmInterfaceCslUpdateContext(UvmCslContext *contextList[],
|
||||
NvU32 contextListCount)
|
||||
{
|
||||
NV_STATUS status;
|
||||
nvidia_stack_t *sp = uvmCslContext->nvidia_stack;
|
||||
nvidia_stack_t *sp = contextList[0]->nvidia_stack;
|
||||
|
||||
status = rm_gpu_ops_ccsl_context_update(sp, uvmCslContext->ctx);
|
||||
status = rm_gpu_ops_ccsl_context_update(sp, contextList, contextListCount);
|
||||
|
||||
return status;
|
||||
}
|
||||
|
@ -195,6 +195,7 @@ NV_CONFTEST_FUNCTION_COMPILE_TESTS += devm_clk_bulk_get_all
NV_CONFTEST_FUNCTION_COMPILE_TESTS += get_task_ioprio
NV_CONFTEST_FUNCTION_COMPILE_TESTS += mdev_set_iommu_device
NV_CONFTEST_FUNCTION_COMPILE_TESTS += offline_and_remove_memory
NV_CONFTEST_FUNCTION_COMPILE_TESTS += stack_trace
NV_CONFTEST_FUNCTION_COMPILE_TESTS += crypto_tfm_ctx_aligned

NV_CONFTEST_SYMBOL_COMPILE_TESTS += is_export_symbol_gpl_of_node_to_nid
@ -227,6 +228,8 @@ NV_CONFTEST_SYMBOL_COMPILE_TESTS += is_export_symbol_present_tsec_comms_clear_in
NV_CONFTEST_SYMBOL_COMPILE_TESTS += is_export_symbol_present_tsec_comms_alloc_mem_from_gscco
NV_CONFTEST_SYMBOL_COMPILE_TESTS += is_export_symbol_present_tsec_comms_free_gscco_mem
NV_CONFTEST_SYMBOL_COMPILE_TESTS += is_export_symbol_present_memory_block_size_bytes
NV_CONFTEST_SYMBOL_COMPILE_TESTS += is_export_symbol_present_tegra_platform_is_fpga
NV_CONFTEST_SYMBOL_COMPILE_TESTS += is_export_symbol_present_tegra_platform_is_sim
NV_CONFTEST_SYMBOL_COMPILE_TESTS += crypto

NV_CONFTEST_TYPE_COMPILE_TESTS += dma_ops
@ -251,6 +254,7 @@ NV_CONFTEST_TYPE_COMPILE_TESTS += pci_driver_has_driver_managed_dma
NV_CONFTEST_TYPE_COMPILE_TESTS += vm_area_struct_has_const_vm_flags
NV_CONFTEST_TYPE_COMPILE_TESTS += memory_failure_has_trapno_arg
NV_CONFTEST_TYPE_COMPILE_TESTS += foll_longterm_present
NV_CONFTEST_TYPE_COMPILE_TESTS += bus_type_has_iommu_ops

NV_CONFTEST_GENERIC_COMPILE_TESTS += dom0_kernel_present
NV_CONFTEST_GENERIC_COMPILE_TESTS += nvidia_vgpu_kvm_build

@ -1,5 +1,5 @@
|
||||
/*
|
||||
* SPDX-FileCopyrightText: Copyright (c) 1993-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
@ -464,6 +464,9 @@ namespace DisplayPort
|
||||
virtual bool getStreamStatusChanged() = 0;
|
||||
virtual void clearStreamStatusChanged() =0;
|
||||
|
||||
virtual bool getDpTunnelingIrq() = 0;
|
||||
virtual void clearDpTunnelingIrq() = 0;
|
||||
|
||||
virtual void setDirtyLinkStatus(bool dirty) = 0;
|
||||
virtual void refreshLinkStatus() = 0;
|
||||
virtual bool isLinkStatusValid(unsigned lanes) = 0;
|
||||
@ -529,6 +532,15 @@ namespace DisplayPort
|
||||
virtual bool readPsrEvtIndicator(vesaPsrEventIndicator *psrErr) = 0;
|
||||
virtual bool readPrSinkDebugInfo(panelReplaySinkDebugInfo *prDbgInfo) = 0;
|
||||
|
||||
virtual bool getDpTunnelBwAllocationSupported() = 0;
|
||||
virtual bool getDpTunnelEstimatedBw(NvU8 &estimatedBw) = 0;
|
||||
virtual bool getDpTunnelGranularityMultiplier(NvU8 &granularityMultiplier) = 0;
|
||||
virtual TriState getDpTunnelBwRequestStatus() = 0;
|
||||
virtual bool setDpTunnelBwAllocation(bool bEnable) = 0;
|
||||
virtual bool hasDpTunnelEstimatedBwChanged() = 0;
|
||||
virtual bool hasDpTunnelBwAllocationCapabilityChanged() = 0;
|
||||
virtual bool writeDpTunnelRequestedBw(NvU8 requestedBw) = 0;
|
||||
|
||||
virtual ~DPCDHAL() {}
|
||||
|
||||
};
@@ -536,7 +548,876 @@ namespace DisplayPort
    //
    // Implement interface
    //
    DPCDHAL * MakeDPCDHAL(AuxBus * bus, Timer * timer);
    DPCDHAL * MakeDPCDHAL(AuxBus * bus, Timer * timer, MainLink * main);

    struct DPCDHALImpl : DPCDHAL
    {
        AuxRetry  bus;
        Timer    *timer;
        bool      dpcdOffline;
        bool      bGrantsPostLtRequest;
        bool      pc2Disabled;
        bool      uprequestEnable;
        bool      upstreamIsSource;
        bool      bMultistream;
        bool      bGpuFECSupported;
        bool      bLttprSupported;
        bool      bBypassILREdpRevCheck;
        NvU32     overrideDpcdMaxLinkRate;
        NvU32     overrideDpcdRev;
        NvU32     overrideDpcdMaxLaneCount;

        NvU32     gpuDPSupportedVersions;

        struct _LegacyPort: public LegacyPort
        {
            DwnStreamPortType         type;
            DwnStreamPortAttribute    nonEDID;

            NvU64                     maxTmdsClkRate;

            DwnStreamPortType getDownstreamPortType()
            {
                return type;
            }

            DwnStreamPortAttribute getDownstreamNonEDIDPortAttribute()
            {
                return nonEDID;
            }

            NvU64 getMaxTmdsClkRate()
            {
                return maxTmdsClkRate;
            }

        } legacyPort[16];

        struct
        {
            unsigned  revisionMajor, revisionMinor;     // DPCD offset 0
            bool      supportsESI;
            LinkRate  maxLinkRate;                      // DPCD offset 1
            unsigned  maxLaneCount;                     // DPCD offset 2
            unsigned  maxLanesAtHBR;
            unsigned  maxLanesAtRBR;
            bool      enhancedFraming;
            bool      bPostLtAdjustmentSupport;

            bool      supportsNoHandshakeTraining;
            bool      bSupportsTPS4;
            unsigned  NORP;                             // DPCD offset 4

            bool      detailedCapInfo;                  // DPCD offset 5
            bool      downStreamPortPresent;
            NvU8      downStreamPortType;

            unsigned  downStreamPortCount;              // DPCD offset 7
            bool      ouiSupported;
            bool      msaTimingParIgnored;

            NvU16     linkRateTable[NV_DPCD_SUPPORTED_LINK_RATES__SIZE]; // DPCD offset 10 ~ 1F

            bool      supportsMultistream;              // DPCD offset 21
            unsigned  numberAudioEndpoints;             // DPCD offset 22
            bool      overrideToSST;                    // force to SST even if MST capable
            bool      noLinkTraining;                   // DPCD offset 330h

            bool      extendedRxCapsPresent;            // DPCD offset 000Eh [7] - Extended Receiver Capability present

            // DPCD Offset 2211h;
            unsigned  extendedSleepWakeTimeoutRequestMs;
            // DPCD Offset 0119h [0] - If we grant the extendedSleepWakeTimeoutRequest
            bool      bExtendedSleepWakeTimeoutGranted;

            bool      bFECSupported;

            // DPCD Offset F0002h - Number of Physical Repeaters present (after mapping) between Source and Sink
            unsigned  phyRepeaterCount;
            // DPCD offset 700 - EDP_DPCD_REV
            unsigned  eDpRevision;

            struct
            {
                unsigned  revisionMajor, revisionMinor;     // DPCD offset F0000h
                LinkRate  maxLinkRate;                      // DPCD offset F0001h
                unsigned  maxLaneCount;                     // DPCD offset F0004h
                unsigned  phyRepeaterExtendedWakeTimeoutMs; // DPCD offset F0005h
                // The array to keep track of FEC capability of each LTTPR
                bool      bFECSupportedRepeater[NV_DPCD14_PHY_REPEATER_CNT_MAX];
                // If all the LTTPRs supports FEC
                bool      bFECSupported;

            } repeaterCaps;

            struct
            {
                bool      bIsSupported;
                bool      bUsb4DriverSupport;
                bool      bIsPanelReplayOptimizationSupported;
                bool      bIsBwAllocationSupported;
                NvU8      maxLaneCount;
                LinkRate  maxLinkRate;
            } dpInTunnelingCaps;

            PCONCaps pconCaps;
            vesaPsrSinkCaps psrCaps;
            NvU32    videoFallbackFormats;              // DPCD offset 0200h

        } caps;

        bool bIsDpTunnelBwAllocationEnabled;

        struct
        {
            unsigned  sinkCount;                        // DPCD offset 200
            bool      automatedTestRequest;
            bool      cpIRQ;
            bool      mccsIRQ;
            bool      downRepMsgRdy;
            bool      upReqMsgRdy;
            bool      prErrorStatus;                    // DPCD offset 2004h[3]
            bool      rxCapChanged;                     // DPCD offset 2005
            bool      linkStatusChanged;                // DPCD offset 2005
            bool      streamStatusChanged;              // DPCD offset 2005
            bool      hdmiLinkStatusChanged;            // DPCD offset 2005
            bool      dpTunnelingIrq;                   // DPCD offset 2005
            NvU8      eightyBitCustomPat[10];           // DPCD offset 250 - 259

            struct
            {
                struct
                {
                    bool clockRecoveryDone;
                    bool channelEqualizationDone;
                    bool symbolLocked;
                } laneStatus[4];                        // DPCD offset 202, 203

                bool interlaneAlignDone;                // DPCD offset 204
                bool downstmPortChng;
                bool linkStatusUpdated;

                //
                // (ESI specific) signifies that we have link trained and should
                // update the link status in the next query to isLinkLost. Keep in
                // mind that linkStatusChanged might still be zero.
                //
                bool linkStatusDirtied;
            } laneStatusIntr;

            struct
            {
                bool     testRequestTraining;           // DPCD offset 218
                LinkRate testRequestLinkRate;           // DPCD offset 219
                unsigned testRequestLaneCount;          // DPCD offset 220
            } testTraining;

            struct
            {
                bool testRequestEdidRead;               // DPCD offset 218
            } testEdid;

            struct
            {
                bool            testRequestPattern;     // DPCD offset 218
                TestPatternType testPatRequested;       // DPCD offset 221
                NvU16           testHorTotalPixels;     // DPCD offset 222, 223
                NvU16           testVerTotalLines;      // DPCD offset 224, 225
                NvU16           testHorStartPixels;     // DPCD offset 226, 227
                NvU16           testVerStartLines;      // DPCD offset 228, 229
                NvU16           testHsyncWidthPixels;   // DPCD offset 22A, 22B
                bool            testHsyncPolarity;
                NvU16           testVsyncWidthLines;    // DPCD offset 22C, 22D
                bool            testVsyncPolarity;
                NvU16           testActiveWidthPixels;  // DPCD offset 22E, 22F
                NvU16           testActiveHeightLines;  // DPCD offset 230, 231
            } testPattern;

            struct
            {
                bool testRequestPhyCompliance;          // DPCD offset 218
                LinkQualityPatternType phyTestPattern;  // DPCD offset 248
            } testPhyCompliance;

        } interrupts;

        bool bIndexedLinkrateCapable, bIndexedLinkrateEnabled;

    public:
        DPCDHALImpl(AuxBus * bus, Timer * timer)
        : bus(bus), timer(timer), bGrantsPostLtRequest(false), uprequestEnable(false),
          upstreamIsSource(false), bMultistream(false), bGpuFECSupported(false),
          bBypassILREdpRevCheck(false), overrideDpcdMaxLinkRate(0),
          overrideDpcdRev(0), gpuDPSupportedVersions(0), bIsDpTunnelBwAllocationEnabled(false)
        {
            // start with default caps.
            dpcdOffline = true;

            //
            // fill out the bare minimum caps required ...
            // this should be extended in for more dpcd offsets in future.
            //
            caps.revisionMajor         = 0x1;
            caps.revisionMinor         = 0x1;
            caps.supportsESI           = false;
            caps.maxLinkRate           = HBR3;
            caps.maxLaneCount          = 4;
            caps.enhancedFraming       = true;
            caps.downStreamPortPresent = true;
            caps.downStreamPortCount   = 1;

            // populate the sinkcount interrupt
            interrupts.sinkCount = 1;
        }

        ~DPCDHALImpl()
        {
        }

        virtual void setAuxBus(AuxBus * bus)
        {
            this->bus = bus;
        }

        bool isDpcdOffline()
        {
            return dpcdOffline;
        }

        void setDPCDOffline(bool bOffline)
        {
            dpcdOffline = bOffline;
        }

        void updateDPCDOffline();

        void setPC2Disabled(bool disabled)
        {
            pc2Disabled = disabled;
        }

        void setLttprSupported(bool isLttprSupported)
        {
            bLttprSupported = isLttprSupported;
        }

        bool isPC2Disabled()
        {
            return pc2Disabled;
        }

        virtual void parseAndReadCaps();
        virtual PCONCaps * getPCONCaps()
        {
            return &(caps.pconCaps);
        }

        // DPCD offset 0
        virtual unsigned getRevisionMajor()
        {
            return caps.revisionMajor;
        }

        virtual unsigned getRevisionMinor()
        {
            return caps.revisionMinor;
        }

        // DPCD offset F0000h
        virtual unsigned lttprGetRevisionMajor()
        {
            return caps.repeaterCaps.revisionMajor;
        }

        virtual unsigned lttprGetRevisionMinor()
        {
            return caps.repeaterCaps.revisionMinor;
        }

        virtual LinkRate getMaxLinkRate();

        // DPCD offset 2
        virtual unsigned getMaxLaneCount();

        virtual bool getNoLinkTraining()
        {
            return caps.noLinkTraining;
        }

        virtual unsigned getPhyRepeaterCount()
        {
            return caps.phyRepeaterCount;
        }

        // Max lanes supported at the desired link rate.
        virtual unsigned getMaxLaneCountSupportedAtLinkRate(LinkRate linkRate);

        virtual bool getEnhancedFraming()
        {
            return caps.enhancedFraming;
        }

        // DPCD offset 5
        virtual bool getDownstreamPort(NvU8 *portType)
        {
            *portType = caps.downStreamPortType;
            return caps.downStreamPortPresent;
        }

        virtual bool getSupportsNoHandshakeTraining()
        {
            return caps.supportsNoHandshakeTraining;
        }

        // DPCD offset 7
        virtual unsigned getLegacyPortCount()
        {
            return caps.downStreamPortCount;
        }

        virtual LegacyPort * getLegacyPort(unsigned index)
        {
            return &legacyPort[index];
        }

        virtual bool getMsaTimingparIgnored()
        {
            return caps.msaTimingParIgnored;
        }

        virtual bool getOuiSupported()
        {
            return caps.ouiSupported;
        }

        virtual bool getSDPExtnForColorimetry();

        virtual bool getRootAsyncSDPSupported();

        virtual AuxRetry::status setOuiSource(unsigned ouiId, const char * model,
                                              size_t modelNameLength, NvU8 chipRevision);
        virtual bool getOuiSource(unsigned &ouiId, char * modelName,
                                  size_t modelNameBufferSize, NvU8 & chipRevision);
        virtual bool getOuiSink(unsigned &ouiId, char * modelName,
                                size_t modelNameBufferSize, NvU8 & chipRevision);

        // DPCD offset 21h
        virtual bool getSupportsMultistream()
        {
            return caps.supportsMultistream && (!caps.overrideToSST);
        }

        virtual void setSupportsESI(bool bIsESISupported)
        {
            caps.supportsESI = bIsESISupported;
        }

        //
        // Single stream specific caps
        // DPCD offset 22h
        //
        virtual unsigned getNumberOfAudioEndpoints();

        // DPCD offset 30h
        virtual bool getGUID(GUID & guid);
        virtual AuxRetry::status setGUID(GUID & guid);

        void parsePortDescriptors();

        //
        // Notifications of external events
        //
        virtual void notifyIRQ()
        {
            parseAndReadInterrupts();
        }

        virtual void populateFakeDpcd();

        // DPCD override routine: Max link rate override.
        void overrideMaxLinkRate(NvU32 overrideMaxLinkRate);

        // DPCD override routine: Max lane count override.
        void overrideMaxLaneCount(NvU32 maxLaneCount)
        {
            caps.maxLaneCount = maxLaneCount;
            overrideDpcdMaxLaneCount = maxLaneCount;
        }

        // DPCD override routine: Max lane count override at a given link rate.
        void skipCableBWCheck(NvU32 maxLaneAtHighRate, NvU32 maxLaneAtLowRate)
        {
            caps.maxLanesAtHBR = maxLaneAtHighRate;
            caps.maxLanesAtRBR = maxLaneAtLowRate;
        }

        // DPCD override routine: Optimal link config (link rate and lane count) override.
        void overrideOptimalLinkCfg(LinkRate optimalLinkRate,
                                    NvU32 optimalLaneCount)
        {
            caps.maxLinkRate = optimalLinkRate;
            caps.maxLaneCount = optimalLaneCount;
        }

        // DPCD override routine: Optimal link rate
        void overrideOptimalLinkRate(LinkRate optimalLinkRate)
        {
            caps.maxLinkRate = optimalLinkRate;
        }

        virtual void notifyHPD(bool status, bool bSkipDPCDRead);
        virtual bool isPostLtAdjustRequestSupported()
        {
            //
            // If the upstream DPTX and downstream DPRX both support TPS4,
            // TPS4 shall be used instead of POST_LT_ADJ_REQ.
            //
            NvBool bTps4Supported = FLD_TEST_DRF(0073_CTRL_CMD_DP, _GET_CAPS_DP_VERSIONS_SUPPORTED,
                                                 _DP1_4, _YES, gpuDPSupportedVersions) &&
                                    caps.bSupportsTPS4;
            return bGrantsPostLtRequest && !bTps4Supported;
        }

        virtual void setPostLtAdjustRequestGranted(bool bGrantPostLtRequest);
        virtual bool getIsPostLtAdjRequestInProgress();
        virtual TrainingPatternSelectType getTrainingPatternSelect();
        virtual bool setTrainingMultiLaneSet(NvU8 numLanes,
                                             NvU8 *voltSwingSet,
                                             NvU8 *preEmphasisSet);

        virtual AuxRetry::status setIgnoreMSATimingParamters(bool msaTimingParamIgnoreEn);

        virtual AuxRetry::status setLinkQualPatternSet(LinkQualityPatternType linkQualPattern, unsigned laneCount);
        virtual AuxRetry::status setLinkQualLaneSet(unsigned lane, LinkQualityPatternType linkQualPattern);

        virtual AuxRetry::status setMessagingEnable(bool _uprequestEnable, bool _upstreamIsSource);
        virtual AuxRetry::status setMultistreamLink(bool enable);
        virtual AuxRetry::status setMultistreamHotplugMode(MultistreamHotplugMode notifyType);

        bool parseTestRequestTraining(NvU8 * buffer /* 0x18-0x28 valid */);
        void parseAutomatedTestRequest(bool testRequestPending);

        virtual bool parseTestRequestPhy();

        virtual bool interruptCapabilitiesChanged()
        {
            return interrupts.rxCapChanged;
        }

        virtual void clearInterruptCapabilitiesChanged()
        {
            NvU8 irqVector = 0;
            irqVector = FLD_SET_DRF(_DPCD, _LINK_SERVICE_IRQ_VECTOR_ESI0, _RX_CAP_CHANGED, _YES, irqVector);
            bus.write(NV_DPCD_LINK_SERVICE_IRQ_VECTOR_ESI0, &irqVector, sizeof irqVector);
        }

        virtual bool isPanelReplayErrorSet()
        {
            return interrupts.prErrorStatus;
        }

        virtual void readPanelReplayError();
        virtual void clearPanelReplayError()
        {
            NvU8 irqVector = 0U;
            irqVector = FLD_SET_DRF(_DPCD, _DEVICE_SERVICE_IRQ_VECTOR_ESI1,
                                    _PANEL_REPLAY_ERROR_STATUS, _YES, irqVector);
            bus.write(NV_DPCD_DEVICE_SERVICE_IRQ_VECTOR_ESI1, &irqVector,
                      sizeof irqVector);
        }

        virtual bool getLinkStatusChanged()
        {
            return interrupts.linkStatusChanged;
        }

        virtual void clearLinkStatusChanged()
        {
            NvU8 irqVector = 0;
            irqVector = FLD_SET_DRF(_DPCD, _LINK_SERVICE_IRQ_VECTOR_ESI0, _LINK_STATUS_CHANGED, _YES, irqVector);
            bus.write(NV_DPCD_LINK_SERVICE_IRQ_VECTOR_ESI0, &irqVector, sizeof irqVector);
        }

        virtual bool getHdmiLinkStatusChanged()
        {
            return interrupts.hdmiLinkStatusChanged;
        }

        virtual void clearHdmiLinkStatusChanged()
        {
            NvU8 irqVector = 0;
            irqVector = FLD_SET_DRF(_DPCD, _LINK_SERVICE_IRQ_VECTOR_ESI0, _HDMI_LINK_STATUS_CHANGED, _YES, irqVector);
            bus.write(NV_DPCD_LINK_SERVICE_IRQ_VECTOR_ESI0, &irqVector, sizeof irqVector);
        }

        virtual bool getStreamStatusChanged()
        {
            return interrupts.streamStatusChanged;
        }

        virtual void clearStreamStatusChanged()
        {
            NvU8 irqVector = 0;
            irqVector = FLD_SET_DRF(_DPCD, _LINK_SERVICE_IRQ_VECTOR_ESI0, _STREAM_STATUS_CHANGED, _YES, irqVector);
            bus.write(NV_DPCD_LINK_SERVICE_IRQ_VECTOR_ESI0, &irqVector, sizeof irqVector);
        }

        virtual bool getDpTunnelingIrq()
        {
            return interrupts.dpTunnelingIrq;
        }

        virtual void clearDpTunnelingIrq()
        {
            NvU8 irqVector = 0;
            irqVector = FLD_SET_DRF(_DPCD20, _LINK_SERVICE_IRQ_VECTOR_ESI0, _DP_TUNNELING_IRQ, _YES, irqVector);
            bus.write(NV_DPCD20_LINK_SERVICE_IRQ_VECTOR_ESI0, &irqVector, sizeof irqVector);
        }

        virtual bool isLinkStatusValid(unsigned lanes);
        virtual void refreshLinkStatus();
        virtual void setDirtyLinkStatus(bool dirty)
        {
            interrupts.laneStatusIntr.linkStatusDirtied = dirty;
        }

        void parseAndReadInterruptsESI();

        void readLTTPRLinkStatus(NvS32 rxIndex, NvU8 *buffer);
        void resetIntrLaneStatus();

        void fetchLinkStatusESI();
        void fetchLinkStatusLegacy();

        virtual bool readTraining(NvU8* voltageSwingLane, NvU8* preemphasisLane,
                                  NvU8* trainingScoreLane, NvU8* postCursor,
                                  NvU8 activeLaneCount);

        virtual bool isLaneSettingsChanged(NvU8* oldVoltageSwingLane,
                                           NvU8* newVoltageSwingLane,
                                           NvU8* oldPreemphasisLane,
                                           NvU8* newPreemphasisLane,
                                           NvU8 activeLaneCount);

        void parseAndReadInterruptsLegacy();

        void parseAndReadInterrupts()
        {
            if (caps.supportsESI)
                parseAndReadInterruptsESI();    // DP 1.2 should use the new ESI region
            else
                parseAndReadInterruptsLegacy();

        }

        virtual int getSinkCount()              // DPCD offset 200
        {
            return interrupts.sinkCount;
        }

        //
        // This was introduced as part of WAR for HP SDC Panel since their
        // TCON sets DPCD 0x200 SINK_COUNT=0. It should never be called to
        // set the SinkCount in other cases since SinkCount comes from DPCD.
        //
        virtual void setSinkCount(int sinkCount)
        {
            interrupts.sinkCount = sinkCount;
        }

        virtual bool interruptContentProtection()
        {
            return interrupts.cpIRQ;
        }

        virtual void clearInterruptContentProtection();

        virtual bool intteruptMCCS()
        {
            return interrupts.mccsIRQ;
        }

        virtual void clearInterruptMCCS();

        virtual bool interruptDownReplyReady()
        {
            return interrupts.downRepMsgRdy;
        }

        virtual bool interruptUpRequestReady()
        {
            return interrupts.upReqMsgRdy;
        }

        virtual void clearInterruptDownReplyReady();
        virtual void clearInterruptUpRequestReady();

        virtual bool getLaneStatusSymbolLock(int lane)
        {
            return interrupts.laneStatusIntr.laneStatus[lane].symbolLocked;
        }

        virtual bool getLaneStatusClockRecoveryDone(int lane)
        {
            return interrupts.laneStatusIntr.laneStatus[lane].clockRecoveryDone;
        }

        virtual bool getInterlaneAlignDone()            // DPCD offset 204
        {
            return interrupts.laneStatusIntr.interlaneAlignDone;
        }

        virtual bool getDownStreamPortStatusChange()
        {
            return interrupts.laneStatusIntr.downstmPortChng;
        }

        virtual bool getPendingTestRequestTraining()    // DPCD offset 218
        {
            return interrupts.testTraining.testRequestTraining;
        }

        virtual bool getPendingAutomatedTestRequest()
        {
            return interrupts.automatedTestRequest;
        }

        virtual bool getPendingTestRequestEdidRead()
        {
            return interrupts.testEdid.testRequestEdidRead;
        }

        virtual bool getPendingTestRequestPhyCompliance()
        {
            return interrupts.testPhyCompliance.testRequestPhyCompliance;
        }

        virtual void getTestRequestTraining(LinkRate & rate, unsigned & lanes)  // DPCD offset 219, 220
        {
            rate = interrupts.testTraining.testRequestLinkRate;
            lanes = interrupts.testTraining.testRequestLaneCount;
        }

        virtual LinkQualityPatternType getPhyTestPattern()  // DPCD offset 248
        {
            return interrupts.testPhyCompliance.phyTestPattern;
        }

        virtual void getCustomTestPattern(NvU8 *testPattern)    // DPCD offset 250 - 259
        {
            int i;

            for (i = 0; i < 10; i++)
            {
                testPattern[i] = interrupts.eightyBitCustomPat[i];
            }
        }

        virtual bool getBKSV(NvU8 *bKSV);
        virtual bool getBCaps(BCaps &bCaps, NvU8 * rawByte);
        virtual bool getHdcp22BCaps(BCaps &bCaps, NvU8 *rawByte);
        virtual bool getBinfo(BInfo &bInfo);
        virtual bool getRxStatus(const HDCPState &hdcpState, NvU8 *data);

        virtual AuxRetry::status setTestResponseChecksum(NvU8 checksum)
        {
            if (caps.revisionMajor <= 0)
                DP_ASSERT(0 && "Something is wrong, revision major should be > 0");

            return bus.write(NV_DPCD_TEST_EDID_CHKSUM, &checksum, sizeof checksum);
        }

        virtual AuxRetry::status setTestResponse(bool ack, bool edidChecksumWrite);

        // Message box encoding
        virtual AuxRetry::status writeDownRequestMessageBox(NvU8 * data, size_t length)
        {
            //
            // We can assume no message was sent if this fails.
            // Reasoning:
            // Sinks are not allowed to DEFER except on the first 16 byte write.
            // If there isn't enough room for the 48 byte packet, that write
            // will defer.
            //
            return bus.write(NV_DPCD_MBOX_DOWN_REQ, data, (unsigned)length);
        }
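The comment above captures the retry contract for sideband messaging: a sink may only DEFER the first 16-byte AUX write into the DOWN_REQ mailbox, so if writeDownRequestMessageBox() fails, no part of the message was consumed and the whole send can simply be retried. A caller-side sketch of that contract follows; it assumes the DisplayPort headers are available, and the AuxRetry::status enumerator name ack and the helper name sendDownRequest are assumptions.

    // Hypothetical wrapper, not library code: push one sideband message body into
    // the DOWN_REQ mailbox and report success. On failure nothing was consumed,
    // so the caller may retry the identical buffer later.
    static bool sendDownRequest(DisplayPort::DPCDHAL *hal, NvU8 *msg, size_t len)
    {
        if (len > hal->getDownRequestMessageBoxSize())
            return false;       // larger messages must be split by the message manager first

        // writeDownRequestMessageBox() issues the AUX writes internally
        // (typically getTransactionSize() == 16 bytes per transaction).
        return hal->writeDownRequestMessageBox(msg, len) == DisplayPort::AuxRetry::ack;
    }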

        virtual size_t getDownRequestMessageBoxSize()
        {
            return DP_MESSAGEBOX_SIZE;
        }

        virtual AuxRetry::status writeUpReplyMessageBox(NvU8 * data, size_t length)
        {
            if (caps.revisionMajor <= 0)
                DP_ASSERT(0 && "Something is wrong, revision major should be > 0");

            //
            // We can assume no message was sent if this fails.
            // Reasoning:
            // Sinks are not allowed to DEFER except on the first 16 byte write.
            // If there isn't enough room for the 48 byte packet, that write
            // will defer.
            //
            return bus.write(NV_DPCD_MBOX_UP_REP, data, (unsigned)length);
        }

        virtual size_t getUpReplyMessageBoxSize()
        {
            return DP_MESSAGEBOX_SIZE;
        }

        virtual AuxRetry::status readDownReplyMessageBox(NvU32 offset, NvU8 * data, size_t length)
        {
            // if (caps.revisionMajor <= 0)
            //     DP_ASSERT(0 && "Something is wrong, revision major should be > 0");

            DP_ASSERT(offset + length <= DP_MESSAGEBOX_SIZE);

            return bus.read(NV_DPCD_MBOX_DOWN_REP + offset, data, (unsigned)length);
        }

        virtual size_t getDownReplyMessageBoxSize()
        {
            return DP_MESSAGEBOX_SIZE;
        }

        virtual AuxRetry::status readUpRequestMessageBox(NvU32 offset, NvU8 * data, size_t length)
        {
            if (caps.revisionMajor <= 0)
                DP_ASSERT(0 && "Something is wrong, revision major should be > 0");

            DP_ASSERT(offset + length <= DP_MESSAGEBOX_SIZE);

            return bus.read(NV_DPCD_MBOX_UP_REQ + offset, data, (unsigned)length);
        }

        virtual size_t getUpRequestMessageBoxSize()
        {
            return DP_MESSAGEBOX_SIZE;
        }

        virtual size_t getTransactionSize()
        {
            return bus.getDirect()->transactionSize();
        }

        virtual PowerState getPowerState();
        virtual bool setPowerState(PowerState newState);
        virtual void payloadTableClearACT();
        virtual bool payloadWaitForACTReceived();
        virtual bool payloadAllocate(unsigned streamId, unsigned begin, unsigned count);

        void overrideMultiStreamCap(bool mstCapable)
        {
            caps.overrideToSST = !mstCapable;
        }

        bool getMultiStreamCapOverride()
        {
            return caps.overrideToSST;
        }

        bool getDpcdMultiStreamCap(void)
        {
            return caps.supportsMultistream;
        }

        virtual void setGpuDPSupportedVersions(NvU32 _gpuDPSupportedVersions);

        void setGpuFECSupported(bool bSupportFEC)
        {
            bGpuFECSupported = bSupportFEC;
        }

        void applyRegkeyOverrides(const DP_REGKEY_DATABASE& dpRegkeyDatabase);

        // To clear pending message {DOWN_REP/UP_REQ} and reply true if existed.
        virtual bool clearPendingMsg();

        virtual bool isMessagingEnabled();

        virtual void setIndexedLinkrateEnabled(bool val)
        {
            bIndexedLinkrateEnabled = val;
        }

        virtual bool isIndexedLinkrateEnabled()
        {
            return bIndexedLinkrateEnabled;
        }

        virtual bool isIndexedLinkrateCapable()
        {
            return bIndexedLinkrateCapable;
        }

        virtual NvU16 *getLinkRateTable();

        virtual NvU32 getVideoFallbackSupported()
        {
            return caps.videoFallbackFormats;
        }

        virtual bool getRawLinkRateTable(NvU8 *buffer);

        virtual void resetProtocolConverter()
        {
            NvU8 data = 0;
            bus.write(NV_DPCD14_PCON_FRL_LINK_CONFIG_1, &data, sizeof(data));
            bus.write(NV_DPCD14_PCON_FRL_LINK_CONFIG_2, &data, sizeof(data));
        }

        virtual bool setSourceControlMode(bool bEnableSourceControlMode, bool bEnableFRLMode);

        virtual bool checkPCONFrlReady(bool *bFrlReady);

        virtual bool setupPCONFrlLinkAssessment(NvU32 linkBwMask,
                                                bool bEnableExtendLTMode = false,
                                                bool bEnableConcurrentMode = false);

        virtual bool checkPCONFrlLinkStatus(NvU32 *frlRateMask);
        virtual bool queryHdmiLinkStatus(bool *bLinkActive, bool *bLinkReady);

        virtual NvU32 restorePCONFrlLink(NvU32 linkBwMask,
                                         bool bEnableExtendLTMode = false,
                                         bool bEnableConcurrentMode = false);
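resetProtocolConverter() and the virtuals declared just above cover the bring-up of a DP-to-HDMI protocol converter (PCON) running in FRL mode. One plausible call ordering is sketched below; it is illustrative only (the production sequence is behind ConnectorImpl::assessPCONLinkCapability and trainPCONFrlLink, declared later in this change), and the helper name assessPconFrl is an assumption.

    // Illustrative PCON FRL assessment using the virtuals above; the ordering and
    // bail-out policy are assumptions, not the library's actual algorithm.
    static bool assessPconFrl(DisplayPort::DPCDHAL *hal, NvU32 linkBwMask)
    {
        hal->resetProtocolConverter();                      // zero FRL_LINK_CONFIG_1/2

        bool bFrlReady = false;
        if (!hal->checkPCONFrlReady(&bFrlReady) || !bFrlReady)
            return false;                                   // converter is not ready for FRL training

        if (!hal->setupPCONFrlLinkAssessment(linkBwMask))   // defaults: no extended or concurrent LT
            return false;

        NvU32 frlRateMask = 0;
        if (!hal->checkPCONFrlLinkStatus(&frlRateMask))     // which requested FRL rates trained
            return false;

        bool bLinkActive = false, bLinkReady = false;
        return hal->queryHdmiLinkStatus(&bLinkActive, &bLinkReady) && bLinkActive;
    }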

        virtual void readPsrCapabilities(vesaPsrSinkCaps *caps)
        {
            dpMemCopy(caps, &this->caps.psrCaps, sizeof(vesaPsrSinkCaps));
        }

        virtual bool updatePsrConfiguration(vesaPsrConfig psrcfg);
        virtual bool readPsrConfiguration(vesaPsrConfig *psrcfg);

        virtual bool readPsrState(vesaPsrState *psrState);
        virtual bool readPsrDebugInfo(vesaPsrDebugStatus *psrDbgState);

        virtual bool writePsrErrorStatus(vesaPsrErrorStatus psrErr);
        virtual bool readPsrErrorStatus(vesaPsrErrorStatus *psrErr);

        virtual bool writePsrEvtIndicator(vesaPsrEventIndicator psrEvt);
        virtual bool readPsrEvtIndicator(vesaPsrEventIndicator *psrEvt);

        virtual bool readPrSinkDebugInfo(panelReplaySinkDebugInfo *prDbgInfo);

        bool getDpTunnelBwAllocationSupported()
        {
            return false;
        }

        virtual bool getDpTunnelGranularityMultiplier(NvU8 &granularityMultiplier);
        virtual TriState getDpTunnelBwRequestStatus();
        virtual bool setDpTunnelBwAllocation(bool bEnable);

        bool getDpTunnelEstimatedBw(NvU8 &estimatedBw);
        bool hasDpTunnelEstimatedBwChanged();
        bool hasDpTunnelBwAllocationCapabilityChanged();
        bool writeDpTunnelRequestedBw(NvU8 requestedBw);

    };

}

#endif //INCLUDED_DP_CONFIGCAPS_H
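As a usage note for the interrupt accessors DPCDHALImpl exposes above, the sketch below shows how a connector-level caller might poll-and-clear a few of the latched bits after a short HPD pulse. It is illustrative only: the dispatch order and the helper name serviceShortPulse are assumptions, and the production handling lives in ConnectorImpl (notifyShortPulse and the handleCPIRQ / handleMCCSIRQ / handleDpTunnelingIrq helpers declared later in this change).

    // Illustrative consumer of the DPCDHAL interrupt state; not the library's dispatch.
    static void serviceShortPulse(DisplayPort::DPCDHAL *hal)
    {
        hal->notifyIRQ();                       // parseAndReadInterrupts(): ESI or legacy region

        if (hal->interruptContentProtection())
        {
            // HDCP CP_IRQ: re-read RxStatus, then write-1-to-clear the latched bit.
            hal->clearInterruptContentProtection();
        }

        if (hal->getLinkStatusChanged())
        {
            hal->refreshLinkStatus();           // re-read the 202h/203h-style lane status
            hal->clearLinkStatusChanged();
        }

        if (hal->getDpTunnelingIrq())
        {
            // Estimated tunnel bandwidth or a BW request status may have changed.
            hal->clearDpTunnelingIrq();
        }

        if (hal->interruptDownReplyReady())
        {
            // A sideband DOWN_REP is waiting in the mailbox (see readDownReplyMessageBox()).
            hal->clearInterruptDownReplyReady();
        }
    }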

@@ -1,5 +1,5 @@
/*
 * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
@@ -65,6 +65,7 @@ namespace DisplayPort
        DP_IMP_ERROR_INSUFFICIENT_BANDWIDTH,
        DP_IMP_ERROR_INSUFFICIENT_BANDWIDTH_DSC,
        DP_IMP_ERROR_INSUFFICIENT_BANDWIDTH_NO_DSC,
        DP_IMP_ERROR_INSUFFICIENT_DP_TUNNELING_BANDWIDTH,
        DP_IMP_ERROR_WATERMARK_BLANKING,
        DP_IMP_ERROR_PPS_COLOR_FORMAT_NOT_SUPPORTED,
        DP_IMP_ERROR_PPS_INVALID_HBLANK,
@@ -274,6 +275,10 @@ namespace DisplayPort

        virtual DscCaps getDscCaps() = 0;

        virtual NvBool isDynamicPPSSupported() = 0;

        virtual NvBool isDynamicDscToggleSupported() = 0;

        //
        // This function returns the device itself or its parent device that is doing
        // DSC decompression for it.
@@ -321,8 +326,14 @@ namespace DisplayPort
        virtual bool isMSAOverMSTCapable() = 0;
        virtual bool isFakedMuxDevice() = 0;
        virtual bool setPanelReplayConfig(panelReplayConfig prcfg) = 0;
        virtual bool getPanelReplayConfig(panelReplayConfig *pPrcfg) = 0;
        virtual bool isPanelReplaySupported() = 0;
        virtual bool getPanelReplayStatus(PanelReplayStatus *pPrStatus) = 0;
        virtual bool getDeviceSpecificData(NvU8 *oui, NvU8 *deviceIdString,
                                           NvU8 *hwRevision, NvU8 *swMajorRevision,
                                           NvU8 *swMinorRevision) = 0;

        virtual bool setModeList(DisplayPort::DpModesetParams *pModeList, unsigned numModes) = 0;

    protected:
        virtual ~Device() {}
@@ -594,6 +605,8 @@ namespace DisplayPort
        virtual void notifyGPUCapabilityChange() = 0;
        virtual void notifyHBR2WAREngage() = 0;

        virtual bool dpUpdateDscStream(Group *target, NvU32 dscBpp) = 0;

        // Create a new Group. Note that if you wish to do a modeset but send the
        // stream nowhere, you may do a modeset with an EMPTY group. This is expected
        // to be the mechanism by which monitor faking is implemented.
@@ -710,6 +723,7 @@ namespace DisplayPort
        virtual bool setTestPattern(NV0073_CTRL_DP_TESTPATTERN testPattern,
                                    NvU8 laneMask, NV0073_CTRL_DP_CSTM cstm,
                                    NvBool bIsHBR2, NvBool bSkipLaneDataOverride) = 0;

        // "data" is an array of NV0073_CTRL_MAX_LANES unsigned ints
        virtual bool getLaneConfig(NvU32 *numLanes, NvU32 *data) = 0;
        // "data" is an array of NV0073_CTRL_MAX_LANES unsigned ints
@@ -735,6 +749,7 @@ namespace DisplayPort
        virtual bool updatePsrLinkState(bool bTurnOnLink) = 0;

        virtual bool readPrSinkDebugInfo(panelReplaySinkDebugInfo *prDbgInfo) = 0;
        virtual void enableDpTunnelingBwAllocationSupport() = 0;

    protected:
        virtual ~Connector() {}

@@ -49,6 +49,9 @@
#define HDCP_FLAGS_ABORT_DEVICE_INVALID     0x00080000 // Abort due to an invalid device in DP1.2 topology
#define HDCP_FLAGS_ABORT_HOP_LIMIT_EXCEEDED 0x80000000 // Abort, number of devices in DP1.2 topology exceeds supported limit

#define DP_TUNNEL_REQUEST_BW_MAX_TIME_MS         (1000U)
#define DP_TUNNEL_REQUEST_BW_POLLING_INTERVAL_MS (10U)
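These two constants bound the wait for a DP tunnel bandwidth request: up to 1000 ms total, sampled every 10 ms, i.e. at most 100 polls. The loop below is only a sketch of that budget (the real wait is implemented behind ConnectorImpl::requestDpTunnelBw, declared further down in this change); Timer::sleep() and the callback name checkStatusOnce are assumptions.

    // 1000 ms / 10 ms => at most 100 status polls before the request is treated as failed.
    static const NvU32 kMaxBwRequestPolls =
        DP_TUNNEL_REQUEST_BW_MAX_TIME_MS / DP_TUNNEL_REQUEST_BW_POLLING_INTERVAL_MS;

    // Illustrative poll loop; checkStatusOnce() would read the BW request status once.
    template <typename CheckFn>
    static bool waitForBwRequestComplete(DisplayPort::Timer *timer, CheckFn checkStatusOnce)
    {
        for (NvU32 i = 0; i < kMaxBwRequestPolls; i++)
        {
            if (checkStatusOnce())
                return true;
            timer->sleep(DP_TUNNEL_REQUEST_BW_POLLING_INTERVAL_MS);   // sleep() name is an assumption
        }
        return false;   // DP_TUNNEL_REQUEST_BW_MAX_TIME_MS elapsed without a grant
    }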

static inline unsigned getDataClockMultiplier(NvU64 linkRate, NvU64 laneCount)
{
    //
@@ -192,6 +195,7 @@ namespace DisplayPort
        bool     compoundQueryResult;
        unsigned compoundQueryCount;
        unsigned compoundQueryLocalLinkPBN;
        NvU64    compoundQueryUsedTunnelingBw;
        bool     compoundQueryForceEnableFEC;

        unsigned freeSlots;
@@ -309,7 +313,6 @@ namespace DisplayPort
        bool bNoFallbackInPostLQA;

        bool bReportDeviceLostBeforeNew;
        bool bEnableAudioBeyond48K;
        bool bDisableSSC;
        bool bEnableFastLT;
        NvU32 maxLinkRateFromRegkey;
@@ -348,9 +351,6 @@ namespace DisplayPort
        //
        bool bPowerDownPhyBeforeD3;

        // Force DSC on sink irrespective of LT status
        bool bForceDscOnSink;

        //
        // Reset the MSTM_CTRL registers on branch device irrespective of
        // IRQ VECTOR register having stale message. Certain branch devices
@@ -362,6 +362,11 @@ namespace DisplayPort
        bool bForceClearPendingMsg;
        bool bSkipFakeDeviceDpcdAccess;

        NvU64 allocatedDpTunnelBw;
        NvU64 allocatedDpTunnelBwShadow;
        bool  bForceDisableTunnelBwAllocation;
        bool  bClientRequestedDpTunnelBwAllocation;
        bool  bIsDpTunnelBwAllocationEnabled;

        Group *perHeadAttachedGroup[NV_MAX_HEADS];
        NvU32 inTransitionHeadMask;
@@ -444,6 +449,9 @@ namespace DisplayPort
                                         const DpModesetParams &modesetParams,      // Modeset info
                                         DscParams *pDscParams = NULL,              // DSC parameters
                                         DP_IMP_ERROR *pErrorCode = NULL);          // Error Status code
        virtual bool compoundQueryAttachTunneling(const DpModesetParams &modesetParams,
                                                  DscParams *pDscParams = NULL,
                                                  DP_IMP_ERROR *pErrorCode = NULL);

        virtual bool endCompoundQuery();

@@ -495,6 +503,7 @@ namespace DisplayPort
        char tagHDCPReauthentication;
        char tagDelayedHdcpCapRead;
        char tagDelayedHDCPCPIrqHandling;
        char tagDpBwAllocationChanged;

        //
        // Enable disable TMDS mode
@@ -563,6 +572,18 @@ namespace DisplayPort
        bool willLinkSupportModeSST(const LinkConfiguration & linkConfig, const ModesetInfo & modesetInfo);
        void forceLinkTraining();

        bool updateDpTunnelBwAllocation();
        void configureDpTunnelBwAllocation();
        TriState requestDpTunnelBw(NvU8 requestedBw);
        bool allocateDpTunnelBw(NvU64 bandwidth);
        bool allocateMaxDpTunnelBw();
        NvU64 getMaxTunnelBw();

        void enableDpTunnelingBwAllocationSupport()
        {
            bClientRequestedDpTunnelBwAllocation = true;
        }

        void assessLink(LinkTrainingType trainType = NORMAL_LINK_TRAINING);

        bool isLinkInD3();
@@ -594,8 +615,8 @@ namespace DisplayPort
        void populateDscBranchCaps(DSC_INFO* dscInfo, DeviceImpl * dev);
        void populateDscModesetInfo(MODESET_INFO * pModesetInfo, const DpModesetParams * pModesetParams);

        bool train(const LinkConfiguration & lConfig, bool force, LinkTrainingType trainType = NORMAL_LINK_TRAINING);
        bool validateLinkConfiguration(const LinkConfiguration & lConfig);
        virtual bool train(const LinkConfiguration & lConfig, bool force, LinkTrainingType trainType = NORMAL_LINK_TRAINING);
        virtual bool validateLinkConfiguration(const LinkConfiguration & lConfig);

        virtual bool assessPCONLinkCapability(PCONLinkControl *params);
        bool trainPCONFrlLink(PCONLinkControl *pConControl);
@@ -606,12 +627,12 @@ namespace DisplayPort
        // the lowest level function(nearest to the hal) for the connector.
        bool rawTrain(const LinkConfiguration & lConfig, bool force, LinkTrainingType linkTrainingType);

        bool enableFlush();
        bool beforeAddStream(GroupImpl * group, bool force=false, bool forFlushMode = false);
        void afterAddStream(GroupImpl * group);
        void beforeDeleteStream(GroupImpl * group, bool forFlushMode = false);
        void afterDeleteStream(GroupImpl * group);
        void disableFlush(bool test=false);
        virtual bool enableFlush();
        virtual bool beforeAddStream(GroupImpl * group, bool force=false, bool forFlushMode = false);
        virtual void afterAddStream(GroupImpl * group);
        virtual void beforeDeleteStream(GroupImpl * group, bool forFlushMode = false);
        virtual void afterDeleteStream(GroupImpl * group);
        virtual void disableFlush(bool test=false);

        bool beforeAddStreamMST(GroupImpl * group, bool force = false, bool forFlushMode = false);

@@ -619,7 +640,7 @@ namespace DisplayPort

        bool deleteAllVirtualChannels();
        void clearTimeslices();
        bool allocateTimeslice(GroupImpl * targetGroup);
        virtual bool allocateTimeslice(GroupImpl * targetGroup);
        void freeTimeslice(GroupImpl * targetGroup);
        void flushTimeslotsToHardware();
        bool getHDCPAbortCodesDP12(NvU32 &hdcpAbortCodesDP12);
@@ -629,6 +650,7 @@ namespace DisplayPort
        bool handleCPIRQ();
        void handleSSC();
        void handleMCCSIRQ();
        void handleDpTunnelingIrq();
        void handleHdmiLinkStatusChanged();
        void sortActiveGroups(bool ascending);
        void configInit();
@@ -639,7 +661,7 @@ namespace DisplayPort
        void notifyLongPulseInternal(bool statusConnected);
        virtual void notifyLongPulse(bool status);
        virtual void notifyShortPulse();
        virtual Group * newGroup() ;
        virtual Group * newGroup();
        virtual void destroy();
        virtual void createFakeMuxDevice(const NvU8 *buffer, NvU32 bufferSize);
        virtual void deleteFakeMuxDevice();
@@ -664,6 +686,7 @@ namespace DisplayPort
        Group * createFirmwareGroup();
        virtual void notifyGPUCapabilityChange();
        virtual void notifyHBR2WAREngage();
        bool dpUpdateDscStream(Group *target, NvU32 dscBpp);

        bool getTestPattern(NV0073_CTRL_DP_TESTPATTERN *testPattern);
        bool setTestPattern(NV0073_CTRL_DP_TESTPATTERN testPattern, NvU8 laneMask, NV0073_CTRL_DP_CSTM cstm, NvBool bIsHBR2, NvBool bSkipLaneDataOverride = false);
@@ -707,16 +730,16 @@ namespace DisplayPort
    //
    struct DevicePendingEDIDRead : protected EdidReadMultistream::EdidReadMultistreamEventSink, public ListElement
    {
        EdidReadMultistream reader;
        DiscoveryManager::Device device;
        ConnectorImpl * parent;
        DiscoveryManager::Device device;
        EdidReadMultistream reader;

        void mstEdidCompleted(EdidReadMultistream * from);
        void mstEdidReadFailed(EdidReadMultistream * from);

    public:
        DevicePendingEDIDRead(ConnectorImpl * _parent, MessageManager * manager, DiscoveryManager::Device dev)
            : reader(_parent->timer, manager, this, dev.address), device(dev), parent(_parent)
            : parent(_parent), device(dev), reader(_parent->timer, manager, this, dev.address)
        {
        }
    };
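The member reordering in DevicePendingEDIDRead (parent, device, reader instead of reader, device, parent) lines the declaration order up with the constructor's mem-initializer list. In C++, non-static members are always initialized in declaration order regardless of how the initializer list is written, so the old layout constructed reader first even though it was listed last; compilers flag that mismatch with -Wreorder. A minimal standalone illustration (all names here are invented for the example):

    #include <iostream>

    struct A { A() { std::cout << "A "; } };
    struct B { B() { std::cout << "B "; } };

    struct Holder
    {
        A a;                        // declared first, therefore constructed first...
        B b;
        Holder() : b(), a() {}      // ...even though the initializer list names b first (-Wreorder)
    };

    int main()
    {
        Holder h;                   // prints "A B", not "B A"
        std::cout << '\n';
        return 0;
    }

Because DevicePendingEDIDRead's initializers only use the constructor parameters, the old order was functionally harmless; matching the two orders, as this change does, removes the warning and the ambiguity.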

@@ -44,6 +44,7 @@ namespace DisplayPort
#define HDCP_BCAPS_DDC_EN_BIT   0x80
#define HDCP_BCAPS_DP_EN_BIT    0x01
#define HDCP_I2C_CLIENT_ADDR    0x74
#define DEVICE_OUI_SIZE         3

    struct GroupImpl;
    struct ConnectorImpl;
@@ -170,7 +171,6 @@ namespace DisplayPort

        // Panel replay Caps
        PanelReplayCaps prCaps;

        bool bIsFakedMuxDevice;
        bool bIsPreviouslyFakedMuxDevice;
        bool bisMarkedForDeletion;
@@ -202,6 +202,8 @@ namespace DisplayPort
        bool bSkipFakeDeviceDpcdAccess;

        DeviceImpl(DPCDHAL * hal, ConnectorImpl * connector, DeviceImpl * parent, bool bSkipFakeDeviceDpcdAccess);
        NvU64 maxModeBwRequired;

        ~DeviceImpl();

        virtual bool isCableOk();
@@ -380,6 +382,11 @@ namespace DisplayPort
            return dpcdRevisionMinor >= minor;
        }

        NvU64 getMaxModeBwRequired()
        {
            return maxModeBwRequired;
        }

        virtual void queryGUID2();

        virtual bool getSDPExtnForColorimetrySupported();
@@ -445,6 +452,7 @@ namespace DisplayPort
        bool isPanelReplaySupported(void);
        void getPanelReplayCaps(void);
        bool setPanelReplayConfig(panelReplayConfig prcfg);
        bool getPanelReplayConfig(panelReplayConfig *pPrcfg);
        bool getPanelReplayStatus(PanelReplayStatus *pPrStatus);

        NvBool getDSCSupport();
@@ -481,6 +489,11 @@ namespace DisplayPort
        unsigned getDscMaxSliceWidth();
        unsigned getDscDecoderColorDepthSupportMask();
        void setDscDecompressionDevice(bool bDscCapBasedOnParent);
        virtual bool getDeviceSpecificData(NvU8 *oui, NvU8 *deviceIdString,
                                           NvU8 *hwRevision, NvU8 *swMajorRevision,
                                           NvU8 *swMinorRevision);

        virtual bool setModeList(DisplayPort::DpModesetParams *pModeList, unsigned numModes);
    };
    class DeviceHDCPDetection : public Object, MessageManager::Message::MessageEventSink, Timer::TimerCallback
    {

@@ -1,5 +1,5 @@
/*
 * SPDX-FileCopyrightText: Copyright (c) 2010-2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 * SPDX-FileCopyrightText: Copyright (c) 2010-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
@@ -124,26 +124,7 @@ namespace DisplayPort
            return this->patchedChecksum;
        }

        bool isValidHeader() const
        {
            NvU8 validHeaderData[8] = {
                0x00, 0xFF, 0xFF, 0xFF, 0xFF,
                0xFF, 0xFF, 0x00};

            if (buffer.getLength() < 0x8)
                return false;

            for (unsigned i = 0; i < 8; i++)
            {
                if (buffer.data[i] != validHeaderData[i])
                {
                    DP_LOG(("DP-EDID> Invalid EDID Header"));
                    return false;
                }
            }

            return true;
        }
        bool isValidHeader() const;

        unsigned getManufId() const
        {

@@ -1,5 +1,5 @@
/*
 * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
@@ -147,6 +147,7 @@ namespace DisplayPort
        // Defines the same as NV0073_CTRL_CMD_DP_GET_CAPS_PARAMS.dpVersionsSupported
        //
        NvU32 _gpuSupportedDpVersions;

        bool _isStreamCloningEnabled;
        bool _needForceRmEdid;
        bool _skipPowerdownEDPPanelWhenHeadDetach;
@@ -156,10 +157,11 @@ namespace DisplayPort
        bool _useDfpMaxLinkRateCaps;
        bool _applyLinkBwOverrideWarRegVal;
        bool _isDynamicMuxCapable;
        bool _isMDMEnabled;
        bool _enableMSAOverrideOverMST;

        bool _isLTPhyRepeaterSupported;
        bool _isMSTPCONCapsReadDisabled;
        bool _isDownspreadSupported;
        //
        // LTTPR count reported by RM, it might not be the same with DPLib probe
        // For example, some Intel LTTPR might not be ready to response 0xF0000 probe
@@ -258,6 +260,16 @@ namespace DisplayPort
            return (_isDynamicMuxCapable && _isEDP);
        }

        virtual bool isMDMEnabled()
        {
            return (_isMDMEnabled && _isEDP);
        }

        virtual bool isDownspreadSupported()
        {
            return _isDownspreadSupported;
        }

        // Get GPU DSC capabilities
        virtual void getDscCaps(bool *pbDscSupported,
                                unsigned *pEncoderColorFormatMask,
@@ -313,6 +325,11 @@ namespace DisplayPort
            return this->_isLTPhyRepeaterSupported;
        }

        EvoInterface * getProvider()
        {
            return this->provider;
        }

        // Return the current mux state. Returns false if device is not mux capable
        bool getDynamicMuxState(NvU32 *muxState);

@@ -334,8 +351,8 @@ namespace DisplayPort
        virtual bool getMaxLinkConfigFromUefi(NvU8 &linkRate, NvU8 &laneCount);
        virtual bool setDpMSAParameters(bool bStereoEnable, const NV0073_CTRL_CMD_DP_SET_MSA_PROPERTIES_PARAMS &msaparams);
        virtual bool setDpStereoMSAParameters(bool bStereoEnable, const NV0073_CTRL_CMD_DP_SET_MSA_PROPERTIES_PARAMS &msaparams);
        virtual bool setFlushMode();
        virtual void clearFlushMode(unsigned headMask, bool testMode=false);
        bool setFlushMode();
        void clearFlushMode(unsigned headMask, bool testMode=false);

        virtual bool dscCrcTransaction(NvBool bEnable, gpuDscCrc *data, NvU16 *headIndex);

@@ -1,5 +1,5 @@
/*
 * SPDX-FileCopyrightText: Copyright (c) 2015-2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 * SPDX-FileCopyrightText: Copyright (c) 2015-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
@@ -32,12 +32,14 @@

#include "nvtypes.h"
#include "dp_tracing.h"
#include "dp_printf.h"

extern "C" void * dpMalloc(NvLength size);
extern "C" void dpFree(void * ptr);
extern "C" void dpDebugBreakpoint();
// Note: dpPrint() implementations are expected to append a newline themselves.
extern "C" void dpPrint(const char * formatter, ...);
extern "C" void dpPrintf(DP_LOG_LEVEL severity, const char * formatter, ...);
extern "C" void dpTraceEvent(NV_DP_TRACING_EVENT event,
                             NV_DP_TRACING_PRIORITY priority, NvU32 numArgs, ...);

@@ -1,5 +1,5 @@
/*
 * SPDX-FileCopyrightText: Copyright (c) 1993-2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
@@ -108,13 +108,6 @@ template <class T> void dp_used(const T & /*x*/) {}
//

#if NV_DP_ASSERT_ENABLED
#define DP_LOG(x) \
    do \
    { \
        dpPrint x; \
        addDpLogRecord x; \
    }while (false)

#define DP_ASSERT(x) \
    if (!(x)) \
    { \
@@ -123,9 +116,6 @@ template <class T> void dp_used(const T & /*x*/) {}
        dpDebugBreakpoint(); \
    }
#else

#define DP_LOG(x)

#define DP_ASSERT(x) \
    { \
        DP_USED(x); \