555.42.02

This commit is contained in:
Bernhard Stoeckner 2024-05-21 15:11:46 +02:00
parent 083cd9cf17
commit 5a1c474040
No known key found for this signature in database
GPG Key ID: 7D23DC2750FAC2E1
955 changed files with 171849 additions and 144768 deletions

View File

@ -1,5 +1,9 @@
# Changelog
## Release 555 Entries
### [555.42.02] 2024-05-21
## Release 550 Entries
### [550.78] 2024-04-25

View File

@ -1,7 +1,7 @@
# NVIDIA Linux Open GPU Kernel Module Source
This is the source release of the NVIDIA Linux open GPU kernel modules,
version 550.78.
version 555.42.02.
## How to Build
@ -17,7 +17,7 @@ as root:
Note that the kernel modules built here must be used with GSP
firmware and user-space NVIDIA GPU driver components from a corresponding
550.78 driver release. This can be achieved by installing
555.42.02 driver release. This can be achieved by installing
the NVIDIA GPU driver from the .run file using the `--no-kernel-modules`
option. E.g.,
@ -74,7 +74,7 @@ kernel.
The NVIDIA open kernel modules support the same range of Linux kernel
versions that are supported with the proprietary NVIDIA kernel modules.
This is currently Linux kernel 3.10 or newer.
This is currently Linux kernel 4.15 or newer.
## How to Contribute
@ -188,7 +188,7 @@ encountered specific to them.
For details on feature support and limitations, see the NVIDIA GPU driver
end user README here:
https://us.download.nvidia.com/XFree86/Linux-x86_64/550.78/README/kernel_open.html
https://us.download.nvidia.com/XFree86/Linux-x86_64/555.42.02/README/kernel_open.html
For vGPU support, please refer to the README.vgpu packaged in the vGPU Host
Package for more details.
@ -856,6 +856,7 @@ Subsystem Device ID.
| NVIDIA RTX A500 Embedded GPU | 25FB |
| NVIDIA GeForce RTX 4090 | 2684 |
| NVIDIA GeForce RTX 4090 D | 2685 |
| NVIDIA GeForce RTX 4070 Ti SUPER | 2689 |
| NVIDIA RTX 6000 Ada Generation | 26B1 1028 16A1 |
| NVIDIA RTX 6000 Ada Generation | 26B1 103C 16A1 |
| NVIDIA RTX 6000 Ada Generation | 26B1 10DE 16A1 |

View File

@ -72,7 +72,7 @@ EXTRA_CFLAGS += -I$(src)/common/inc
EXTRA_CFLAGS += -I$(src)
EXTRA_CFLAGS += -Wall $(DEFINES) $(INCLUDES) -Wno-cast-qual -Wno-format-extra-args
EXTRA_CFLAGS += -D__KERNEL__ -DMODULE -DNVRM
EXTRA_CFLAGS += -DNV_VERSION_STRING=\"550.78\"
EXTRA_CFLAGS += -DNV_VERSION_STRING=\"555.42.02\"
ifneq ($(SYSSRCHOST1X),)
EXTRA_CFLAGS += -I$(SYSSRCHOST1X)
@ -118,7 +118,7 @@ ifeq ($(ARCH),x86_64)
endif
ifeq ($(ARCH),powerpc)
EXTRA_CFLAGS += -mlittle-endian -mno-strict-align -mno-altivec
EXTRA_CFLAGS += -mlittle-endian -mno-strict-align
endif
EXTRA_CFLAGS += -DNV_UVM_ENABLE
@ -172,6 +172,7 @@ NV_CFLAGS_FROM_CONFTEST := $(shell $(NV_CONFTEST_CMD) build_cflags)
NV_CONFTEST_CFLAGS = $(NV_CFLAGS_FROM_CONFTEST) $(EXTRA_CFLAGS) -fno-pie
NV_CONFTEST_CFLAGS += $(call cc-disable-warning,pointer-sign)
NV_CONFTEST_CFLAGS += $(call cc-option,-fshort-wchar,)
NV_CONFTEST_CFLAGS += $(call cc-option,-Werror=incompatible-pointer-types,)
NV_CONFTEST_CFLAGS += -Wno-error
NV_CONFTEST_COMPILE_TEST_HEADERS := $(obj)/conftest/macros.h

View File

@ -28,7 +28,7 @@ else
else
KERNEL_UNAME ?= $(shell uname -r)
KERNEL_MODLIB := /lib/modules/$(KERNEL_UNAME)
KERNEL_SOURCES := $(shell test -d $(KERNEL_MODLIB)/source && echo $(KERNEL_MODLIB)/source || echo $(KERNEL_MODLIB)/build)
KERNEL_SOURCES := $(shell ((test -d $(KERNEL_MODLIB)/source && echo $(KERNEL_MODLIB)/source) || (test -d $(KERNEL_MODLIB)/build/source && echo $(KERNEL_MODLIB)/build/source)) || echo $(KERNEL_MODLIB)/build)
endif
KERNEL_OUTPUT := $(KERNEL_SOURCES)
@ -42,7 +42,11 @@ else
else
KERNEL_UNAME ?= $(shell uname -r)
KERNEL_MODLIB := /lib/modules/$(KERNEL_UNAME)
ifeq ($(KERNEL_SOURCES), $(KERNEL_MODLIB)/source)
# $(filter pattern...,text) - Returns all whitespace-separated words in text that
# do match any of the pattern words, removing any words that do not match.
# Set the KERNEL_OUTPUT only if either $(KERNEL_MODLIB)/source or
# $(KERNEL_MODLIB)/build/source path matches the KERNEL_SOURCES.
ifneq ($(filter $(KERNEL_SOURCES),$(KERNEL_MODLIB)/source $(KERNEL_MODLIB)/build/source),)
KERNEL_OUTPUT := $(KERNEL_MODLIB)/build
KBUILD_PARAMS := KBUILD_OUTPUT=$(KERNEL_OUTPUT)
endif

View File

@ -37,13 +37,11 @@ typedef enum _HYPERVISOR_TYPE
OS_HYPERVISOR_UNKNOWN
} HYPERVISOR_TYPE;
#define CMD_VGPU_VFIO_WAKE_WAIT_QUEUE 0
#define CMD_VGPU_VFIO_INJECT_INTERRUPT 1
#define CMD_VGPU_VFIO_REGISTER_MDEV 2
#define CMD_VGPU_VFIO_PRESENT 3
#define CMD_VFIO_PCI_CORE_PRESENT 4
#define CMD_VFIO_WAKE_REMOVE_GPU 1
#define CMD_VGPU_VFIO_PRESENT 2
#define CMD_VFIO_PCI_CORE_PRESENT 3
#define MAX_VF_COUNT_PER_GPU 64
#define MAX_VF_COUNT_PER_GPU 64
typedef enum _VGPU_TYPE_INFO
{
@ -54,17 +52,11 @@ typedef enum _VGPU_TYPE_INFO
typedef struct
{
void *vgpuVfioRef;
void *waitQueue;
void *nv;
NvU32 *vgpuTypeIds;
NvU8 **vgpuNames;
NvU32 numVgpuTypes;
NvU32 domain;
NvU8 bus;
NvU8 slot;
NvU8 function;
NvBool is_virtfn;
NvU32 domain;
NvU32 bus;
NvU32 device;
NvU32 return_status;
} vgpu_vfio_info;
typedef struct

View File

@ -58,14 +58,10 @@
#include <linux/version.h>
#include <linux/utsname.h>
#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 32)
#error "This driver does not support kernels older than 2.6.32!"
#elif LINUX_VERSION_CODE < KERNEL_VERSION(2, 7, 0)
# define KERNEL_2_6
#elif LINUX_VERSION_CODE >= KERNEL_VERSION(3, 0, 0)
# define KERNEL_3
#else
#error "This driver does not support development kernels!"
#if LINUX_VERSION_CODE == KERNEL_VERSION(4, 4, 0)
// Version 4.4 is allowed, temporarily, although not officially supported.
#elif LINUX_VERSION_CODE < KERNEL_VERSION(4, 15, 0)
#error "This driver does not support kernels older than Linux 4.15!"
#endif
#if defined (CONFIG_SMP) && !defined (__SMP__)
@ -836,16 +832,16 @@ static inline dma_addr_t nv_phys_to_dma(struct device *dev, NvU64 pa)
#define NV_PRINT_AT(nv_debug_level,at) \
{ \
nv_printf(nv_debug_level, \
"NVRM: VM: %s:%d: 0x%p, %d page(s), count = %d, flags = 0x%08x, " \
"NVRM: VM: %s:%d: 0x%p, %d page(s), count = %d, " \
"page_table = 0x%p\n", __FUNCTION__, __LINE__, at, \
at->num_pages, NV_ATOMIC_READ(at->usage_count), \
at->flags, at->page_table); \
at->page_table); \
}
#define NV_PRINT_VMA(nv_debug_level,vma) \
{ \
nv_printf(nv_debug_level, \
"NVRM: VM: %s:%d: 0x%lx - 0x%lx, 0x%08x bytes @ 0x%016llx, 0x%p, 0x%p\n", \
"NVRM: VM: %s:%d: 0x%lx - 0x%lx, 0x%08lx bytes @ 0x%016llx, 0x%p, 0x%p\n", \
__FUNCTION__, __LINE__, vma->vm_start, vma->vm_end, NV_VMA_SIZE(vma), \
NV_VMA_OFFSET(vma), NV_VMA_PRIVATE(vma), NV_VMA_FILE(vma)); \
}
@ -1078,6 +1074,8 @@ static inline void nv_kmem_ctor_dummy(void *arg)
kmem_cache_destroy(kmem_cache); \
}
#define NV_KMEM_CACHE_ALLOC_ATOMIC(kmem_cache) \
kmem_cache_alloc(kmem_cache, GFP_ATOMIC)
#define NV_KMEM_CACHE_ALLOC(kmem_cache) \
kmem_cache_alloc(kmem_cache, GFP_KERNEL)
#define NV_KMEM_CACHE_FREE(ptr, kmem_cache) \
@ -1104,6 +1102,23 @@ static inline void *nv_kmem_cache_zalloc(struct kmem_cache *k, gfp_t flags)
#endif
}
static inline int nv_kmem_cache_alloc_stack_atomic(nvidia_stack_t **stack)
{
nvidia_stack_t *sp = NULL;
#if defined(NVCPU_X86_64)
if (rm_is_altstack_in_use())
{
sp = NV_KMEM_CACHE_ALLOC_ATOMIC(nvidia_stack_t_cache);
if (sp == NULL)
return -ENOMEM;
sp->size = sizeof(sp->stack);
sp->top = sp->stack + sp->size;
}
#endif
*stack = sp;
return 0;
}
static inline int nv_kmem_cache_alloc_stack(nvidia_stack_t **stack)
{
nvidia_stack_t *sp = NULL;
@ -1614,6 +1629,10 @@ typedef struct nv_linux_state_s {
nv_kthread_q_t open_q;
NvBool is_accepting_opens;
struct semaphore open_q_lock;
#if defined(NV_VGPU_KVM_BUILD)
wait_queue_head_t wait;
NvS32 return_status;
#endif
} nv_linux_state_t;
extern nv_linux_state_t *nv_linux_devices;

View File

@ -29,17 +29,17 @@
typedef int vm_fault_t;
#endif
/* pin_user_pages
/*
* pin_user_pages()
*
* Presence of pin_user_pages() also implies the presence of unpin_user_page().
* Both were added in the v5.6-rc1
* Both were added in v5.6.
*
* pin_user_pages() was added by commit eddb1c228f7951d399240
* ("mm/gup: introduce pin_user_pages*() and FOLL_PIN") in v5.6-rc1 (2020-01-30)
*
* Removed vmas parameter from pin_user_pages() by commit 40896a02751
* ("mm/gup: remove vmas parameter from pin_user_pages()")
* in linux-next, expected in v6.5-rc1 (2023-05-17)
* pin_user_pages() was added by commit eddb1c228f79
* ("mm/gup: introduce pin_user_pages*() and FOLL_PIN") in v5.6.
*
* Removed vmas parameter from pin_user_pages() by commit 4c630f307455
* ("mm/gup: remove vmas parameter from pin_user_pages()") in v6.5.
*/
#include <linux/mm.h>
@ -63,25 +63,28 @@ typedef int vm_fault_t;
#define NV_UNPIN_USER_PAGE put_page
#endif // NV_PIN_USER_PAGES_PRESENT
/* get_user_pages
/*
* get_user_pages()
*
* The 8-argument version of get_user_pages was deprecated by commit
* (2016 Feb 12: cde70140fed8429acf7a14e2e2cbd3e329036653)for the non-remote case
* The 8-argument version of get_user_pages() was deprecated by commit
* cde70140fed8 ("mm/gup: Overload get_user_pages() functions") in v4.6-rc1.
* (calling get_user_pages with current and current->mm).
*
* Completely moved to the 6 argument version of get_user_pages -
* 2016 Apr 4: c12d2da56d0e07d230968ee2305aaa86b93a6832
* Completely moved to the 6 argument version of get_user_pages() by
* commit c12d2da56d0e ("mm/gup: Remove the macro overload API migration
* helpers from the get_user*() APIs") in v4.6-rc4.
*
* write and force parameters were replaced with gup_flags by -
* 2016 Oct 12: 768ae309a96103ed02eb1e111e838c87854d8b51
* write and force parameters were replaced with gup_flags by
* commit 768ae309a961 ("mm: replace get_user_pages() write/force parameters
* with gup_flags") in v4.9.
*
* A 7-argument version of get_user_pages was introduced into linux-4.4.y by
* commit 8e50b8b07f462ab4b91bc1491b1c91bd75e4ad40 which cherry-picked the
* replacement of the write and force parameters with gup_flags
* commit 8e50b8b07f462 ("mm: replace get_user_pages() write/force parameters
* with gup_flags") which cherry-picked the replacement of the write and
* force parameters with gup_flags.
*
* Removed vmas parameter from get_user_pages() by commit 7bbf9c8c99
* ("mm/gup: remove unused vmas parameter from get_user_pages()")
* in linux-next, expected in v6.5-rc1 (2023-05-17)
* Removed vmas parameter from get_user_pages() by commit 54d020692b34
* ("mm/gup: remove unused vmas parameter from get_user_pages()") in v6.5.
*
*/
@ -112,18 +115,19 @@ typedef int vm_fault_t;
}
#endif // NV_GET_USER_PAGES_HAS_ARGS_FLAGS
/* pin_user_pages_remote
/*
* pin_user_pages_remote()
*
* pin_user_pages_remote() was added by commit eddb1c228f7951d399240
* ("mm/gup: introduce pin_user_pages*() and FOLL_PIN") in v5.6 (2020-01-30)
* pin_user_pages_remote() was added by commit eddb1c228f79
* ("mm/gup: introduce pin_user_pages*() and FOLL_PIN") in v5.6.
*
* pin_user_pages_remote() removed 'tsk' parameter by commit
* 64019a2e467a ("mm/gup: remove task_struct pointer for all gup code")
* in v5.9-rc1 (2020-08-11). *
* 64019a2e467a ("mm/gup: remove task_struct pointer for all gup code")
* in v5.9.
*
* Removed unused vmas parameter from pin_user_pages_remote() by commit
* 83bcc2e132("mm/gup: remove unused vmas parameter from pin_user_pages_remote()")
* in linux-next, expected in v6.5-rc1 (2023-05-14)
* 0b295316b3a9 ("mm/gup: remove unused vmas parameter from
* pin_user_pages_remote()") in v6.5.
*
*/
@ -143,7 +147,7 @@ typedef int vm_fault_t;
/*
* get_user_pages_remote() was added by commit 1e9877902dc7
* ("mm/gup: Introduce get_user_pages_remote()") in v4.6 (2016-02-12).
* ("mm/gup: Introduce get_user_pages_remote()") in v4.6.
*
* Note that get_user_pages_remote() requires the caller to hold a reference on
* the task_struct (if non-NULL and if this API has tsk argument) and the mm_struct.
@ -153,19 +157,17 @@ typedef int vm_fault_t;
*
* get_user_pages_remote() write/force parameters were replaced
* with gup_flags by commit 9beae1ea8930 ("mm: replace get_user_pages_remote()
* write/force parameters with gup_flags") in v4.9 (2016-10-13).
* write/force parameters with gup_flags") in v4.9.
*
* get_user_pages_remote() added 'locked' parameter by commit 5b56d49fc31d
* ("mm: add locked parameter to get_user_pages_remote()") in
* v4.10 (2016-12-14).
* ("mm: add locked parameter to get_user_pages_remote()") in v4.10.
*
* get_user_pages_remote() removed 'tsk' parameter by
* commit 64019a2e467a ("mm/gup: remove task_struct pointer for
* all gup code") in v5.9-rc1 (2020-08-11).
* all gup code") in v5.9.
*
* Removed vmas parameter from get_user_pages_remote() by commit a4bde14d549
* ("mm/gup: remove vmas parameter from get_user_pages_remote()")
* in linux-next, expected in v6.5-rc1 (2023-05-14)
* Removed vmas parameter from get_user_pages_remote() by commit ca5e863233e8
* ("mm/gup: remove vmas parameter from get_user_pages_remote()") in v6.5.
*
*/

View File

@ -1,5 +1,5 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 1999-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-FileCopyrightText: Copyright (c) 1999-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person obtaining a
@ -609,6 +609,15 @@ typedef enum
NV_POWER_STATE_RUNNING
} nv_power_state_t;
typedef struct
{
const char *vidmem_power_status;
const char *dynamic_power_status;
const char *gc6_support;
const char *gcoff_support;
const char *s0ix_status;
} nv_power_info_t;
#define NV_PRIMARY_VGA(nv) ((nv)->primary_vga)
#define NV_IS_CTL_DEVICE(nv) ((nv)->flags & NV_FLAG_CONTROL)
@ -778,7 +787,7 @@ nv_state_t* NV_API_CALL nv_get_ctl_state (void);
void NV_API_CALL nv_set_dma_address_size (nv_state_t *, NvU32 );
NV_STATUS NV_API_CALL nv_alias_pages (nv_state_t *, NvU32, NvU32, NvU32, NvU64, NvU64 *, void **);
NV_STATUS NV_API_CALL nv_alias_pages (nv_state_t *, NvU32, NvU64, NvU32, NvU32, NvU64, NvU64 *, void **);
NV_STATUS NV_API_CALL nv_alloc_pages (nv_state_t *, NvU32, NvU64, NvBool, NvU32, NvBool, NvBool, NvS32, NvU64 *, void **);
NV_STATUS NV_API_CALL nv_free_pages (nv_state_t *, NvU32, NvBool, NvU32, void *);
@ -822,6 +831,7 @@ void NV_API_CALL nv_acpi_methods_init (NvU32 *);
void NV_API_CALL nv_acpi_methods_uninit (void);
NV_STATUS NV_API_CALL nv_acpi_method (NvU32, NvU32, NvU32, void *, NvU16, NvU32 *, void *, NvU16 *);
NV_STATUS NV_API_CALL nv_acpi_d3cold_dsm_for_upstream_port (nv_state_t *, NvU8 *, NvU32, NvU32, NvU32 *);
NV_STATUS NV_API_CALL nv_acpi_dsm_method (nv_state_t *, NvU8 *, NvU32, NvBool, NvU32, void *, NvU16, NvU32 *, void *, NvU16 *);
NV_STATUS NV_API_CALL nv_acpi_ddc_method (nv_state_t *, void *, NvU32 *, NvBool);
NV_STATUS NV_API_CALL nv_acpi_dod_method (nv_state_t *, NvU32 *, NvU32 *);
@ -990,10 +1000,10 @@ NV_STATUS NV_API_CALL rm_p2p_init_mapping (nvidia_stack_t *, NvU64, NvU6
NV_STATUS NV_API_CALL rm_p2p_destroy_mapping (nvidia_stack_t *, NvU64);
NV_STATUS NV_API_CALL rm_p2p_get_pages (nvidia_stack_t *, NvU64, NvU32, NvU64, NvU64, NvU64 *, NvU32 *, NvU32 *, NvU32 *, NvU8 **, void *);
NV_STATUS NV_API_CALL rm_p2p_get_gpu_info (nvidia_stack_t *, NvU64, NvU64, NvU8 **, void **);
NV_STATUS NV_API_CALL rm_p2p_get_pages_persistent (nvidia_stack_t *, NvU64, NvU64, void **, NvU64 *, NvU32 *, void *, void *);
NV_STATUS NV_API_CALL rm_p2p_get_pages_persistent (nvidia_stack_t *, NvU64, NvU64, void **, NvU64 *, NvU32 *, void *, void *, void **);
NV_STATUS NV_API_CALL rm_p2p_register_callback (nvidia_stack_t *, NvU64, NvU64, NvU64, void *, void (*)(void *), void *);
NV_STATUS NV_API_CALL rm_p2p_put_pages (nvidia_stack_t *, NvU64, NvU32, NvU64, void *);
NV_STATUS NV_API_CALL rm_p2p_put_pages_persistent(nvidia_stack_t *, void *, void *);
NV_STATUS NV_API_CALL rm_p2p_put_pages_persistent(nvidia_stack_t *, void *, void *, void *);
NV_STATUS NV_API_CALL rm_p2p_dma_map_pages (nvidia_stack_t *, nv_dma_device_t *, NvU8 *, NvU64, NvU32, NvU64 *, void **);
NV_STATUS NV_API_CALL rm_dma_buf_dup_mem_handle (nvidia_stack_t *, nv_state_t *, NvHandle, NvHandle, NvHandle, NvHandle, void *, NvHandle, NvU64, NvU64, NvHandle *, void **);
void NV_API_CALL rm_dma_buf_undup_mem_handle(nvidia_stack_t *, nv_state_t *, NvHandle, NvHandle);
@ -1027,9 +1037,7 @@ void NV_API_CALL rm_enable_dynamic_power_management(nvidia_stack_t *, nv_s
NV_STATUS NV_API_CALL rm_ref_dynamic_power(nvidia_stack_t *, nv_state_t *, nv_dynamic_power_mode_t);
void NV_API_CALL rm_unref_dynamic_power(nvidia_stack_t *, nv_state_t *, nv_dynamic_power_mode_t);
NV_STATUS NV_API_CALL rm_transition_dynamic_power(nvidia_stack_t *, nv_state_t *, NvBool, NvBool *);
const char* NV_API_CALL rm_get_vidmem_power_status(nvidia_stack_t *, nv_state_t *);
const char* NV_API_CALL rm_get_dynamic_power_management_status(nvidia_stack_t *, nv_state_t *);
const char* NV_API_CALL rm_get_gpu_gcx_support(nvidia_stack_t *, nv_state_t *, NvBool);
void NV_API_CALL rm_get_power_info(nvidia_stack_t *, nv_state_t *, nv_power_info_t *);
void NV_API_CALL rm_acpi_notify(nvidia_stack_t *, nv_state_t *, NvU32);
void NV_API_CALL rm_acpi_nvpcf_notify(nvidia_stack_t *);
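The three per-string getters above are folded into a single rm_get_power_info() call that fills the nv_power_info_t added earlier in this header. A minimal caller sketch, assuming a valid nvidia_stack_t/nv_state_t pair and that the returned strings are owned by RM, as they were for the old getters; the helper name is illustrative, not part of this commit:
/* Sketch: fetch every power status string with one RM call. */
static const char *sketch_vidmem_power_status(nvidia_stack_t *sp, nv_state_t *nv)
{
    nv_power_info_t power_info = { 0 };
    rm_get_power_info(sp, nv, &power_info);
    /* Each field points to a status string provided by RM, as the old */
    /* per-field getters did; the caller only reads it.                 */
    return power_info.vidmem_power_status;
}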
@ -1041,13 +1049,12 @@ NV_STATUS NV_API_CALL nv_vgpu_create_request(nvidia_stack_t *, nv_state_t *, c
NV_STATUS NV_API_CALL nv_vgpu_delete(nvidia_stack_t *, const NvU8 *, NvU16);
NV_STATUS NV_API_CALL nv_vgpu_get_type_ids(nvidia_stack_t *, nv_state_t *, NvU32 *, NvU32 *, NvBool, NvU8, NvBool);
NV_STATUS NV_API_CALL nv_vgpu_get_type_info(nvidia_stack_t *, nv_state_t *, NvU32, char *, int, NvU8);
NV_STATUS NV_API_CALL nv_vgpu_get_bar_info(nvidia_stack_t *, nv_state_t *, const NvU8 *, NvU64 *, NvU32, void *, NvBool *);
NV_STATUS NV_API_CALL nv_vgpu_get_bar_info(nvidia_stack_t *, nv_state_t *, const NvU8 *, NvU64 *,
NvU64 *, NvU64 *, NvU32 *, NvBool *, NvU8 *);
NV_STATUS NV_API_CALL nv_vgpu_get_hbm_info(nvidia_stack_t *, nv_state_t *, const NvU8 *, NvU64 *, NvU64 *);
NV_STATUS NV_API_CALL nv_vgpu_start(nvidia_stack_t *, const NvU8 *, void *, NvS32 *, NvU8 *, NvU32);
NV_STATUS NV_API_CALL nv_vgpu_get_sparse_mmap(nvidia_stack_t *, nv_state_t *, const NvU8 *, NvU64 **, NvU64 **, NvU32 *);
NV_STATUS NV_API_CALL nv_vgpu_process_vf_info(nvidia_stack_t *, nv_state_t *, NvU8, NvU32, NvU8, NvU8, NvU8, NvBool, void *);
NV_STATUS NV_API_CALL nv_vgpu_update_request(nvidia_stack_t *, const NvU8 *, NvU32, NvU64 *, NvU64 *, const char *);
NV_STATUS NV_API_CALL nv_gpu_bind_event(nvidia_stack_t *);
NV_STATUS NV_API_CALL nv_gpu_unbind_event(nvidia_stack_t *, NvU32, NvBool *);
NV_STATUS NV_API_CALL nv_get_usermap_access_params(nv_state_t*, nv_usermap_access_params_t*);
nv_soc_irq_type_t NV_API_CALL nv_get_current_irq_type(nv_state_t*);

View File

@ -1,5 +1,5 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 2013-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-FileCopyrightText: Copyright (c) 2013-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person obtaining a
@ -1462,6 +1462,29 @@ NV_STATUS nvUvmInterfacePagingChannelPushStream(UvmGpuPagingChannelHandle channe
char *methodStream,
NvU32 methodStreamSize);
/*******************************************************************************
nvUvmInterfaceKeyRotationChannelDisable
This function notifies RM that the given channels are idle.
This function is called after RM has notified UVM that keys need to be rotated.
When called, RM will disable the channels, rotate their keys, and then re-enable
the channels.
Locking: This function acquires an API and GPU lock.
Memory : This function dynamically allocates memory.
Arguments:
channelList[IN] - An array of channel handles whose channels are idle.
channelListCount[IN] - Number of channels in channelList. Its value must be
greater than 0.
Error codes:
NV_ERR_INVALID_ARGUMENT - channelList is NULL or channelListCount is 0.
*/
NV_STATUS nvUvmInterfaceKeyRotationChannelDisable(uvmGpuChannelHandle channelList[],
NvU32 channelListCount);
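A minimal sketch of the call described above, assuming chanA and chanB are handles of channels the client has already drained; the helper name is hypothetical:
/* Sketch: report two idle channels so RM can rotate their keys and re-enable them. */
static NV_STATUS sketch_disable_for_key_rotation(uvmGpuChannelHandle chanA,
                                                 uvmGpuChannelHandle chanB)
{
    uvmGpuChannelHandle idleChannels[] = { chanA, chanB };

    /* NV_ERR_INVALID_ARGUMENT is returned if the list is NULL or the count is 0. */
    return nvUvmInterfaceKeyRotationChannelDisable(idleChannels, 2);
}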
/*******************************************************************************
Cryptography Services Library (CSL) Interface
*/
@ -1507,7 +1530,7 @@ void nvUvmInterfaceDeinitCslContext(UvmCslContext *uvmCslContext);
/*******************************************************************************
nvUvmInterfaceCslUpdateContext
Updates a context after a key rotation event and can only be called once per
Updates contexts after a key rotation event and can only be called once per
key rotation event. Following a key rotation event, and before
nvUvmInterfaceCslUpdateContext is called, data encrypted by the GPU with the
previous key can be decrypted with nvUvmInterfaceCslDecrypt.
@ -1516,12 +1539,14 @@ void nvUvmInterfaceDeinitCslContext(UvmCslContext *uvmCslContext);
Memory : This function does not dynamically allocate memory.
Arguments:
uvmCslContext[IN] - The CSL context associated with a channel.
contextList[IN/OUT] - An array of pointers to CSL contexts.
contextListCount[IN] - Number of CSL contexts in contextList. Its value
must be greater than 0.
Error codes:
NV_ERR_INVALID_ARGUMENT - The CSL context is not associated with a channel.
NV_ERR_INVALID_ARGUMENT - contextList is NULL or contextListCount is 0.
*/
NV_STATUS nvUvmInterfaceCslUpdateContext(UvmCslContext *uvmCslContext);
NV_STATUS nvUvmInterfaceCslUpdateContext(UvmCslContext *contextList[],
NvU32 contextListCount);
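A minimal sketch of the new list-based form, assuming ctxA and ctxB are CSL contexts that were previously initialized for their channels; names are illustrative:
/* Sketch: update two CSL contexts after a single key rotation event. */
static NV_STATUS sketch_update_csl_contexts(UvmCslContext *ctxA, UvmCslContext *ctxB)
{
    UvmCslContext *contexts[] = { ctxA, ctxB };

    /* NV_ERR_INVALID_ARGUMENT is returned if contextList is NULL or the count is 0. */
    return nvUvmInterfaceCslUpdateContext(contexts, 2);
}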
/*******************************************************************************
nvUvmInterfaceCslRotateIv
@ -1739,7 +1764,14 @@ NV_STATUS nvUvmInterfaceCslIncrementIv(UvmCslContext *uvmCslContext,
Checks and logs information about non-CSL encryptions, such as those that
originate from the GPU.
This function does not modify elements of the UvmCslContext.
For contexts associated with channels, this function does not modify elements of
the UvmCslContext and must be called for each external encryption invocation.
For the context associated with fault buffers, bufferSize can encompass multiple
encryption invocations, and the UvmCslContext will be updated following a key
rotation event.
In either case the IV remains unmodified after this function is called.
Locking: This function does not acquire an API or GPU lock.
Memory : This function does not dynamically allocate memory.
@ -1748,7 +1780,7 @@ NV_STATUS nvUvmInterfaceCslIncrementIv(UvmCslContext *uvmCslContext,
Arguments:
uvmCslContext[IN/OUT] - The CSL context.
bufferSize[OUT] - The size of the buffer encrypted by the
bufferSize[OUT] - The size of the buffer(s) encrypted by the
external entity in units of bytes.
Error codes:

View File

@ -1,5 +1,5 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 2014-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-FileCopyrightText: Copyright (c) 2014-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person obtaining a
@ -39,12 +39,12 @@
// are multiple BIG page sizes in RM. These defines are used as flags to "0"
// should be OK when user is not sure which pagesize allocation it wants
//
#define UVM_PAGE_SIZE_DEFAULT 0x0
#define UVM_PAGE_SIZE_4K 0x1000
#define UVM_PAGE_SIZE_64K 0x10000
#define UVM_PAGE_SIZE_128K 0x20000
#define UVM_PAGE_SIZE_2M 0x200000
#define UVM_PAGE_SIZE_512M 0x20000000
#define UVM_PAGE_SIZE_DEFAULT 0x0ULL
#define UVM_PAGE_SIZE_4K 0x1000ULL
#define UVM_PAGE_SIZE_64K 0x10000ULL
#define UVM_PAGE_SIZE_128K 0x20000ULL
#define UVM_PAGE_SIZE_2M 0x200000ULL
#define UVM_PAGE_SIZE_512M 0x20000000ULL
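The ULL suffixes make these flags 64-bit, matching the NvU64 page_size parameters introduced elsewhere in this change. A small alignment helper sketched under that assumption; the helper itself is not part of this commit:
/* Sketch: check that a value is aligned to one of the UVM_PAGE_SIZE_* flags, */
/* e.g. sketch_uvm_is_aligned(base, UVM_PAGE_SIZE_2M).                         */
static NvBool sketch_uvm_is_aligned(NvU64 value, NvU64 page_size)
{
    return (value & (page_size - 1)) == 0;
}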
//
// When modifying flags, make sure they are compatible with the mirrored
@ -267,6 +267,7 @@ typedef struct UvmGpuChannelInfo_tag
// The errorNotifier is filled out when the channel hits an RC error.
NvNotification *errorNotifier;
NvNotification *keyRotationNotifier;
NvU32 hwRunlistId;
NvU32 hwChannelId;
@ -292,13 +293,13 @@ typedef struct UvmGpuChannelInfo_tag
// GPU VAs of both GPFIFO and GPPUT are needed in Confidential Computing
// so a channel can be controlled via another channel (SEC2 or WLC/LCIC)
NvU64 gpFifoGpuVa;
NvU64 gpPutGpuVa;
NvU64 gpGetGpuVa;
NvU64 gpFifoGpuVa;
NvU64 gpPutGpuVa;
NvU64 gpGetGpuVa;
// GPU VA of work submission offset is needed in Confidential Computing
// so CE channels can ring doorbell of other channels as required for
// WLC/LCIC work submission
NvU64 workSubmissionOffsetGpuVa;
NvU64 workSubmissionOffsetGpuVa;
} UvmGpuChannelInfo;
typedef enum
@ -1086,4 +1087,21 @@ typedef enum UvmCslOperation
UVM_CSL_OPERATION_DECRYPT
} UvmCslOperation;
typedef enum UVM_KEY_ROTATION_STATUS {
// Key rotation complete/not in progress
UVM_KEY_ROTATION_STATUS_IDLE = 0,
// RM is waiting for clients to report their channels are idle for key rotation
UVM_KEY_ROTATION_STATUS_PENDING = 1,
// Key rotation is in progress
UVM_KEY_ROTATION_STATUS_IN_PROGRESS = 2,
// Key rotation timeout failure, RM will RC non-idle channels.
// UVM should never see this status value.
UVM_KEY_ROTATION_STATUS_FAILED_TIMEOUT = 3,
// Key rotation failed because upper threshold was crossed, RM will RC non-idle channels
UVM_KEY_ROTATION_STATUS_FAILED_THRESHOLD = 4,
// Internal RM failure while rotating keys for a certain channel, RM will RC the channel.
UVM_KEY_ROTATION_STATUS_FAILED_ROTATION = 5,
UVM_KEY_ROTATION_STATUS_MAX_COUNT = 6,
} UVM_KEY_ROTATION_STATUS;
#endif // _NV_UVM_TYPES_H_
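The comments above state which of these values a UVM client should ever observe. A hedged sketch of how a caller might branch on them; the helper name is illustrative, not from this commit:
/* Sketch: react to a key rotation status reported by RM. */
static void sketch_handle_key_rotation(UVM_KEY_ROTATION_STATUS status)
{
    switch (status) {
        case UVM_KEY_ROTATION_STATUS_IDLE:
            break;                      /* nothing to do */
        case UVM_KEY_ROTATION_STATUS_PENDING:
            /* RM is waiting for this client to report idle channels, */
            /* e.g. via nvUvmInterfaceKeyRotationChannelDisable().    */
            break;
        case UVM_KEY_ROTATION_STATUS_FAILED_TIMEOUT:
            /* Per the comment above, UVM should never see this value. */
            break;
        default:
            break;
    }
}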

View File

@ -1,5 +1,5 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 1993-2020 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person obtaining a
@ -494,6 +494,23 @@ do \
//
#define NV_TWO_N_MINUS_ONE(n) (((1ULL<<(n/2))<<((n+1)/2))-1)
//
// Create a 64b bitmask with n bits set
// This is the same as ((1ULL<<n) - 1), but it doesn't overflow for n=64
//
// ...
// n=-1, 0x0000000000000000
// n=0, 0x0000000000000000
// n=1, 0x0000000000000001
// ...
// n=63, 0x7FFFFFFFFFFFFFFF
// n=64, 0xFFFFFFFFFFFFFFFF
// n=65, 0xFFFFFFFFFFFFFFFF
// n=66, 0xFFFFFFFFFFFFFFFF
// ...
//
#define NV_BITMASK64(n) ((n<1) ? 0ULL : (NV_U64_MAX>>((n>64) ? 0 : (64-n))))
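A few spot checks of the table above, written as a small self-checking helper; the helper is illustrative only and not part of this commit:
/* Sketch: NV_BITMASK64(n) clamps at both ends instead of overflowing. */
static NvBool nvBitmask64SketchChecks(void)
{
    return (NV_BITMASK64(0)  == 0x0000000000000000ULL) &&
           (NV_BITMASK64(1)  == 0x0000000000000001ULL) &&
           (NV_BITMASK64(63) == 0x7FFFFFFFFFFFFFFFULL) &&
           (NV_BITMASK64(64) == 0xFFFFFFFFFFFFFFFFULL) &&
           (NV_BITMASK64(65) == 0xFFFFFFFFFFFFFFFFULL);
}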
#define DRF_READ_1WORD_BS(d,r,f,v) \
((DRF_EXTENT_MW(NV##d##r##f)<8)?DRF_READ_1BYTE_BS(NV##d##r##f,(v)): \
((DRF_EXTENT_MW(NV##d##r##f)<16)?DRF_READ_2BYTE_BS(NV##d##r##f,(v)): \
@ -574,6 +591,13 @@ nvMaskPos32(const NvU32 mask, const NvU32 bitIdx)
n32 = BIT_IDX_32(LOWESTBIT(n32));\
}
// Destructive operation on n64
#define LOWESTBITIDX_64(n64) \
{ \
n64 = BIT_IDX_64(LOWESTBIT(n64));\
}
// Destructive operation on n32
#define HIGHESTBITIDX_32(n32) \
{ \
@ -918,6 +942,11 @@ static NV_FORCEINLINE void *NV_NVUPTR_TO_PTR(NvUPtr address)
// Use (lo) if (b) is less than 64, and (hi) if >= 64.
//
#define NV_BIT_SET_128(b, lo, hi) { nvAssert( (b) < 128 ); if ( (b) < 64 ) (lo) |= NVBIT64(b); else (hi) |= NVBIT64( b & 0x3F ); }
//
// Clear the bit at pos (b) for U64 which is < 128.
// Use (lo) if (b) is less than 64, and (hi) if >= 64.
//
#define NV_BIT_CLEAR_128(b, lo, hi) { nvAssert( (b) < 128 ); if ( (b) < 64 ) (lo) &= ~NVBIT64(b); else (hi) &= ~NVBIT64( b & 0x3F ); }
// Get the number of elements in the specified fixed-size array
#define NV_ARRAY_ELEMENTS(x) ((sizeof(x)/sizeof((x)[0])))
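A short sketch, as it might appear in a caller of this header, of the (lo, hi) split convention shared by NV_BIT_SET_128 and the new NV_BIT_CLEAR_128 above; the function is illustrative only:
/* Sketch: toggle bit 70 of a 128-bit value kept as two NvU64 halves. */
static void nvBit128SketchExample(void)
{
    NvU64 lo = 0, hi = 0;

    NV_BIT_SET_128(70, lo, hi);   /* 70 >= 64, so bit (70 & 0x3F) == 6 of hi is set */
    NV_BIT_CLEAR_128(70, lo, hi); /* clears that same bit; lo and hi are zero again */
}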

View File

@ -1,5 +1,5 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 2014-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-FileCopyrightText: Copyright (c) 2014-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person obtaining a
@ -152,6 +152,7 @@ NV_STATUS_CODE(NV_ERR_FABRIC_MANAGER_NOT_PRESENT, 0x0000007A, "Fabric Manag
NV_STATUS_CODE(NV_ERR_ALREADY_SIGNALLED, 0x0000007B, "Semaphore Surface value already >= requested wait value")
NV_STATUS_CODE(NV_ERR_QUEUE_TASK_SLOT_NOT_AVAILABLE, 0x0000007C, "PMU RPC error due to no queue slot available for this event")
NV_STATUS_CODE(NV_ERR_KEY_ROTATION_IN_PROGRESS, 0x0000007D, "Operation not allowed as key rotation is in progress")
NV_STATUS_CODE(NV_ERR_TEST_ONLY_CODE_NOT_ENABLED, 0x0000007E, "Test-only code path not enabled")
// Warnings:
NV_STATUS_CODE(NV_WARN_HOT_SWITCH, 0x00010001, "WARNING Hot switch")

View File

@ -152,6 +152,12 @@ typedef signed short NvS16; /* -32768 to 32767 */
(((NvU32)(c) & 0xff) << 8) | \
(((NvU32)(d) & 0xff))))
// Macro to build an NvU64 from two DWORDS, listed from msb to lsb
#define NvU64_BUILD(a, b) \
((NvU64)( \
(((NvU64)(a) & ~0U) << 32) | \
(((NvU64)(b) & ~0U))))
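For example, a one-line check of the msb-to-lsb ordering documented above; the helper name is illustrative:
/* Sketch: the first argument lands in the upper 32 bits, the second in the lower. */
static inline NvU64 nvU64BuildSketch(void)
{
    return NvU64_BUILD(0x12345678U, 0x9ABCDEF0U); /* == 0x123456789ABCDEF0ULL */
}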
#if NVTYPES_USE_STDINT
typedef uint32_t NvV32; /* "void": enumerated or multiple fields */
typedef uint32_t NvU32; /* 0 to 4294967295 */

View File

@ -1,5 +1,5 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 1999-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-FileCopyrightText: Copyright (c) 1999-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person obtaining a
@ -101,9 +101,10 @@ NV_STATUS NV_API_CALL rm_gpu_ops_paging_channels_map(nvidia_stack_t *, nvgpuAdd
void NV_API_CALL rm_gpu_ops_paging_channels_unmap(nvidia_stack_t *, nvgpuAddressSpaceHandle_t, NvU64, nvgpuDeviceHandle_t);
NV_STATUS NV_API_CALL rm_gpu_ops_paging_channel_push_stream(nvidia_stack_t *, nvgpuPagingChannelHandle_t, char *, NvU32);
NV_STATUS NV_API_CALL rm_gpu_ops_key_rotation_channel_disable(nvidia_stack_t *, nvgpuChannelHandle_t [], NvU32);
NV_STATUS NV_API_CALL rm_gpu_ops_ccsl_context_init(nvidia_stack_t *, struct ccslContext_t **, nvgpuChannelHandle_t);
NV_STATUS NV_API_CALL rm_gpu_ops_ccsl_context_clear(nvidia_stack_t *, struct ccslContext_t *);
NV_STATUS NV_API_CALL rm_gpu_ops_ccsl_context_update(nvidia_stack_t *, struct ccslContext_t *);
NV_STATUS NV_API_CALL rm_gpu_ops_ccsl_context_update(nvidia_stack_t *, UvmCslContext *[], NvU32);
NV_STATUS NV_API_CALL rm_gpu_ops_ccsl_rotate_iv(nvidia_stack_t *, struct ccslContext_t *, NvU8);
NV_STATUS NV_API_CALL rm_gpu_ops_ccsl_encrypt(nvidia_stack_t *, struct ccslContext_t *, NvU32, NvU8 const *, NvU8 *, NvU8 *);
NV_STATUS NV_API_CALL rm_gpu_ops_ccsl_encrypt_with_iv(nvidia_stack_t *, struct ccslContext_t *, NvU32, NvU8 const *, NvU8*, NvU8 *, NvU8 *);

View File

@ -1416,6 +1416,42 @@ compile_test() {
compile_check_conftest "$CODE" "NV_VFIO_REGISTER_EMULATED_IOMMU_DEV_PRESENT" "" "functions"
;;
bus_type_has_iommu_ops)
#
# Determine if 'bus_type' structure has a 'iommu_ops' field.
#
# This field was removed by commit 17de3f5fdd35 (iommu: Retire bus ops)
# in v6.8
#
CODE="
#include <linux/device.h>
int conftest_bus_type_has_iommu_ops(void) {
return offsetof(struct bus_type, iommu_ops);
}"
compile_check_conftest "$CODE" "NV_BUS_TYPE_HAS_IOMMU_OPS" "" "types"
;;
eventfd_signal_has_counter_arg)
#
# Determine if eventfd_signal() function has an additional 'counter' argument.
#
# This argument was removed by commit 3652117f8548 (eventfd: simplify
# eventfd_signal()) in v6.8
#
CODE="
#include <linux/eventfd.h>
void conftest_eventfd_signal_has_counter_arg(void) {
struct eventfd_ctx *ctx;
eventfd_signal(ctx, 1);
}"
compile_check_conftest "$CODE" "NV_EVENTFD_SIGNAL_HAS_COUNTER_ARG" "" "types"
;;
drm_available)
# Determine if the DRM subsystem is usable
CODE="
@ -5520,7 +5556,8 @@ compile_test() {
of_dma_configure)
#
# Determine if of_dma_configure() function is present
# Determine if of_dma_configure() function is present, and how
# many arguments it takes.
#
# Added by commit 591c1ee465ce ("of: configure the platform
# device dma parameters") in v3.16. However, it was a static,
@ -5530,17 +5567,69 @@ compile_test() {
# commit 1f5c69aa51f9 ("of: Move of_dma_configure() to device.c
# to help re-use") in v4.1.
#
CODE="
# It subsequently began taking a third parameter with commit
# 3d6ce86ee794 ("drivers: remove force dma flag from buses")
# in v4.18.
#
echo "$CONFTEST_PREAMBLE
#if defined(NV_LINUX_OF_DEVICE_H_PRESENT)
#include <linux/of_device.h>
#endif
void conftest_of_dma_configure(void)
{
of_dma_configure();
}
"
" > conftest$$.c
compile_check_conftest "$CODE" "NV_OF_DMA_CONFIGURE_PRESENT" "" "functions"
$CC $CFLAGS -c conftest$$.c > /dev/null 2>&1
rm -f conftest$$.c
if [ -f conftest$$.o ]; then
rm -f conftest$$.o
echo "#undef NV_OF_DMA_CONFIGURE_PRESENT" | append_conftest "functions"
echo "#undef NV_OF_DMA_CONFIGURE_ARGUMENT_COUNT" | append_conftest "functions"
else
echo "#define NV_OF_DMA_CONFIGURE_PRESENT" | append_conftest "functions"
echo "$CONFTEST_PREAMBLE
#if defined(NV_LINUX_OF_DEVICE_H_PRESENT)
#include <linux/of_device.h>
#endif
void conftest_of_dma_configure(void) {
of_dma_configure(NULL, NULL, false);
}" > conftest$$.c
$CC $CFLAGS -c conftest$$.c > /dev/null 2>&1
rm -f conftest$$.c
if [ -f conftest$$.o ]; then
rm -f conftest$$.o
echo "#define NV_OF_DMA_CONFIGURE_ARGUMENT_COUNT 3" | append_conftest "functions"
return
fi
echo "$CONFTEST_PREAMBLE
#if defined(NV_LINUX_OF_DEVICE_H_PRESENT)
#include <linux/of_device.h>
#endif
void conftest_of_dma_configure(void) {
of_dma_configure(NULL, NULL);
}" > conftest$$.c
$CC $CFLAGS -c conftest$$.c > /dev/null 2>&1
rm -f conftest$$.c
if [ -f conftest$$.o ]; then
rm -f conftest$$.o
echo "#define NV_OF_DMA_CONFIGURE_ARGUMENT_COUNT 2" | append_conftest "functions"
return
fi
fi
;;
icc_get)
@ -6761,12 +6850,45 @@ compile_test() {
compile_check_conftest "$CODE" "NV_DRM_MODE_CREATE_DP_COLORSPACE_PROPERTY_HAS_SUPPORTED_COLORSPACES_ARG" "" "types"
;;
drm_syncobj_features_present)
# Determine if DRIVER_SYNCOBJ and DRIVER_SYNCOBJ_TIMELINE DRM
# driver features are present. Timeline DRM synchronization objects
# may only be used if both of these are supported by the driver.
#
# DRIVER_SYNCOBJ_TIMELINE Added by commit 060cebb20cdb ("drm:
# introduce a capability flag for syncobj timeline support") in
# v5.2
#
# DRIVER_SYNCOBJ Added by commit e9083420bbac ("drm: introduce
# sync objects (v4)") in v4.12
CODE="
#if defined(NV_DRM_DRM_DRV_H_PRESENT)
#include <drm/drm_drv.h>
#endif
int features = DRIVER_SYNCOBJ | DRIVER_SYNCOBJ_TIMELINE;"
compile_check_conftest "$CODE" "NV_DRM_SYNCOBJ_FEATURES_PRESENT" "" "types"
;;
stack_trace)
# Determine if functions stack_trace_{save,print} are present.
# Added by commit e9b98e162 ("stacktrace: Provide helpers for
# common stack trace operations") in v5.2.
CODE="
#include <linux/stacktrace.h>
void conftest_stack_trace(void) {
stack_trace_save();
stack_trace_print();
}"
compile_check_conftest "$CODE" "NV_STACK_TRACE_PRESENT" "" "functions"
;;
drm_unlocked_ioctl_flag_present)
# Determine if DRM_UNLOCKED IOCTL flag is present.
#
# DRM_UNLOCKED was removed by commit 2798ffcc1d6a ("drm: Remove
# locking for legacy ioctls and DRM_UNLOCKED") in Linux
# next-20231208.
# locking for legacy ioctls and DRM_UNLOCKED") in v6.8.
#
# DRM_UNLOCKED definition was moved from drmP.h to drm_ioctl.h by
# commit 2640981f3600 ("drm: document drm_ioctl.[hc]") in v4.12.

View File

@ -52,6 +52,7 @@ NV_HEADER_PRESENCE_TESTS = \
linux/dma-resv.h \
soc/tegra/chip-id.h \
soc/tegra/fuse.h \
soc/tegra/fuse-helper.h \
soc/tegra/tegra_bpmp.h \
video/nv_internal.h \
linux/platform/tegra/dce/dce-client-ipc.h \

View File

@ -176,12 +176,10 @@ cursor_plane_req_config_update(struct drm_plane *plane,
return;
}
*req_config = (struct NvKmsKapiCursorRequestedConfig) {
.surface = to_nv_framebuffer(plane_state->fb)->pSurface,
.dstX = plane_state->crtc_x,
.dstY = plane_state->crtc_y,
};
memset(req_config, 0, sizeof(*req_config));
req_config->surface = to_nv_framebuffer(plane_state->fb)->pSurface;
req_config->dstX = plane_state->crtc_x;
req_config->dstY = plane_state->crtc_y;
#if defined(NV_DRM_ALPHA_BLENDING_AVAILABLE)
if (plane->blend_mode_property != NULL && plane->alpha_property != NULL) {
@ -275,24 +273,22 @@ plane_req_config_update(struct drm_plane *plane,
return 0;
}
*req_config = (struct NvKmsKapiLayerRequestedConfig) {
.config = {
.surface = to_nv_framebuffer(plane_state->fb)->pSurface,
memset(req_config, 0, sizeof(*req_config));
/* Source values are 16.16 fixed point */
.srcX = plane_state->src_x >> 16,
.srcY = plane_state->src_y >> 16,
.srcWidth = plane_state->src_w >> 16,
.srcHeight = plane_state->src_h >> 16,
req_config->config.surface = to_nv_framebuffer(plane_state->fb)->pSurface;
.dstX = plane_state->crtc_x,
.dstY = plane_state->crtc_y,
.dstWidth = plane_state->crtc_w,
.dstHeight = plane_state->crtc_h,
/* Source values are 16.16 fixed point */
req_config->config.srcX = plane_state->src_x >> 16;
req_config->config.srcY = plane_state->src_y >> 16;
req_config->config.srcWidth = plane_state->src_w >> 16;
req_config->config.srcHeight = plane_state->src_h >> 16;
.csc = old_config.csc
},
};
req_config->config.dstX = plane_state->crtc_x;
req_config->config.dstY = plane_state->crtc_y;
req_config->config.dstWidth = plane_state->crtc_w;
req_config->config.dstHeight = plane_state->crtc_h;
req_config->config.csc = old_config.csc;
#if defined(NV_DRM_ROTATION_AVAILABLE)
/*
@ -688,9 +684,7 @@ static int nv_drm_plane_atomic_set_property(
to_nv_drm_plane_state(state);
if (property == nv_dev->nv_out_fence_property) {
#if defined(NV_LINUX_NVHOST_H_PRESENT) && defined(CONFIG_TEGRA_GRHOST)
nv_drm_plane_state->fd_user_ptr = u64_to_user_ptr(val);
#endif
nv_drm_plane_state->fd_user_ptr = (void __user *)(uintptr_t)(val);
return 0;
} else if (property == nv_dev->nv_input_colorspace_property) {
nv_drm_plane_state->input_colorspace = val;
@ -875,14 +869,12 @@ static inline void nv_drm_crtc_duplicate_req_head_modeset_config(
* there is no change in new configuration yet with respect
* to older one!
*/
*new = (struct NvKmsKapiHeadRequestedConfig) {
.modeSetConfig = old->modeSetConfig,
};
memset(new, 0, sizeof(*new));
new->modeSetConfig = old->modeSetConfig;
for (i = 0; i < ARRAY_SIZE(old->layerRequestedConfig); i++) {
new->layerRequestedConfig[i] = (struct NvKmsKapiLayerRequestedConfig) {
.config = old->layerRequestedConfig[i].config,
};
new->layerRequestedConfig[i].config =
old->layerRequestedConfig[i].config;
}
}

View File

@ -373,19 +373,15 @@ static int nv_drm_create_properties(struct nv_drm_device *nv_dev)
len++;
}
#if defined(NV_LINUX_NVHOST_H_PRESENT) && defined(CONFIG_TEGRA_GRHOST)
if (!nv_dev->supportsSyncpts) {
return 0;
if (nv_dev->supportsSyncpts) {
nv_dev->nv_out_fence_property =
drm_property_create_range(nv_dev->dev, DRM_MODE_PROP_ATOMIC,
"NV_DRM_OUT_FENCE_PTR", 0, U64_MAX);
if (nv_dev->nv_out_fence_property == NULL) {
return -ENOMEM;
}
}
nv_dev->nv_out_fence_property =
drm_property_create_range(nv_dev->dev, DRM_MODE_PROP_ATOMIC,
"NV_DRM_OUT_FENCE_PTR", 0, U64_MAX);
if (nv_dev->nv_out_fence_property == NULL) {
return -ENOMEM;
}
#endif
nv_dev->nv_input_colorspace_property =
drm_property_create_enum(nv_dev->dev, 0, "NV_INPUT_COLORSPACE",
enum_list, len);
@ -480,6 +476,22 @@ static int nv_drm_load(struct drm_device *dev, unsigned long flags)
return -ENODEV;
}
#if defined(NV_DRM_FBDEV_GENERIC_AVAILABLE)
/*
* If fbdev is enabled, take modeset ownership now before other DRM clients
* can take master (and thus NVKMS ownership).
*/
if (nv_drm_fbdev_module_param) {
if (!nvKms->grabOwnership(pDevice)) {
nvKms->freeDevice(pDevice);
NV_DRM_DEV_LOG_ERR(nv_dev, "Failed to grab NVKMS modeset ownership");
return -EBUSY;
}
nv_dev->hasFramebufferConsole = NV_TRUE;
}
#endif
mutex_lock(&nv_dev->lock);
/* Set NvKmsKapiDevice */
@ -590,6 +602,15 @@ static void __nv_drm_unload(struct drm_device *dev)
return;
}
/* Release modeset ownership if fbdev is enabled */
#if defined(NV_DRM_FBDEV_GENERIC_AVAILABLE)
if (nv_dev->hasFramebufferConsole) {
drm_atomic_helper_shutdown(dev);
nvKms->releaseOwnership(nv_dev->pDevice);
}
#endif
cancel_delayed_work_sync(&nv_dev->hotplug_event_work);
mutex_lock(&nv_dev->lock);
@ -781,6 +802,14 @@ static int nv_drm_get_dev_info_ioctl(struct drm_device *dev,
return 0;
}
static int nv_drm_get_drm_file_unique_id_ioctl(struct drm_device *dev,
void *data, struct drm_file *filep)
{
struct drm_nvidia_get_drm_file_unique_id_params *params = data;
params->id = (u64)(filep->driver_priv);
return 0;
}
static int nv_drm_dmabuf_supported_ioctl(struct drm_device *dev,
void *data, struct drm_file *filep)
{
@ -1279,6 +1308,17 @@ static void nv_drm_postclose(struct drm_device *dev, struct drm_file *filep)
}
#endif /* NV_DRM_ATOMIC_MODESET_AVAILABLE */
static int nv_drm_open(struct drm_device *dev, struct drm_file *filep)
{
_Static_assert(sizeof(filep->driver_priv) >= sizeof(u64),
"filep->driver_priv can not hold an u64");
static atomic64_t id = ATOMIC_INIT(0);
filep->driver_priv = (void *)atomic64_inc_return(&id);
return 0;
}
#if defined(NV_DRM_MASTER_HAS_LEASES)
static struct drm_master *nv_drm_find_lessee(struct drm_master *master,
int lessee_id)
@ -1522,6 +1562,9 @@ static const struct drm_ioctl_desc nv_drm_ioctls[] = {
DRM_IOCTL_DEF_DRV(NVIDIA_GET_DEV_INFO,
nv_drm_get_dev_info_ioctl,
DRM_RENDER_ALLOW|DRM_UNLOCKED),
DRM_IOCTL_DEF_DRV(NVIDIA_GET_DRM_FILE_UNIQUE_ID,
nv_drm_get_drm_file_unique_id_ioctl,
DRM_RENDER_ALLOW|DRM_UNLOCKED),
#if defined(NV_DRM_FENCE_AVAILABLE)
DRM_IOCTL_DEF_DRV(NVIDIA_FENCE_SUPPORTED,
@ -1604,6 +1647,9 @@ static struct drm_driver nv_drm_driver = {
.driver_features =
#if defined(NV_DRM_DRIVER_PRIME_FLAG_PRESENT)
DRIVER_PRIME |
#endif
#if defined(NV_DRM_SYNCOBJ_FEATURES_PRESENT)
DRIVER_SYNCOBJ | DRIVER_SYNCOBJ_TIMELINE |
#endif
DRIVER_GEM | DRIVER_RENDER,
@ -1615,14 +1661,14 @@ static struct drm_driver nv_drm_driver = {
.num_ioctls = ARRAY_SIZE(nv_drm_ioctls),
/*
* linux-next commit 71a7974ac701 ("drm/prime: Unexport helpers for fd/handle
* conversion") unexports drm_gem_prime_handle_to_fd() and
* Linux kernel v6.6 commit 71a7974ac701 ("drm/prime: Unexport helpers
* for fd/handle conversion") unexports drm_gem_prime_handle_to_fd() and
* drm_gem_prime_fd_to_handle().
*
* Prior linux-next commit 6b85aa68d9d5 ("drm: Enable PRIME import/export for
* all drivers") made these helpers the default when .prime_handle_to_fd /
* .prime_fd_to_handle are unspecified, so it's fine to just skip specifying
* them if the helpers aren't present.
* Prior Linux kernel v6.6 commit 6b85aa68d9d5 ("drm: Enable PRIME
* import/export for all drivers") made these helpers the default when
* .prime_handle_to_fd / .prime_fd_to_handle are unspecified, so it's fine
* to just skip specifying them if the helpers aren't present.
*/
#if NV_IS_EXPORT_SYMBOL_PRESENT_drm_gem_prime_handle_to_fd
.prime_handle_to_fd = drm_gem_prime_handle_to_fd,
@ -1656,6 +1702,7 @@ static struct drm_driver nv_drm_driver = {
#if defined(NV_DRM_ATOMIC_MODESET_AVAILABLE)
.postclose = nv_drm_postclose,
#endif
.open = nv_drm_open,
.fops = &nv_drm_fops,
@ -1714,6 +1761,7 @@ void nv_drm_register_drm_device(const nv_gpu_info_t *gpu_info)
struct nv_drm_device *nv_dev = NULL;
struct drm_device *dev = NULL;
struct device *device = gpu_info->os_device_ptr;
bool bus_is_pci;
DRM_DEBUG(
"Registering device for NVIDIA GPU ID 0x08%x",
@ -1747,7 +1795,7 @@ void nv_drm_register_drm_device(const nv_gpu_info_t *gpu_info)
dev->dev_private = nv_dev;
nv_dev->dev = dev;
bool bus_is_pci =
bus_is_pci =
#if defined(NV_LINUX)
device->bus == &pci_bus_type;
#elif defined(NV_BSD)
@ -1771,11 +1819,6 @@ void nv_drm_register_drm_device(const nv_gpu_info_t *gpu_info)
if (nv_drm_fbdev_module_param &&
drm_core_check_feature(dev, DRIVER_MODESET)) {
if (!nvKms->grabOwnership(nv_dev->pDevice)) {
NV_DRM_DEV_LOG_ERR(nv_dev, "Failed to grab NVKMS modeset ownership");
goto failed_grab_ownership;
}
if (bus_is_pci) {
struct pci_dev *pdev = to_pci_dev(device);
@ -1786,8 +1829,6 @@ void nv_drm_register_drm_device(const nv_gpu_info_t *gpu_info)
#endif
}
drm_fbdev_generic_setup(dev, 32);
nv_dev->hasFramebufferConsole = NV_TRUE;
}
#endif /* defined(NV_DRM_FBDEV_GENERIC_AVAILABLE) */
@ -1798,12 +1839,6 @@ void nv_drm_register_drm_device(const nv_gpu_info_t *gpu_info)
return; /* Success */
#if defined(NV_DRM_FBDEV_GENERIC_AVAILABLE)
failed_grab_ownership:
drm_dev_unregister(dev);
#endif
failed_drm_register:
nv_drm_dev_free(dev);
@ -1870,12 +1905,6 @@ void nv_drm_remove_devices(void)
struct nv_drm_device *next = dev_list->next;
struct drm_device *dev = dev_list->dev;
#if defined(NV_DRM_FBDEV_GENERIC_AVAILABLE)
if (dev_list->hasFramebufferConsole) {
drm_atomic_helper_shutdown(dev);
nvKms->releaseOwnership(dev_list->pDevice);
}
#endif
drm_dev_unregister(dev);
nv_drm_dev_free(dev);

View File

@ -293,14 +293,12 @@ __nv_drm_prime_fence_context_new(
* to check a return value.
*/
*nv_prime_fence_context = (struct nv_drm_prime_fence_context) {
.base.ops = &nv_drm_prime_fence_context_ops,
.base.nv_dev = nv_dev,
.base.context = nv_dma_fence_context_alloc(1),
.base.fenceSemIndex = p->index,
.pSemSurface = pSemSurface,
.pLinearAddress = pLinearAddress,
};
nv_prime_fence_context->base.ops = &nv_drm_prime_fence_context_ops;
nv_prime_fence_context->base.nv_dev = nv_dev;
nv_prime_fence_context->base.context = nv_dma_fence_context_alloc(1);
nv_prime_fence_context->base.fenceSemIndex = p->index;
nv_prime_fence_context->pSemSurface = pSemSurface;
nv_prime_fence_context->pLinearAddress = pLinearAddress;
INIT_LIST_HEAD(&nv_prime_fence_context->pending);
@ -1261,18 +1259,16 @@ __nv_drm_semsurf_fence_ctx_new(
* to check a return value.
*/
*ctx = (struct nv_drm_semsurf_fence_ctx) {
.base.ops = &nv_drm_semsurf_fence_ctx_ops,
.base.nv_dev = nv_dev,
.base.context = nv_dma_fence_context_alloc(1),
.base.fenceSemIndex = p->index,
.pSemSurface = pSemSurface,
.pSemMapping.pVoid = semMapping,
.pMaxSubmittedMapping = (volatile NvU64 *)maxSubmittedMapping,
.callback.local = NULL,
.callback.nvKms = NULL,
.current_wait_value = 0,
};
ctx->base.ops = &nv_drm_semsurf_fence_ctx_ops;
ctx->base.nv_dev = nv_dev;
ctx->base.context = nv_dma_fence_context_alloc(1);
ctx->base.fenceSemIndex = p->index;
ctx->pSemSurface = pSemSurface;
ctx->pSemMapping.pVoid = semMapping;
ctx->pMaxSubmittedMapping = (volatile NvU64 *)maxSubmittedMapping;
ctx->callback.local = NULL;
ctx->callback.nvKms = NULL;
ctx->current_wait_value = 0;
spin_lock_init(&ctx->lock);
INIT_LIST_HEAD(&ctx->pending_fences);

View File

@ -551,14 +551,12 @@ static struct drm_gem_object *__nv_drm_gem_nvkms_prime_dup(
{
struct nv_drm_device *nv_dev = to_nv_device(dev);
const struct nv_drm_device *nv_dev_src;
const struct nv_drm_gem_nvkms_memory *nv_nvkms_memory_src;
struct nv_drm_gem_nvkms_memory *nv_nvkms_memory;
struct NvKmsKapiMemory *pMemory;
BUG_ON(nv_gem_src == NULL || nv_gem_src->ops != &nv_gem_nvkms_memory_ops);
nv_dev_src = to_nv_device(nv_gem_src->base.dev);
nv_nvkms_memory_src = to_nv_nvkms_memory_const(nv_gem_src);
if ((nv_nvkms_memory =
nv_drm_calloc(1, sizeof(*nv_nvkms_memory))) == NULL) {

View File

@ -45,8 +45,7 @@
/*
* The inclusion of drm_framebuffer.h was removed from drm_crtc.h by commit
* 720cf96d8fecde29b72e1101f8a567a0ce99594f ("drm: Drop drm_framebuffer.h from
* drm_crtc.h") in linux-next, expected in v5.19-rc7.
* 720cf96d8fec ("drm: Drop drm_framebuffer.h from drm_crtc.h") in v6.0.
*
* We only need drm_framebuffer.h for drm_framebuffer_put(), and it is always
* present (v4.9+) when drm_framebuffer_{put,get}() is present (v4.12+), so it

View File

@ -613,8 +613,8 @@ static inline int nv_drm_format_num_planes(uint32_t format)
#endif /* defined(NV_DRM_FORMAT_MODIFIERS_PRESENT) */
/*
* DRM_UNLOCKED was removed with linux-next commit 2798ffcc1d6a ("drm: Remove
* locking for legacy ioctls and DRM_UNLOCKED"), but it was previously made
* DRM_UNLOCKED was removed with commit 2798ffcc1d6a ("drm: Remove locking for
* legacy ioctls and DRM_UNLOCKED") in v6.8, but it was previously made
* implicit for all non-legacy DRM driver IOCTLs since Linux v4.10 commit
* fa5386459f06 "drm: Used DRM_LEGACY for all legacy functions" (Linux v4.4
* commit ea487835e887 "drm: Enforce unlocked ioctl operation for kms driver

View File

@ -52,6 +52,7 @@
#define DRM_NVIDIA_SEMSURF_FENCE_CREATE 0x15
#define DRM_NVIDIA_SEMSURF_FENCE_WAIT 0x16
#define DRM_NVIDIA_SEMSURF_FENCE_ATTACH 0x17
#define DRM_NVIDIA_GET_DRM_FILE_UNIQUE_ID 0x18
#define DRM_IOCTL_NVIDIA_GEM_IMPORT_NVKMS_MEMORY \
DRM_IOWR((DRM_COMMAND_BASE + DRM_NVIDIA_GEM_IMPORT_NVKMS_MEMORY), \
@ -157,6 +158,11 @@
DRM_NVIDIA_SEMSURF_FENCE_ATTACH), \
struct drm_nvidia_semsurf_fence_attach_params)
#define DRM_IOCTL_NVIDIA_GET_DRM_FILE_UNIQUE_ID \
DRM_IOWR((DRM_COMMAND_BASE + \
DRM_NVIDIA_GET_DRM_FILE_UNIQUE_ID), \
struct drm_nvidia_get_drm_file_unique_id_params)
struct drm_nvidia_gem_import_nvkms_memory_params {
uint64_t mem_size; /* IN */
@ -385,4 +391,8 @@ struct drm_nvidia_semsurf_fence_attach_params {
uint64_t wait_value; /* IN Semaphore value to reach before signal */
};
struct drm_nvidia_get_drm_file_unique_id_params {
uint64_t id; /* OUT Unique ID of the DRM file */
};
#endif /* _UAPI_NVIDIA_DRM_IOCTL_H_ */
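A hedged user-space sketch of the new ioctl, assuming a file descriptor opened on the nvidia-drm node and libdrm's drmIoctl(); the included header name and helper are illustrative:
/* Sketch: ask nvidia-drm for the unique ID of this DRM file. */
#include <stdint.h>
#include <xf86drm.h>
#include "nvidia-drm-ioctl.h"

static uint64_t sketch_get_drm_file_unique_id(int drm_fd)
{
    struct drm_nvidia_get_drm_file_unique_id_params params = { 0 };

    if (drmIoctl(drm_fd, DRM_IOCTL_NVIDIA_GET_DRM_FILE_UNIQUE_ID, &params) != 0) {
        return 0; /* ioctl failed or is not supported by this driver version */
    }
    return params.id; /* OUT: per-file ID assigned when the DRM file was opened */
}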

View File

@ -587,6 +587,9 @@ int nv_drm_atomic_commit(struct drm_device *dev,
NV_DRM_DEV_LOG_ERR(
nv_dev,
"Flip event timeout on head %u", nv_crtc->head);
while (!list_empty(&nv_crtc->flip_list)) {
__nv_drm_handle_flip_event(nv_crtc);
}
}
}
}

View File

@ -128,4 +128,5 @@ NV_CONFTEST_TYPE_COMPILE_TESTS += drm_driver_has_dumb_destroy
NV_CONFTEST_TYPE_COMPILE_TESTS += fence_ops_use_64bit_seqno
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_aperture_remove_conflicting_pci_framebuffers_has_driver_arg
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_mode_create_dp_colorspace_property_has_supported_colorspaces_arg
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_syncobj_features_present
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_unlocked_ioctl_flag_present

View File

@ -77,10 +77,10 @@ module_param_named(disable_hdmi_frl, disable_hdmi_frl, bool, 0400);
static bool disable_vrr_memclk_switch = false;
module_param_named(disable_vrr_memclk_switch, disable_vrr_memclk_switch, bool, 0400);
static bool hdmi_deepcolor = false;
static bool hdmi_deepcolor = true;
module_param_named(hdmi_deepcolor, hdmi_deepcolor, bool, 0400);
static bool vblank_sem_control = false;
static bool vblank_sem_control = true;
module_param_named(vblank_sem_control, vblank_sem_control, bool, 0400);
static bool opportunistic_display_sync = true;
@ -139,6 +139,20 @@ NvBool nvkms_opportunistic_display_sync(void)
return opportunistic_display_sync;
}
NvBool nvkms_kernel_supports_syncpts(void)
{
/*
* Note this only checks that the kernel has the prerequisite
* support for syncpts; callers must also check that the hardware
* supports syncpts.
*/
#if (defined(CONFIG_TEGRA_GRHOST) || defined(NV_LINUX_HOST1X_NEXT_H_PRESENT))
return NV_TRUE;
#else
return NV_FALSE;
#endif
}
#define NVKMS_SYNCPT_STUBS_NEEDED
/*************************************************************************
@ -1234,6 +1248,26 @@ void nvkms_close_from_kapi(struct nvkms_per_open *popen)
nvkms_close_pm_unlocked(popen);
}
NvBool nvkms_ioctl_from_kapi_try_pmlock
(
struct nvkms_per_open *popen,
NvU32 cmd, void *params_address, const size_t param_size
)
{
NvBool ret;
if (nvkms_read_trylock_pm_lock()) {
return NV_FALSE;
}
ret = nvkms_ioctl_common(popen,
cmd,
(NvU64)(NvUPtr)params_address, param_size) == 0;
nvkms_read_unlock_pm_lock();
return ret;
}
NvBool nvkms_ioctl_from_kapi
(
struct nvkms_per_open *popen,

View File

@ -304,6 +304,11 @@ NvU32 nvkms_enumerate_gpus(nv_gpu_info_t *gpu_info);
NvBool nvkms_allow_write_combining(void);
/*!
* Check if OS supports syncpoints.
*/
NvBool nvkms_kernel_supports_syncpts(void);
/*!
* Checks whether the fd is associated with an nvidia character device.
*/
@ -328,6 +333,16 @@ NvBool nvkms_ioctl_from_kapi
NvU32 cmd, void *params_address, const size_t params_size
);
/*!
* Like nvkms_ioctl_from_kapi, but return NV_FALSE instead of waiting if the
* power management read lock cannot be acquired.
*/
NvBool nvkms_ioctl_from_kapi_try_pmlock
(
struct nvkms_per_open *popen,
NvU32 cmd, void *params_address, const size_t params_size
);
/*!
* APIs for locking.
*/
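A minimal sketch of the non-blocking variant documented above, from a KAPI caller's point of view; the wrapper name is illustrative:
/* Sketch: issue a modeset call without sleeping on the PM lock. */
static NvBool sketch_try_modeset_call(struct nvkms_per_open *popen,
                                      NvU32 cmd, void *params, size_t params_size)
{
    if (!nvkms_ioctl_from_kapi_try_pmlock(popen, cmd, params, params_size)) {
        /* Either the ioctl failed or the PM read lock was held (e.g. during */
        /* suspend); a caller that must not block simply reports failure.    */
        return NV_FALSE;
    }
    return NV_TRUE;
}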

View File

@ -105,3 +105,4 @@ NV_CONFTEST_FUNCTION_COMPILE_TESTS += list_is_first
NV_CONFTEST_FUNCTION_COMPILE_TESTS += ktime_get_real_ts64
NV_CONFTEST_FUNCTION_COMPILE_TESTS += ktime_get_raw_ts64
NV_CONFTEST_FUNCTION_COMPILE_TESTS += acpi_video_backlight_use_native
NV_CONFTEST_FUNCTION_COMPILE_TESTS += kernel_read_has_pointer_pos_arg

View File

@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2013-2023 NVIDIA Corporation
Copyright (c) 2013-2024 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@ -3463,8 +3463,7 @@ NV_STATUS UvmToolsDestroySession(UvmToolsSessionHandle session);
//
#if UVM_API_REV_IS_AT_MOST(10)
// This is deprecated and replaced by sizeof(UvmToolsEventControlData_V1) or
// sizeof(UvmToolsEventControlData_V2).
// This is deprecated and replaced by sizeof(UvmToolsEventControlData).
NvLength UvmToolsGetEventControlSize(void);
// This is deprecated and replaced by sizeof(UvmEventEntry_V1) or
@ -3488,8 +3487,6 @@ NvLength UvmToolsGetNumberOfCounters(void);
// version: (INPUT)
// Requested version for events or counters.
// See UvmEventEntry_V1 and UvmEventEntry_V2.
// UvmToolsEventControlData_V2::version records the entry version that
// will be generated.
//
// event_buffer: (INPUT)
// User allocated buffer. Must be page-aligned. Must be large enough to
@ -3502,8 +3499,7 @@ NvLength UvmToolsGetNumberOfCounters(void);
//
// event_control (INPUT)
// User allocated buffer. Must be page-aligned. Must be large enough to
// hold UvmToolsEventControlData_V1 if version is UvmEventEntry_V1 or
// UvmToolsEventControlData_V2 (although single page-size allocation
// hold UvmToolsEventControlData (although single page-size allocation
// should be more than enough). Gets pinned until queue is destroyed.
//
// queue: (OUTPUT)

View File

@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2018-2023 NVIDIA Corporation
Copyright (c) 2018-2024 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@ -205,17 +205,18 @@ void uvm_hal_ampere_host_clear_faulted_channel_sw_method(uvm_push_t *push,
CLEAR_FAULTED_B, HWVALUE(C076, CLEAR_FAULTED_B, INST_HI, instance_ptr_hi));
}
// Copy from Pascal, this version sets TLB_INVALIDATE_INVAL_SCOPE.
// Copy from Turing, this version sets TLB_INVALIDATE_INVAL_SCOPE.
void uvm_hal_ampere_host_tlb_invalidate_all(uvm_push_t *push,
uvm_gpu_phys_address_t pdb,
NvU32 depth,
uvm_membar_t membar)
uvm_gpu_phys_address_t pdb,
NvU32 depth,
uvm_membar_t membar)
{
NvU32 aperture_value;
NvU32 page_table_level;
NvU32 pdb_lo;
NvU32 pdb_hi;
NvU32 ack_value = 0;
NvU32 sysmembar_value = 0;
UVM_ASSERT_MSG(pdb.aperture == UVM_APERTURE_VID || pdb.aperture == UVM_APERTURE_SYS, "aperture: %u", pdb.aperture);
@ -230,8 +231,8 @@ void uvm_hal_ampere_host_tlb_invalidate_all(uvm_push_t *push,
pdb_lo = pdb.address & HWMASK(C56F, MEM_OP_C, TLB_INVALIDATE_PDB_ADDR_LO);
pdb_hi = pdb.address >> HWSIZE(C56F, MEM_OP_C, TLB_INVALIDATE_PDB_ADDR_LO);
// PDE3 is the highest level on Pascal, see the comment in uvm_pascal_mmu.c
// for details.
// PDE3 is the highest level on Pascal-Ampere, see the comment in
// uvm_pascal_mmu.c for details.
UVM_ASSERT_MSG(depth < NVC56F_MEM_OP_C_TLB_INVALIDATE_PAGE_TABLE_LEVEL_UP_TO_PDE3, "depth %u", depth);
page_table_level = NVC56F_MEM_OP_C_TLB_INVALIDATE_PAGE_TABLE_LEVEL_UP_TO_PDE3 - depth;
@ -242,7 +243,12 @@ void uvm_hal_ampere_host_tlb_invalidate_all(uvm_push_t *push,
ack_value = HWCONST(C56F, MEM_OP_C, TLB_INVALIDATE_ACK_TYPE, GLOBALLY);
}
NV_PUSH_4U(C56F, MEM_OP_A, HWCONST(C56F, MEM_OP_A, TLB_INVALIDATE_SYSMEMBAR, DIS) |
if (membar == UVM_MEMBAR_SYS)
sysmembar_value = HWCONST(C56F, MEM_OP_A, TLB_INVALIDATE_SYSMEMBAR, EN);
else
sysmembar_value = HWCONST(C56F, MEM_OP_A, TLB_INVALIDATE_SYSMEMBAR, DIS);
NV_PUSH_4U(C56F, MEM_OP_A, sysmembar_value |
HWCONST(C56F, MEM_OP_A, TLB_INVALIDATE_INVAL_SCOPE, NON_LINK_TLBS),
MEM_OP_B, 0,
MEM_OP_C, HWCONST(C56F, MEM_OP_C, TLB_INVALIDATE_PDB, ONE) |
@ -255,16 +261,18 @@ void uvm_hal_ampere_host_tlb_invalidate_all(uvm_push_t *push,
MEM_OP_D, HWCONST(C56F, MEM_OP_D, OPERATION, MMU_TLB_INVALIDATE) |
HWVALUE(C56F, MEM_OP_D, TLB_INVALIDATE_PDB_ADDR_HI, pdb_hi));
uvm_hal_tlb_invalidate_membar(push, membar);
// GPU membar still requires an explicit membar method.
if (membar == UVM_MEMBAR_GPU)
uvm_push_get_gpu(push)->parent->host_hal->membar_gpu(push);
}
// Copy from Volta, this version sets TLB_INVALIDATE_INVAL_SCOPE.
// Copy from Turing, this version sets TLB_INVALIDATE_INVAL_SCOPE.
void uvm_hal_ampere_host_tlb_invalidate_va(uvm_push_t *push,
uvm_gpu_phys_address_t pdb,
NvU32 depth,
NvU64 base,
NvU64 size,
NvU32 page_size,
NvU64 page_size,
uvm_membar_t membar)
{
NvU32 aperture_value;
@ -272,6 +280,7 @@ void uvm_hal_ampere_host_tlb_invalidate_va(uvm_push_t *push,
NvU32 pdb_lo;
NvU32 pdb_hi;
NvU32 ack_value = 0;
NvU32 sysmembar_value = 0;
NvU32 va_lo;
NvU32 va_hi;
NvU64 end;
@ -281,9 +290,9 @@ void uvm_hal_ampere_host_tlb_invalidate_va(uvm_push_t *push,
NvU32 log2_invalidation_size;
uvm_gpu_t *gpu = uvm_push_get_gpu(push);
UVM_ASSERT_MSG(IS_ALIGNED(page_size, 1 << 12), "page_size 0x%x\n", page_size);
UVM_ASSERT_MSG(IS_ALIGNED(base, page_size), "base 0x%llx page_size 0x%x\n", base, page_size);
UVM_ASSERT_MSG(IS_ALIGNED(size, page_size), "size 0x%llx page_size 0x%x\n", size, page_size);
UVM_ASSERT_MSG(IS_ALIGNED(page_size, 1 << 12), "page_size 0x%llx\n", page_size);
UVM_ASSERT_MSG(IS_ALIGNED(base, page_size), "base 0x%llx page_size 0x%llx\n", base, page_size);
UVM_ASSERT_MSG(IS_ALIGNED(size, page_size), "size 0x%llx page_size 0x%llx\n", size, page_size);
UVM_ASSERT_MSG(size > 0, "size 0x%llx\n", size);
// The invalidation size must be a power-of-two number of pages containing
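The comment above refers to the requirement that the invalidation cover a power-of-two number of pages containing the target range. One common way to satisfy such a constraint is to note that a self-aligned block of size 2^k contains [base, base + size) exactly when base and base + size - 1 agree in all bits at or above k, so the smallest usable k is the bit length of base ^ (base + size - 1), clamped to at least log2(page_size). The standalone sketch below illustrates that arithmetic only; it is not the driver's exact computation, and it uses GCC/Clang builtins.

#include <stdint.h>
#include <stdio.h>

/* Smallest k such that one 2^k-aligned, 2^k-sized block covers
 * [base, base + size), with 2^k no smaller than page_size.
 * Assumes size > 0 and page_size is a power of two. */
static unsigned covering_block_log2(uint64_t base, uint64_t size, uint64_t page_size)
{
    uint64_t last = base + size - 1;
    uint64_t diff = base ^ last;
    unsigned k = diff ? 64 - __builtin_clzll(diff) : 0;
    unsigned page_shift = __builtin_ctzll(page_size);

    return k > page_shift ? k : page_shift;
}

int main(void)
{
    /* 8KiB starting at 0x7000 with 4KiB pages needs a 2^16 (64KiB) covering
     * block: the range straddles the 0x8000 boundary, so no smaller
     * self-aligned block contains both ends. */
    printf("log2 = %u\n", covering_block_log2(0x7000, 0x2000, 0x1000));
    return 0;
}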
@ -325,7 +334,7 @@ void uvm_hal_ampere_host_tlb_invalidate_va(uvm_push_t *push,
pdb_lo = pdb.address & HWMASK(C56F, MEM_OP_C, TLB_INVALIDATE_PDB_ADDR_LO);
pdb_hi = pdb.address >> HWSIZE(C56F, MEM_OP_C, TLB_INVALIDATE_PDB_ADDR_LO);
// PDE3 is the highest level on Pascal-Ampere , see the comment in
// PDE3 is the highest level on Pascal-Ampere, see the comment in
// uvm_pascal_mmu.c for details.
UVM_ASSERT_MSG(depth < NVC56F_MEM_OP_C_TLB_INVALIDATE_PAGE_TABLE_LEVEL_UP_TO_PDE3, "depth %u", depth);
page_table_level = NVC56F_MEM_OP_C_TLB_INVALIDATE_PAGE_TABLE_LEVEL_UP_TO_PDE3 - depth;
@ -337,10 +346,15 @@ void uvm_hal_ampere_host_tlb_invalidate_va(uvm_push_t *push,
ack_value = HWCONST(C56F, MEM_OP_C, TLB_INVALIDATE_ACK_TYPE, GLOBALLY);
}
if (membar == UVM_MEMBAR_SYS)
sysmembar_value = HWCONST(C56F, MEM_OP_A, TLB_INVALIDATE_SYSMEMBAR, EN);
else
sysmembar_value = HWCONST(C56F, MEM_OP_A, TLB_INVALIDATE_SYSMEMBAR, DIS);
NV_PUSH_4U(C56F, MEM_OP_A, HWVALUE(C56F, MEM_OP_A, TLB_INVALIDATE_INVALIDATION_SIZE, log2_invalidation_size) |
HWCONST(C56F, MEM_OP_A, TLB_INVALIDATE_SYSMEMBAR, DIS) |
HWVALUE(C56F, MEM_OP_A, TLB_INVALIDATE_TARGET_ADDR_LO, va_lo) |
HWCONST(C56F, MEM_OP_A, TLB_INVALIDATE_INVAL_SCOPE, NON_LINK_TLBS),
HWCONST(C56F, MEM_OP_A, TLB_INVALIDATE_INVAL_SCOPE, NON_LINK_TLBS) |
sysmembar_value |
HWVALUE(C56F, MEM_OP_A, TLB_INVALIDATE_TARGET_ADDR_LO, va_lo),
MEM_OP_B, HWVALUE(C56F, MEM_OP_B, TLB_INVALIDATE_TARGET_ADDR_HI, va_hi),
MEM_OP_C, HWCONST(C56F, MEM_OP_C, TLB_INVALIDATE_PDB, ONE) |
HWVALUE(C56F, MEM_OP_C, TLB_INVALIDATE_PDB_ADDR_LO, pdb_lo) |
@ -352,21 +366,23 @@ void uvm_hal_ampere_host_tlb_invalidate_va(uvm_push_t *push,
MEM_OP_D, HWCONST(C56F, MEM_OP_D, OPERATION, MMU_TLB_INVALIDATE_TARGETED) |
HWVALUE(C56F, MEM_OP_D, TLB_INVALIDATE_PDB_ADDR_HI, pdb_hi));
uvm_hal_tlb_invalidate_membar(push, membar);
// GPU membar still requires an explicit membar method.
if (membar == UVM_MEMBAR_GPU)
gpu->parent->host_hal->membar_gpu(push);
}
// Copy from Pascal, this version sets TLB_INVALIDATE_INVAL_SCOPE.
// Copy from Turing, this version sets TLB_INVALIDATE_INVAL_SCOPE.
void uvm_hal_ampere_host_tlb_invalidate_test(uvm_push_t *push,
uvm_gpu_phys_address_t pdb,
UVM_TEST_INVALIDATE_TLB_PARAMS *params)
{
NvU32 ack_value = 0;
NvU32 sysmembar_value = 0;
NvU32 invalidate_gpc_value = 0;
NvU32 aperture_value = 0;
NvU32 pdb_lo = 0;
NvU32 pdb_hi = 0;
NvU32 page_table_level = 0;
uvm_membar_t membar;
UVM_ASSERT_MSG(pdb.aperture == UVM_APERTURE_VID || pdb.aperture == UVM_APERTURE_SYS, "aperture: %u", pdb.aperture);
if (pdb.aperture == UVM_APERTURE_VID)
@ -381,7 +397,7 @@ void uvm_hal_ampere_host_tlb_invalidate_test(uvm_push_t *push,
pdb_hi = pdb.address >> HWSIZE(C56F, MEM_OP_C, TLB_INVALIDATE_PDB_ADDR_LO);
if (params->page_table_level != UvmInvalidatePageTableLevelAll) {
// PDE3 is the highest level on Pascal, see the comment in
// PDE3 is the highest level on Pascal-Ampere, see the comment in
// uvm_pascal_mmu.c for details.
page_table_level = min((NvU32)UvmInvalidatePageTableLevelPde3, params->page_table_level) - 1;
}
@ -393,6 +409,11 @@ void uvm_hal_ampere_host_tlb_invalidate_test(uvm_push_t *push,
ack_value = HWCONST(C56F, MEM_OP_C, TLB_INVALIDATE_ACK_TYPE, GLOBALLY);
}
if (params->membar == UvmInvalidateTlbMemBarSys)
sysmembar_value = HWCONST(C56F, MEM_OP_A, TLB_INVALIDATE_SYSMEMBAR, EN);
else
sysmembar_value = HWCONST(C56F, MEM_OP_A, TLB_INVALIDATE_SYSMEMBAR, DIS);
if (params->disable_gpc_invalidate)
invalidate_gpc_value = HWCONST(C56F, MEM_OP_C, TLB_INVALIDATE_GPC, DISABLE);
else
@ -403,9 +424,9 @@ void uvm_hal_ampere_host_tlb_invalidate_test(uvm_push_t *push,
NvU32 va_lo = va & HWMASK(C56F, MEM_OP_A, TLB_INVALIDATE_TARGET_ADDR_LO);
NvU32 va_hi = va >> HWSIZE(C56F, MEM_OP_A, TLB_INVALIDATE_TARGET_ADDR_LO);
NV_PUSH_4U(C56F, MEM_OP_A, HWCONST(C56F, MEM_OP_A, TLB_INVALIDATE_SYSMEMBAR, DIS) |
HWVALUE(C56F, MEM_OP_A, TLB_INVALIDATE_TARGET_ADDR_LO, va_lo) |
HWCONST(C56F, MEM_OP_A, TLB_INVALIDATE_INVAL_SCOPE, NON_LINK_TLBS),
NV_PUSH_4U(C56F, MEM_OP_A, sysmembar_value |
HWCONST(C56F, MEM_OP_A, TLB_INVALIDATE_INVAL_SCOPE, NON_LINK_TLBS) |
HWVALUE(C56F, MEM_OP_A, TLB_INVALIDATE_TARGET_ADDR_LO, va_lo),
MEM_OP_B, HWVALUE(C56F, MEM_OP_B, TLB_INVALIDATE_TARGET_ADDR_HI, va_hi),
MEM_OP_C, HWCONST(C56F, MEM_OP_C, TLB_INVALIDATE_REPLAY, NONE) |
HWVALUE(C56F, MEM_OP_C, TLB_INVALIDATE_PAGE_TABLE_LEVEL, page_table_level) |
@ -418,7 +439,7 @@ void uvm_hal_ampere_host_tlb_invalidate_test(uvm_push_t *push,
HWVALUE(C56F, MEM_OP_D, TLB_INVALIDATE_PDB_ADDR_HI, pdb_hi));
}
else {
NV_PUSH_4U(C56F, MEM_OP_A, HWCONST(C56F, MEM_OP_A, TLB_INVALIDATE_SYSMEMBAR, DIS) |
NV_PUSH_4U(C56F, MEM_OP_A, sysmembar_value |
HWCONST(C56F, MEM_OP_A, TLB_INVALIDATE_INVAL_SCOPE, NON_LINK_TLBS),
MEM_OP_B, 0,
MEM_OP_C, HWCONST(C56F, MEM_OP_C, TLB_INVALIDATE_REPLAY, NONE) |
@ -432,12 +453,7 @@ void uvm_hal_ampere_host_tlb_invalidate_test(uvm_push_t *push,
HWVALUE(C56F, MEM_OP_D, TLB_INVALIDATE_PDB_ADDR_HI, pdb_hi));
}
if (params->membar == UvmInvalidateTlbMemBarSys)
membar = UVM_MEMBAR_SYS;
else if (params->membar == UvmInvalidateTlbMemBarLocal)
membar = UVM_MEMBAR_GPU;
else
membar = UVM_MEMBAR_NONE;
uvm_hal_tlb_invalidate_membar(push, membar);
// GPU membar still requires an explicit membar method.
if (params->membar == UvmInvalidateTlbMemBarLocal)
uvm_push_get_gpu(push)->parent->host_hal->membar_gpu(push);
}
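Across the Ampere host hunks above the pattern is consistent: the system-scope membar is folded into the invalidate method itself by selecting TLB_INVALIDATE_SYSMEMBAR EN or DIS up front, and the trailing uvm_hal_tlb_invalidate_membar() call is replaced by an explicit membar_gpu() method issued only when a GPU-local membar is requested. A minimal standalone C sketch of that selection logic follows; the enum and helper names are illustrative, not the driver's API.

#include <stdbool.h>
#include <stdio.h>

/* Illustrative membar scopes, mirroring the three cases handled above. */
typedef enum { MEMBAR_NONE, MEMBAR_GPU, MEMBAR_SYS } membar_t;

/* True when the invalidate method itself should request a sysmembar. */
static bool invalidate_wants_sysmembar(membar_t membar)
{
    return membar == MEMBAR_SYS;
}

/* True when an explicit GPU-local membar method must follow the invalidate. */
static bool needs_explicit_gpu_membar(membar_t membar)
{
    return membar == MEMBAR_GPU;
}

int main(void)
{
    const membar_t cases[3] = { MEMBAR_NONE, MEMBAR_GPU, MEMBAR_SYS };
    int i;

    for (i = 0; i < 3; i++) {
        printf("membar %d: sysmembar in method: %d, explicit GPU membar after: %d\n",
               (int)cases[i],
               invalidate_wants_sysmembar(cases[i]),
               needs_explicit_gpu_membar(cases[i]));
    }
    return 0;
}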

View File

@ -51,7 +51,7 @@ uvm_mmu_engine_type_t uvm_hal_ampere_mmu_engine_id_to_type(NvU16 mmu_engine_id)
return UVM_MMU_ENGINE_TYPE_GRAPHICS;
}
static NvU32 page_table_depth_ampere(NvU32 page_size)
static NvU32 page_table_depth_ampere(NvU64 page_size)
{
// The common-case is page_size == UVM_PAGE_SIZE_2M, hence the first check
if (page_size == UVM_PAGE_SIZE_2M)
@ -62,14 +62,14 @@ static NvU32 page_table_depth_ampere(NvU32 page_size)
return 4;
}
static NvU32 page_sizes_ampere(void)
static NvU64 page_sizes_ampere(void)
{
return UVM_PAGE_SIZE_512M | UVM_PAGE_SIZE_2M | UVM_PAGE_SIZE_64K | UVM_PAGE_SIZE_4K;
}
static uvm_mmu_mode_hal_t ampere_mmu_mode_hal;
uvm_mmu_mode_hal_t *uvm_hal_mmu_mode_ampere(NvU32 big_page_size)
uvm_mmu_mode_hal_t *uvm_hal_mmu_mode_ampere(NvU64 big_page_size)
{
static bool initialized = false;

View File

@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2018-2021 NVIDIA Corporation
Copyright (c) 2018-2024 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to

View File

@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2018-2021 NVIDIA Corporation
Copyright (c) 2018-2024 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@ -29,10 +29,9 @@
#include "uvm_ats_ibm.h"
#include "nv_uvm_types.h"
#include "uvm_lock.h"
#include "uvm_ats_sva.h"
#include "uvm_ats_sva.h"
#define UVM_ATS_SUPPORTED() (UVM_ATS_IBM_SUPPORTED() || UVM_ATS_SVA_SUPPORTED())
#define UVM_ATS_SUPPORTED() (UVM_ATS_IBM_SUPPORTED() || UVM_ATS_SVA_SUPPORTED())
typedef struct
{

View File

@ -1541,14 +1541,14 @@ static uvm_gpfifo_entry_t *uvm_channel_get_first_pending_entry(uvm_channel_t *ch
NV_STATUS uvm_channel_get_status(uvm_channel_t *channel)
{
uvm_gpu_t *gpu;
NvNotification *errorNotifier;
NvNotification *error_notifier;
if (uvm_channel_is_proxy(channel))
errorNotifier = channel->proxy.channel_info.shadowErrorNotifier;
error_notifier = channel->proxy.channel_info.shadowErrorNotifier;
else
errorNotifier = channel->channel_info.errorNotifier;
error_notifier = channel->channel_info.errorNotifier;
if (errorNotifier->status == 0)
if (error_notifier->status == 0)
return NV_OK;
// In case we hit a channel error, check the ECC error notifier as well so
@ -2584,16 +2584,18 @@ out:
// Return the pool corresponding to the given CE index
//
// This function cannot be used to access the proxy pool in SR-IOV heavy.
// Used to retrieve pools of type UVM_CHANNEL_POOL_TYPE_CE only.
static uvm_channel_pool_t *channel_manager_ce_pool(uvm_channel_manager_t *manager, NvU32 ce)
{
uvm_channel_pool_t *pool;
uvm_channel_pool_t *pool = uvm_channel_pool_first(manager, UVM_CHANNEL_POOL_TYPE_CE);
UVM_ASSERT(pool != NULL);
UVM_ASSERT(test_bit(ce, manager->ce_mask));
// The index of the pool associated with 'ce' is the number of usable CEs
// in [0, ce)
pool = manager->channel_pools + bitmap_weight(manager->ce_mask, ce);
// Pools of type UVM_CHANNEL_POOL_TYPE_CE are stored contiguously. The
// offset of the pool associated with 'ce' is the number of usable CEs in
// [0, ce).
pool += bitmap_weight(manager->ce_mask, ce);
UVM_ASSERT(pool->pool_type == UVM_CHANNEL_POOL_TYPE_CE);
UVM_ASSERT(pool->engine_index == ce);
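The rewritten channel_manager_ce_pool() relies on the two invariants spelled out in the new comment: CE pools are stored contiguously starting at the first UVM_CHANNEL_POOL_TYPE_CE pool, and the offset of the pool for copy engine 'ce' equals the number of usable CEs with a smaller index, which is what bitmap_weight(manager->ce_mask, ce) counts. Below is a minimal userspace sketch of that index arithmetic with illustrative names rather than the driver's types.

#include <stdint.h>
#include <stdio.h>

/* Count the usable CEs with index < ce, i.e. the contiguous-pool offset. */
static unsigned ce_pool_offset(uint64_t ce_mask, unsigned ce)
{
    uint64_t below = ce ? ce_mask & ((UINT64_C(1) << ce) - 1) : 0;

    return (unsigned)__builtin_popcountll(below);
}

int main(void)
{
    /* Usable CEs 0, 2, 3 and 5: their pools occupy offsets 0, 1, 2 and 3. */
    uint64_t ce_mask = (1u << 0) | (1u << 2) | (1u << 3) | (1u << 5);
    unsigned ces[] = { 0, 2, 3, 5 };
    unsigned i;

    for (i = 0; i < 4; i++)
        printf("CE %u -> pool offset %u\n", ces[i], ce_pool_offset(ce_mask, ces[i]));
    return 0;
}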
@ -2811,6 +2813,7 @@ static unsigned channel_manager_get_max_pools(uvm_channel_manager_t *manager)
static NV_STATUS channel_manager_create_ce_pools(uvm_channel_manager_t *manager, unsigned *preferred_ce)
{
unsigned ce;
unsigned type;
// A pool is created for each usable CE, even if it has not been selected as
// the preferred CE for any type, because as more information is discovered
@ -2818,18 +2821,20 @@ static NV_STATUS channel_manager_create_ce_pools(uvm_channel_manager_t *manager,
// previously idle pools.
for_each_set_bit(ce, manager->ce_mask, UVM_COPY_ENGINE_COUNT_MAX) {
NV_STATUS status;
unsigned type;
uvm_channel_pool_t *pool = NULL;
status = channel_pool_add(manager, UVM_CHANNEL_POOL_TYPE_CE, ce, &pool);
if (status != NV_OK)
return status;
}
for (type = 0; type < UVM_CHANNEL_TYPE_CE_COUNT; type++) {
// Set pool type if it hasn't been set before.
if (preferred_ce[type] == ce && manager->pool_to_use.default_for_type[type] == NULL)
manager->pool_to_use.default_for_type[type] = pool;
}
for (type = 0; type < UVM_CHANNEL_TYPE_CE_COUNT; type++) {
// Avoid overwriting previously set defaults.
if (manager->pool_to_use.default_for_type[type] != NULL)
continue;
ce = preferred_ce[type];
manager->pool_to_use.default_for_type[type] = channel_manager_ce_pool(manager, ce);
}
return NV_OK;
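The restructured channel_manager_create_ce_pools() now splits the work into two passes: first a pool is created for every usable CE, then each channel type's default pool is resolved from its preferred CE via channel_manager_ce_pool(), skipping any type whose default was assigned earlier. A compact sketch of that control flow with toy data; the arrays and constants below are illustrative only, and the pool lookup is replaced by a stand-in.

#include <stdio.h>

#define TYPE_COUNT 3

int main(void)
{
    /* Preferred CE per channel type, and defaults that may be pre-assigned. */
    unsigned preferred_ce[TYPE_COUNT] = { 2, 0, 2 };
    int default_pool[TYPE_COUNT] = { -1, 7, -1 };     /* type 1 already resolved */
    unsigned type;

    /* Pass 1 (not shown): create one pool per usable CE. */

    /* Pass 2: resolve the remaining defaults from the preferred CE. */
    for (type = 0; type < TYPE_COUNT; type++) {
        if (default_pool[type] != -1)
            continue;                                  /* avoid overwriting */
        default_pool[type] = (int)preferred_ce[type];  /* stand-in for the pool lookup */
    }

    for (type = 0; type < TYPE_COUNT; type++)
        printf("type %u -> pool %d\n", type, default_pool[type]);
    return 0;
}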

View File

@ -218,8 +218,9 @@ static NV_STATUS alloc_and_init_address_space(uvm_gpu_t *gpu)
if (status != NV_OK)
return status;
gpu->big_page.internal_size = gpu_address_space_info.bigPageSize;
UVM_ASSERT(gpu_address_space_info.bigPageSize <= NV_U32_MAX);
gpu->big_page.internal_size = gpu_address_space_info.bigPageSize;
gpu->time.time0_register = gpu_address_space_info.time0Offset;
gpu->time.time1_register = gpu_address_space_info.time1Offset;
@ -458,6 +459,7 @@ static const char *uvm_gpu_virt_type_string(UVM_VIRT_MODE virtMode)
static const char *uvm_gpu_link_type_string(uvm_gpu_link_type_t link_type)
{
BUILD_BUG_ON(UVM_GPU_LINK_MAX != 7);
switch (link_type) {
@ -1082,9 +1084,6 @@ static NV_STATUS configure_address_space(uvm_gpu_t *gpu)
gpu->parent->rm_va_size,
va_per_entry);
UVM_ASSERT(uvm_mmu_page_size_supported(&gpu->address_space_tree, gpu->big_page.internal_size));
UVM_ASSERT(uvm_mmu_page_size_supported(&gpu->address_space_tree, gpu->mem_info.max_vidmem_page_size));
tree_alloc = uvm_page_tree_pdb(&gpu->address_space_tree);
status = uvm_rm_locked_call(nvUvmInterfaceSetPageDirectory(gpu->rm_address_space,
tree_alloc->addr.address,
@ -2364,9 +2363,7 @@ static NV_STATUS init_peer_access(uvm_gpu_t *gpu0,
// check for peer-to-peer compatibility (PCI-E or NvLink).
peer_caps->link_type = get_gpu_link_type(p2p_caps_params->p2pLink);
if (peer_caps->link_type == UVM_GPU_LINK_INVALID
|| peer_caps->link_type == UVM_GPU_LINK_C2C
)
if (peer_caps->link_type == UVM_GPU_LINK_INVALID || peer_caps->link_type == UVM_GPU_LINK_C2C)
return NV_ERR_NOT_SUPPORTED;
peer_caps->total_link_line_rate_mbyte_per_s = p2p_caps_params->totalLinkLineRateMBps;
@ -3296,7 +3293,10 @@ void uvm_parent_gpu_dma_free_page(uvm_parent_gpu_t *parent_gpu, void *va, NvU64
atomic64_sub(PAGE_SIZE, &parent_gpu->mapped_cpu_pages_size);
}
NV_STATUS uvm_parent_gpu_map_cpu_pages(uvm_parent_gpu_t *parent_gpu, struct page *page, size_t size, NvU64 *dma_address_out)
NV_STATUS uvm_parent_gpu_map_cpu_pages(uvm_parent_gpu_t *parent_gpu,
struct page *page,
size_t size,
NvU64 *dma_address_out)
{
NvU64 dma_addr;

View File

@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2015-2023 NVIDIA Corporation
Copyright (c) 2015-2024 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to

View File

@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2015-2023 NVIDIA Corporation
Copyright (c) 2015-2024 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@ -251,6 +251,9 @@ static uvm_hal_class_ops_t host_table[] =
.semaphore_release = uvm_hal_turing_host_semaphore_release,
.clear_faulted_channel_method = uvm_hal_turing_host_clear_faulted_channel_method,
.set_gpfifo_entry = uvm_hal_turing_host_set_gpfifo_entry,
.tlb_invalidate_all = uvm_hal_turing_host_tlb_invalidate_all,
.tlb_invalidate_va = uvm_hal_turing_host_tlb_invalidate_va,
.tlb_invalidate_test = uvm_hal_turing_host_tlb_invalidate_test,
}
},
{
@ -632,13 +635,19 @@ NV_STATUS uvm_hal_init_table(void)
return status;
}
status = ops_init_from_parent(host_table, ARRAY_SIZE(host_table), HOST_OP_COUNT, offsetof(uvm_hal_class_ops_t, u.host_ops));
status = ops_init_from_parent(host_table,
ARRAY_SIZE(host_table),
HOST_OP_COUNT,
offsetof(uvm_hal_class_ops_t, u.host_ops));
if (status != NV_OK) {
UVM_ERR_PRINT("ops_init_from_parent(host_table) failed: %s\n", nvstatusToString(status));
return status;
}
status = ops_init_from_parent(arch_table, ARRAY_SIZE(arch_table), ARCH_OP_COUNT, offsetof(uvm_hal_class_ops_t, u.arch_ops));
status = ops_init_from_parent(arch_table,
ARRAY_SIZE(arch_table),
ARCH_OP_COUNT,
offsetof(uvm_hal_class_ops_t, u.arch_ops));
if (status != NV_OK) {
UVM_ERR_PRINT("ops_init_from_parent(arch_table) failed: %s\n", nvstatusToString(status));
return status;
@ -932,14 +941,16 @@ const char *uvm_mmu_engine_type_string(uvm_mmu_engine_type_t mmu_engine_type)
void uvm_hal_print_fault_entry(const uvm_fault_buffer_entry_t *entry)
{
UVM_DBG_PRINT("fault_address: 0x%llx\n", entry->fault_address);
UVM_DBG_PRINT(" fault_instance_ptr: {0x%llx:%s}\n", entry->instance_ptr.address,
uvm_aperture_string(entry->instance_ptr.aperture));
UVM_DBG_PRINT(" fault_instance_ptr: {0x%llx:%s}\n",
entry->instance_ptr.address,
uvm_aperture_string(entry->instance_ptr.aperture));
UVM_DBG_PRINT(" fault_type: %s\n", uvm_fault_type_string(entry->fault_type));
UVM_DBG_PRINT(" fault_access_type: %s\n", uvm_fault_access_type_string(entry->fault_access_type));
UVM_DBG_PRINT(" is_replayable: %s\n", entry->is_replayable? "true": "false");
UVM_DBG_PRINT(" is_virtual: %s\n", entry->is_virtual? "true": "false");
UVM_DBG_PRINT(" in_protected_mode: %s\n", entry->in_protected_mode? "true": "false");
UVM_DBG_PRINT(" fault_source.client_type: %s\n", uvm_fault_client_type_string(entry->fault_source.client_type));
UVM_DBG_PRINT(" fault_source.client_type: %s\n",
uvm_fault_client_type_string(entry->fault_source.client_type));
UVM_DBG_PRINT(" fault_source.client_id: %d\n", entry->fault_source.client_id);
UVM_DBG_PRINT(" fault_source.gpc_id: %d\n", entry->fault_source.gpc_id);
UVM_DBG_PRINT(" fault_source.mmu_engine_id: %d\n", entry->fault_source.mmu_engine_id);
@ -962,13 +973,15 @@ const char *uvm_access_counter_type_string(uvm_access_counter_type_t access_coun
void uvm_hal_print_access_counter_buffer_entry(const uvm_access_counter_buffer_entry_t *entry)
{
if (!entry->address.is_virtual) {
UVM_DBG_PRINT("physical address: {0x%llx:%s}\n", entry->address.address,
uvm_aperture_string(entry->address.aperture));
UVM_DBG_PRINT("physical address: {0x%llx:%s}\n",
entry->address.address,
uvm_aperture_string(entry->address.aperture));
}
else {
UVM_DBG_PRINT("virtual address: 0x%llx\n", entry->address.address);
UVM_DBG_PRINT(" instance_ptr {0x%llx:%s}\n", entry->virtual_info.instance_ptr.address,
uvm_aperture_string(entry->virtual_info.instance_ptr.aperture));
UVM_DBG_PRINT(" instance_ptr {0x%llx:%s}\n",
entry->virtual_info.instance_ptr.address,
uvm_aperture_string(entry->virtual_info.instance_ptr.aperture));
UVM_DBG_PRINT(" mmu_engine_type %s\n", uvm_mmu_engine_type_string(entry->virtual_info.mmu_engine_type));
UVM_DBG_PRINT(" mmu_engine_id %u\n", entry->virtual_info.mmu_engine_id);
UVM_DBG_PRINT(" ve_id %u\n", entry->virtual_info.ve_id);

View File

@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2015-2023 NVIDIA Corporation
Copyright (c) 2015-2024 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@ -112,6 +112,10 @@ void uvm_hal_pascal_host_tlb_invalidate_all(uvm_push_t *push,
uvm_gpu_phys_address_t pdb,
NvU32 depth,
uvm_membar_t membar);
void uvm_hal_turing_host_tlb_invalidate_all(uvm_push_t *push,
uvm_gpu_phys_address_t pdb,
NvU32 depth,
uvm_membar_t membar);
void uvm_hal_ampere_host_tlb_invalidate_all(uvm_push_t *push,
uvm_gpu_phys_address_t pdb,
NvU32 depth,
@ -149,42 +153,49 @@ typedef void (*uvm_hal_host_tlb_invalidate_va_t)(uvm_push_t *push,
NvU32 depth,
NvU64 base,
NvU64 size,
NvU32 page_size,
NvU64 page_size,
uvm_membar_t membar);
void uvm_hal_maxwell_host_tlb_invalidate_va(uvm_push_t *push,
uvm_gpu_phys_address_t pdb,
NvU32 depth,
NvU64 base,
NvU64 size,
NvU32 page_size,
NvU64 page_size,
uvm_membar_t membar);
void uvm_hal_pascal_host_tlb_invalidate_va(uvm_push_t *push,
uvm_gpu_phys_address_t pdb,
NvU32 depth,
NvU64 base,
NvU64 size,
NvU32 page_size,
NvU64 page_size,
uvm_membar_t membar);
void uvm_hal_volta_host_tlb_invalidate_va(uvm_push_t *push,
uvm_gpu_phys_address_t pdb,
NvU32 depth,
NvU64 base,
NvU64 size,
NvU32 page_size,
NvU64 page_size,
uvm_membar_t membar);
void uvm_hal_turing_host_tlb_invalidate_va(uvm_push_t *push,
uvm_gpu_phys_address_t pdb,
NvU32 depth,
NvU64 base,
NvU64 size,
NvU64 page_size,
uvm_membar_t membar);
void uvm_hal_ampere_host_tlb_invalidate_va(uvm_push_t *push,
uvm_gpu_phys_address_t pdb,
NvU32 depth,
NvU64 base,
NvU64 size,
NvU32 page_size,
NvU64 page_size,
uvm_membar_t membar);
void uvm_hal_hopper_host_tlb_invalidate_va(uvm_push_t *push,
uvm_gpu_phys_address_t pdb,
NvU32 depth,
NvU64 base,
NvU64 size,
NvU32 page_size,
NvU64 page_size,
uvm_membar_t membar);
typedef void (*uvm_hal_host_tlb_invalidate_test_t)(uvm_push_t *push,
@ -196,6 +207,9 @@ void uvm_hal_maxwell_host_tlb_invalidate_test(uvm_push_t *push,
void uvm_hal_pascal_host_tlb_invalidate_test(uvm_push_t *push,
uvm_gpu_phys_address_t pdb,
UVM_TEST_INVALIDATE_TLB_PARAMS *params);
void uvm_hal_turing_host_tlb_invalidate_test(uvm_push_t *push,
uvm_gpu_phys_address_t pdb,
UVM_TEST_INVALIDATE_TLB_PARAMS *params);
void uvm_hal_ampere_host_tlb_invalidate_test(uvm_push_t *push,
uvm_gpu_phys_address_t pdb,
UVM_TEST_INVALIDATE_TLB_PARAMS *params);
@ -445,15 +459,15 @@ void uvm_hal_ada_arch_init_properties(uvm_parent_gpu_t *parent_gpu);
void uvm_hal_hopper_arch_init_properties(uvm_parent_gpu_t *parent_gpu);
// Retrieve the page-tree HAL for a given big page size
typedef uvm_mmu_mode_hal_t *(*uvm_hal_lookup_mode_hal_t)(NvU32 big_page_size);
typedef uvm_mmu_mode_hal_t *(*uvm_hal_lookup_mode_hal_t)(NvU64 big_page_size);
typedef void (*uvm_hal_mmu_enable_prefetch_faults_t)(uvm_parent_gpu_t *parent_gpu);
typedef void (*uvm_hal_mmu_disable_prefetch_faults_t)(uvm_parent_gpu_t *parent_gpu);
uvm_mmu_mode_hal_t *uvm_hal_mmu_mode_maxwell(NvU32 big_page_size);
uvm_mmu_mode_hal_t *uvm_hal_mmu_mode_pascal(NvU32 big_page_size);
uvm_mmu_mode_hal_t *uvm_hal_mmu_mode_volta(NvU32 big_page_size);
uvm_mmu_mode_hal_t *uvm_hal_mmu_mode_turing(NvU32 big_page_size);
uvm_mmu_mode_hal_t *uvm_hal_mmu_mode_ampere(NvU32 big_page_size);
uvm_mmu_mode_hal_t *uvm_hal_mmu_mode_hopper(NvU32 big_page_size);
uvm_mmu_mode_hal_t *uvm_hal_mmu_mode_maxwell(NvU64 big_page_size);
uvm_mmu_mode_hal_t *uvm_hal_mmu_mode_pascal(NvU64 big_page_size);
uvm_mmu_mode_hal_t *uvm_hal_mmu_mode_volta(NvU64 big_page_size);
uvm_mmu_mode_hal_t *uvm_hal_mmu_mode_turing(NvU64 big_page_size);
uvm_mmu_mode_hal_t *uvm_hal_mmu_mode_ampere(NvU64 big_page_size);
uvm_mmu_mode_hal_t *uvm_hal_mmu_mode_hopper(NvU64 big_page_size);
void uvm_hal_maxwell_mmu_enable_prefetch_faults_unsupported(uvm_parent_gpu_t *parent_gpu);
void uvm_hal_maxwell_mmu_disable_prefetch_faults_unsupported(uvm_parent_gpu_t *parent_gpu);
void uvm_hal_pascal_mmu_enable_prefetch_faults(uvm_parent_gpu_t *parent_gpu);

View File

@ -1599,7 +1599,7 @@ static void hmm_va_block_cpu_unpopulate_chunk(uvm_va_block_t *va_block,
UVM_ASSERT(uvm_cpu_chunk_get_size(chunk) == PAGE_SIZE);
uvm_cpu_chunk_remove_from_block(va_block, chunk_nid, page_index);
uvm_va_block_unmap_cpu_chunk_on_gpus(va_block, chunk, page_index);
uvm_va_block_unmap_cpu_chunk_on_gpus(va_block, chunk);
uvm_cpu_chunk_free(chunk);
}

View File

@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2020-2022 NVIDIA Corporation
Copyright (c) 2020-2024 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@ -157,6 +157,7 @@ void uvm_hal_hopper_host_tlb_invalidate_all(uvm_push_t *push,
NvU32 pdb_lo;
NvU32 pdb_hi;
NvU32 ack_value = 0;
NvU32 sysmembar_value = 0;
UVM_ASSERT_MSG(pdb.aperture == UVM_APERTURE_VID || pdb.aperture == UVM_APERTURE_SYS, "aperture: %u", pdb.aperture);
@ -183,7 +184,12 @@ void uvm_hal_hopper_host_tlb_invalidate_all(uvm_push_t *push,
ack_value = HWCONST(C86F, MEM_OP_C, TLB_INVALIDATE_ACK_TYPE, GLOBALLY);
}
NV_PUSH_4U(C86F, MEM_OP_A, HWCONST(C86F, MEM_OP_A, TLB_INVALIDATE_SYSMEMBAR, DIS) |
if (membar == UVM_MEMBAR_SYS)
sysmembar_value = HWCONST(C86F, MEM_OP_A, TLB_INVALIDATE_SYSMEMBAR, EN);
else
sysmembar_value = HWCONST(C86F, MEM_OP_A, TLB_INVALIDATE_SYSMEMBAR, DIS);
NV_PUSH_4U(C86F, MEM_OP_A, sysmembar_value |
HWCONST(C86F, MEM_OP_A, TLB_INVALIDATE_INVAL_SCOPE, NON_LINK_TLBS),
MEM_OP_B, 0,
MEM_OP_C, HWCONST(C86F, MEM_OP_C, TLB_INVALIDATE_PDB, ONE) |
@ -196,7 +202,9 @@ void uvm_hal_hopper_host_tlb_invalidate_all(uvm_push_t *push,
MEM_OP_D, HWCONST(C86F, MEM_OP_D, OPERATION, MMU_TLB_INVALIDATE) |
HWVALUE(C86F, MEM_OP_D, TLB_INVALIDATE_PDB_ADDR_HI, pdb_hi));
uvm_hal_tlb_invalidate_membar(push, membar);
// GPU membar still requires an explicit membar method.
if (membar == UVM_MEMBAR_GPU)
uvm_push_get_gpu(push)->parent->host_hal->membar_gpu(push);
}
void uvm_hal_hopper_host_tlb_invalidate_va(uvm_push_t *push,
@ -204,7 +212,7 @@ void uvm_hal_hopper_host_tlb_invalidate_va(uvm_push_t *push,
NvU32 depth,
NvU64 base,
NvU64 size,
NvU32 page_size,
NvU64 page_size,
uvm_membar_t membar)
{
NvU32 aperture_value;
@ -212,6 +220,7 @@ void uvm_hal_hopper_host_tlb_invalidate_va(uvm_push_t *push,
NvU32 pdb_lo;
NvU32 pdb_hi;
NvU32 ack_value = 0;
NvU32 sysmembar_value = 0;
NvU32 va_lo;
NvU32 va_hi;
NvU64 end;
@ -221,9 +230,9 @@ void uvm_hal_hopper_host_tlb_invalidate_va(uvm_push_t *push,
NvU32 log2_invalidation_size;
uvm_gpu_t *gpu = uvm_push_get_gpu(push);
UVM_ASSERT_MSG(IS_ALIGNED(page_size, 1 << 12), "page_size 0x%x\n", page_size);
UVM_ASSERT_MSG(IS_ALIGNED(base, page_size), "base 0x%llx page_size 0x%x\n", base, page_size);
UVM_ASSERT_MSG(IS_ALIGNED(size, page_size), "size 0x%llx page_size 0x%x\n", size, page_size);
UVM_ASSERT_MSG(IS_ALIGNED(page_size, 1 << 12), "page_size 0x%llx\n", page_size);
UVM_ASSERT_MSG(IS_ALIGNED(base, page_size), "base 0x%llx page_size 0x%llx\n", base, page_size);
UVM_ASSERT_MSG(IS_ALIGNED(size, page_size), "size 0x%llx page_size 0x%llx\n", size, page_size);
UVM_ASSERT_MSG(size > 0, "size 0x%llx\n", size);
// The invalidation size must be a power-of-two number of pages containing
@ -277,8 +286,13 @@ void uvm_hal_hopper_host_tlb_invalidate_va(uvm_push_t *push,
ack_value = HWCONST(C86F, MEM_OP_C, TLB_INVALIDATE_ACK_TYPE, GLOBALLY);
}
if (membar == UVM_MEMBAR_SYS)
sysmembar_value = HWCONST(C86F, MEM_OP_A, TLB_INVALIDATE_SYSMEMBAR, EN);
else
sysmembar_value = HWCONST(C86F, MEM_OP_A, TLB_INVALIDATE_SYSMEMBAR, DIS);
NV_PUSH_4U(C86F, MEM_OP_A, HWVALUE(C86F, MEM_OP_A, TLB_INVALIDATE_INVALIDATION_SIZE, log2_invalidation_size) |
HWCONST(C86F, MEM_OP_A, TLB_INVALIDATE_SYSMEMBAR, DIS) |
sysmembar_value |
HWCONST(C86F, MEM_OP_A, TLB_INVALIDATE_INVAL_SCOPE, NON_LINK_TLBS) |
HWVALUE(C86F, MEM_OP_A, TLB_INVALIDATE_TARGET_ADDR_LO, va_lo),
MEM_OP_B, HWVALUE(C86F, MEM_OP_B, TLB_INVALIDATE_TARGET_ADDR_HI, va_hi),
@ -292,7 +306,9 @@ void uvm_hal_hopper_host_tlb_invalidate_va(uvm_push_t *push,
MEM_OP_D, HWCONST(C86F, MEM_OP_D, OPERATION, MMU_TLB_INVALIDATE_TARGETED) |
HWVALUE(C86F, MEM_OP_D, TLB_INVALIDATE_PDB_ADDR_HI, pdb_hi));
uvm_hal_tlb_invalidate_membar(push, membar);
// GPU membar still requires an explicit membar method.
if (membar == UVM_MEMBAR_GPU)
gpu->parent->host_hal->membar_gpu(push);
}
void uvm_hal_hopper_host_tlb_invalidate_test(uvm_push_t *push,
@ -300,12 +316,12 @@ void uvm_hal_hopper_host_tlb_invalidate_test(uvm_push_t *push,
UVM_TEST_INVALIDATE_TLB_PARAMS *params)
{
NvU32 ack_value = 0;
NvU32 sysmembar_value = 0;
NvU32 invalidate_gpc_value = 0;
NvU32 aperture_value = 0;
NvU32 pdb_lo = 0;
NvU32 pdb_hi = 0;
NvU32 page_table_level = 0;
uvm_membar_t membar;
UVM_ASSERT_MSG(pdb.aperture == UVM_APERTURE_VID || pdb.aperture == UVM_APERTURE_SYS, "aperture: %u", pdb.aperture);
if (pdb.aperture == UVM_APERTURE_VID)
@ -332,6 +348,11 @@ void uvm_hal_hopper_host_tlb_invalidate_test(uvm_push_t *push,
ack_value = HWCONST(C86F, MEM_OP_C, TLB_INVALIDATE_ACK_TYPE, GLOBALLY);
}
if (params->membar == UvmInvalidateTlbMemBarSys)
sysmembar_value = HWCONST(C86F, MEM_OP_A, TLB_INVALIDATE_SYSMEMBAR, EN);
else
sysmembar_value = HWCONST(C86F, MEM_OP_A, TLB_INVALIDATE_SYSMEMBAR, DIS);
if (params->disable_gpc_invalidate)
invalidate_gpc_value = HWCONST(C86F, MEM_OP_C, TLB_INVALIDATE_GPC, DISABLE);
else
@ -343,7 +364,7 @@ void uvm_hal_hopper_host_tlb_invalidate_test(uvm_push_t *push,
NvU32 va_lo = va & HWMASK(C86F, MEM_OP_A, TLB_INVALIDATE_TARGET_ADDR_LO);
NvU32 va_hi = va >> HWSIZE(C86F, MEM_OP_A, TLB_INVALIDATE_TARGET_ADDR_LO);
NV_PUSH_4U(C86F, MEM_OP_A, HWCONST(C86F, MEM_OP_A, TLB_INVALIDATE_SYSMEMBAR, DIS) |
NV_PUSH_4U(C86F, MEM_OP_A, sysmembar_value |
HWCONST(C86F, MEM_OP_A, TLB_INVALIDATE_INVAL_SCOPE, NON_LINK_TLBS) |
HWVALUE(C86F, MEM_OP_A, TLB_INVALIDATE_TARGET_ADDR_LO, va_lo),
MEM_OP_B, HWVALUE(C86F, MEM_OP_B, TLB_INVALIDATE_TARGET_ADDR_HI, va_hi),
@ -358,7 +379,7 @@ void uvm_hal_hopper_host_tlb_invalidate_test(uvm_push_t *push,
HWVALUE(C86F, MEM_OP_D, TLB_INVALIDATE_PDB_ADDR_HI, pdb_hi));
}
else {
NV_PUSH_4U(C86F, MEM_OP_A, HWCONST(C86F, MEM_OP_A, TLB_INVALIDATE_SYSMEMBAR, DIS) |
NV_PUSH_4U(C86F, MEM_OP_A, sysmembar_value |
HWCONST(C86F, MEM_OP_A, TLB_INVALIDATE_INVAL_SCOPE, NON_LINK_TLBS),
MEM_OP_B, 0,
MEM_OP_C, HWCONST(C86F, MEM_OP_C, TLB_INVALIDATE_REPLAY, NONE) |
@ -372,14 +393,9 @@ void uvm_hal_hopper_host_tlb_invalidate_test(uvm_push_t *push,
HWVALUE(C86F, MEM_OP_D, TLB_INVALIDATE_PDB_ADDR_HI, pdb_hi));
}
if (params->membar == UvmInvalidateTlbMemBarSys)
membar = UVM_MEMBAR_SYS;
else if (params->membar == UvmInvalidateTlbMemBarLocal)
membar = UVM_MEMBAR_GPU;
else
membar = UVM_MEMBAR_NONE;
uvm_hal_tlb_invalidate_membar(push, membar);
// GPU membar still requires an explicit membar method.
if (params->membar == UvmInvalidateTlbMemBarLocal)
uvm_push_get_gpu(push)->parent->host_hal->membar_gpu(push);
}
void uvm_hal_hopper_host_set_gpfifo_pushbuffer_segment_base(NvU64 *fifo_entry, NvU64 pushbuffer_va)

View File

@ -61,7 +61,7 @@ uvm_mmu_engine_type_t uvm_hal_hopper_mmu_engine_id_to_type(NvU16 mmu_engine_id)
return UVM_MMU_ENGINE_TYPE_GRAPHICS;
}
static NvU32 page_table_depth_hopper(NvU32 page_size)
static NvU32 page_table_depth_hopper(NvU64 page_size)
{
// The common-case is page_size == UVM_PAGE_SIZE_2M, hence the first check
if (page_size == UVM_PAGE_SIZE_2M)
@ -79,7 +79,7 @@ static NvU32 entries_per_index_hopper(NvU32 depth)
return 1;
}
static NvLength entry_offset_hopper(NvU32 depth, NvU32 page_size)
static NvLength entry_offset_hopper(NvU32 depth, NvU64 page_size)
{
UVM_ASSERT(depth < 6);
if ((page_size == UVM_PAGE_SIZE_4K) && (depth == 4))
@ -92,7 +92,7 @@ static NvLength entry_size_hopper(NvU32 depth)
return entries_per_index_hopper(depth) * 8;
}
static NvU32 index_bits_hopper(NvU32 depth, NvU32 page_size)
static NvU32 index_bits_hopper(NvU32 depth, NvU64 page_size)
{
static const NvU32 bit_widths[] = {1, 9, 9, 9, 8};
@ -120,7 +120,7 @@ static NvU32 num_va_bits_hopper(void)
return 57;
}
static NvLength allocation_size_hopper(NvU32 depth, NvU32 page_size)
static NvLength allocation_size_hopper(NvU32 depth, NvU64 page_size)
{
UVM_ASSERT(depth < 6);
if (depth == 5 && page_size == UVM_PAGE_SIZE_64K)
@ -233,7 +233,7 @@ static NvU64 make_sparse_pte_hopper(void)
HWCONST64(_MMU_VER3, PTE, PCF, SPARSE);
}
static NvU64 unmapped_pte_hopper(NvU32 page_size)
static NvU64 unmapped_pte_hopper(NvU64 page_size)
{
// Setting PCF to NO_VALID_4KB_PAGE on an otherwise-zeroed big PTE causes
// the corresponding 4k PTEs to be ignored. This allows the invalidation of
@ -490,7 +490,7 @@ static void make_pde_hopper(void *entry,
static uvm_mmu_mode_hal_t hopper_mmu_mode_hal;
uvm_mmu_mode_hal_t *uvm_hal_mmu_mode_hopper(NvU32 big_page_size)
uvm_mmu_mode_hal_t *uvm_hal_mmu_mode_hopper(NvU64 big_page_size)
{
static bool initialized = false;

View File

@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2013-2023 NVidia Corporation
Copyright (c) 2013-2024 NVidia Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@ -494,7 +494,7 @@ typedef struct
NvU64 base NV_ALIGN_BYTES(8); // IN
NvU64 length NV_ALIGN_BYTES(8); // IN
NvU64 offset NV_ALIGN_BYTES(8); // IN
UvmGpuMappingAttributes perGpuAttributes[UVM_MAX_GPUS_V2]; // IN
UvmGpuMappingAttributes perGpuAttributes[UVM_MAX_GPUS]; // IN
NvU64 gpuAttributesCount NV_ALIGN_BYTES(8); // IN
NvS32 rmCtrlFd; // IN
NvU32 hClient; // IN
@ -952,7 +952,6 @@ typedef struct
NvU32 version; // OUT
} UVM_TOOLS_GET_PROCESSOR_UUID_TABLE_PARAMS;
//
// UvmMapDynamicParallelismRegion
//
@ -995,7 +994,7 @@ typedef struct
{
NvU64 base NV_ALIGN_BYTES(8); // IN
NvU64 length NV_ALIGN_BYTES(8); // IN
UvmGpuMappingAttributes perGpuAttributes[UVM_MAX_GPUS_V2]; // IN
UvmGpuMappingAttributes perGpuAttributes[UVM_MAX_GPUS]; // IN
NvU64 gpuAttributesCount NV_ALIGN_BYTES(8); // IN
NV_STATUS rmStatus; // OUT
} UVM_ALLOC_SEMAPHORE_POOL_PARAMS;

View File

@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2016-2023 NVIDIA Corporation
Copyright (c) 2016-2024 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@ -39,6 +39,7 @@
#include "uvm_pte_batch.h"
#include "uvm_tlb_batch.h"
#include "nv_uvm_interface.h"
#include "nv_uvm_types.h"
#include "uvm_pushbuffer.h"
@ -60,7 +61,7 @@ typedef struct
size_t buffer_size;
// Page size in bytes
NvU32 page_size;
NvU64 page_size;
// Size of a single PTE in bytes
NvU32 pte_size;
@ -90,7 +91,7 @@ static NV_STATUS uvm_pte_buffer_init(uvm_va_range_t *va_range,
uvm_gpu_t *gpu,
const uvm_map_rm_params_t *map_rm_params,
NvU64 length,
NvU32 page_size,
NvU64 page_size,
uvm_pte_buffer_t *pte_buffer)
{
uvm_gpu_va_space_t *gpu_va_space = uvm_gpu_va_space_get(va_range->va_space, gpu);
@ -101,11 +102,11 @@ static NV_STATUS uvm_pte_buffer_init(uvm_va_range_t *va_range,
pte_buffer->va_range = va_range;
pte_buffer->gpu = gpu;
pte_buffer->mapping_info.cachingType = map_rm_params->caching_type;
pte_buffer->mapping_info.mappingType = map_rm_params->mapping_type;
pte_buffer->mapping_info.formatType = map_rm_params->format_type;
pte_buffer->mapping_info.elementBits = map_rm_params->element_bits;
pte_buffer->mapping_info.compressionType = map_rm_params->compression_type;
pte_buffer->mapping_info.cachingType = (UvmRmGpuCachingType) map_rm_params->caching_type;
pte_buffer->mapping_info.mappingType = (UvmRmGpuMappingType) map_rm_params->mapping_type;
pte_buffer->mapping_info.formatType = (UvmRmGpuFormatType) map_rm_params->format_type;
pte_buffer->mapping_info.elementBits = (UvmRmGpuFormatElementBits) map_rm_params->element_bits;
pte_buffer->mapping_info.compressionType = (UvmRmGpuCompressionType) map_rm_params->compression_type;
if (va_range->type == UVM_VA_RANGE_TYPE_EXTERNAL)
pte_buffer->mapping_info.mappingPageSize = page_size;
@ -649,9 +650,7 @@ static NV_STATUS set_ext_gpu_map_location(uvm_ext_gpu_map_t *ext_gpu_map,
return NV_OK;
}
// This is a local or peer allocation, so the owning GPU must have been
// registered.
// This also checks for if EGM owning GPU is registered.
// registered. This also checks whether the EGM owning GPU is registered.
owning_gpu = uvm_va_space_get_gpu_by_uuid(va_space, &mem_info->uuid);
if (!owning_gpu)
return NV_ERR_INVALID_DEVICE;
@ -664,7 +663,6 @@ static NV_STATUS set_ext_gpu_map_location(uvm_ext_gpu_map_t *ext_gpu_map,
// semantics of sysmem allocations.
// Check if peer access for peer memory is enabled.
// This path also handles EGM allocations.
if (owning_gpu != mapping_gpu && (!mem_info->sysmem || mem_info->egm)) {
// TODO: Bug 1757136: In SLI, the returned UUID may be different but a
// local mapping must be used. We need to query SLI groups to know
@ -855,9 +853,10 @@ static NV_STATUS uvm_map_external_allocation_on_gpu(uvm_va_range_t *va_range,
uvm_ext_gpu_range_tree_t *range_tree = uvm_ext_gpu_range_tree(va_range, mapping_gpu);
UvmGpuMemoryInfo mem_info;
uvm_gpu_va_space_t *gpu_va_space = uvm_gpu_va_space_get(va_space, mapping_gpu);
NvU32 mapping_page_size;
NvU64 mapping_page_size;
NvU64 biggest_mapping_page_size;
NvU64 alignments;
NvU32 smallest_alignment;
NvU64 smallest_alignment;
NV_STATUS status;
uvm_assert_rwsem_locked_read(&va_space->lock);
@ -946,9 +945,11 @@ static NV_STATUS uvm_map_external_allocation_on_gpu(uvm_va_range_t *va_range,
// Check the maximum page size for the mapping of vidmem allocations;
// the vMMU segment size may limit the range of page sizes.
biggest_mapping_page_size = uvm_mmu_biggest_page_size_up_to(&gpu_va_space->page_tables,
mapping_gpu->mem_info.max_vidmem_page_size);
if (!ext_gpu_map->is_sysmem && (ext_gpu_map->gpu == ext_gpu_map->owning_gpu) &&
(mapping_page_size > mapping_gpu->mem_info.max_vidmem_page_size))
mapping_page_size = mapping_gpu->mem_info.max_vidmem_page_size;
(mapping_page_size > biggest_mapping_page_size))
mapping_page_size = biggest_mapping_page_size;
mem_info.pageSize = mapping_page_size;
@ -985,7 +986,7 @@ static NV_STATUS uvm_map_external_allocation(uvm_va_space_t *va_space, UVM_MAP_E
if (uvm_api_range_invalid_4k(params->base, params->length))
return NV_ERR_INVALID_ADDRESS;
if (params->gpuAttributesCount == 0 || params->gpuAttributesCount > UVM_MAX_GPUS_V2)
if (params->gpuAttributesCount == 0 || params->gpuAttributesCount > UVM_MAX_GPUS)
return NV_ERR_INVALID_ARGUMENT;
mapped_gpus = uvm_processor_mask_cache_alloc();

View File

@ -108,7 +108,7 @@ void uvm_hal_maxwell_host_tlb_invalidate_va(uvm_push_t *push,
NvU32 depth,
NvU64 base,
NvU64 size,
NvU32 page_size,
NvU64 page_size,
uvm_membar_t membar)
{
// No per VA invalidate on Maxwell, redirect to invalidate all.

View File

@ -52,7 +52,7 @@ static NvU32 entries_per_index_maxwell(NvU32 depth)
return 1;
}
static NvLength entry_offset_maxwell(NvU32 depth, NvU32 page_size)
static NvLength entry_offset_maxwell(NvU32 depth, NvU64 page_size)
{
UVM_ASSERT(depth < 2);
if (page_size == UVM_PAGE_SIZE_4K && depth == 0)
@ -128,7 +128,7 @@ static NvLength entry_size_maxwell(NvU32 depth)
return 8;
}
static NvU32 index_bits_maxwell_64(NvU32 depth, NvU32 page_size)
static NvU32 index_bits_maxwell_64(NvU32 depth, NvU64 page_size)
{
UVM_ASSERT(depth < 2);
UVM_ASSERT(page_size == UVM_PAGE_SIZE_4K ||
@ -146,7 +146,7 @@ static NvU32 index_bits_maxwell_64(NvU32 depth, NvU32 page_size)
}
}
static NvU32 index_bits_maxwell_128(NvU32 depth, NvU32 page_size)
static NvU32 index_bits_maxwell_128(NvU32 depth, NvU64 page_size)
{
UVM_ASSERT(depth < 2);
UVM_ASSERT(page_size == UVM_PAGE_SIZE_4K ||
@ -169,32 +169,32 @@ static NvU32 num_va_bits_maxwell(void)
return 40;
}
static NvLength allocation_size_maxwell_64(NvU32 depth, NvU32 page_size)
static NvLength allocation_size_maxwell_64(NvU32 depth, NvU64 page_size)
{
return entry_size_maxwell(depth) << index_bits_maxwell_64(depth, page_size);
}
static NvLength allocation_size_maxwell_128(NvU32 depth, NvU32 page_size)
static NvLength allocation_size_maxwell_128(NvU32 depth, NvU64 page_size)
{
return entry_size_maxwell(depth) << index_bits_maxwell_128(depth, page_size);
}
static NvU32 page_table_depth_maxwell(NvU32 page_size)
static NvU32 page_table_depth_maxwell(NvU64 page_size)
{
return 1;
}
static NvU32 page_sizes_maxwell_128(void)
static NvU64 page_sizes_maxwell_128(void)
{
return UVM_PAGE_SIZE_128K | UVM_PAGE_SIZE_4K;
}
static NvU32 page_sizes_maxwell_64(void)
static NvU64 page_sizes_maxwell_64(void)
{
return UVM_PAGE_SIZE_64K | UVM_PAGE_SIZE_4K;
}
static NvU64 unmapped_pte_maxwell(NvU32 page_size)
static NvU64 unmapped_pte_maxwell(NvU64 page_size)
{
// Setting the privilege bit on an otherwise-zeroed big PTE causes the
// corresponding 4k PTEs to be ignored. This allows the invalidation of a
@ -356,7 +356,7 @@ static uvm_mmu_mode_hal_t maxwell_128_mmu_mode_hal =
.page_sizes = page_sizes_maxwell_128
};
uvm_mmu_mode_hal_t *uvm_hal_mmu_mode_maxwell(NvU32 big_page_size)
uvm_mmu_mode_hal_t *uvm_hal_mmu_mode_maxwell(NvU64 big_page_size)
{
UVM_ASSERT(big_page_size == UVM_PAGE_SIZE_64K || big_page_size == UVM_PAGE_SIZE_128K);
if (big_page_size == UVM_PAGE_SIZE_64K)

View File

@ -290,15 +290,15 @@ uvm_chunk_sizes_mask_t uvm_mem_kernel_chunk_sizes(uvm_gpu_t *gpu)
// Get the mmu mode hal directly as the internal address space tree has not
// been created yet.
uvm_mmu_mode_hal_t *hal = gpu->parent->arch_hal->mmu_mode_hal(gpu->big_page.internal_size);
NvU32 page_sizes = hal->page_sizes();
NvU64 page_sizes = hal->page_sizes();
return (uvm_chunk_sizes_mask_t)(page_sizes & UVM_CHUNK_SIZES_MASK);
}
static NvU32 mem_pick_chunk_size(uvm_mem_t *mem)
static NvU64 mem_pick_chunk_size(uvm_mem_t *mem)
{
NvU32 biggest_page_size;
NvU32 chunk_size;
NvU64 biggest_page_size;
NvU64 chunk_size;
if (uvm_mem_is_sysmem(mem))
return PAGE_SIZE;
@ -315,12 +315,12 @@ static NvU32 mem_pick_chunk_size(uvm_mem_t *mem)
// When UVM_PAGE_SIZE_DEFAULT is used on NUMA-enabled GPUs, we force
// chunk_size to be PAGE_SIZE at least, to allow CPU mappings.
if (mem->backing_gpu->mem_info.numa.enabled)
chunk_size = max(chunk_size, (NvU32)PAGE_SIZE);
chunk_size = max(chunk_size, (NvU64)PAGE_SIZE);
return chunk_size;
}
static NvU32 mem_pick_gpu_page_size(uvm_mem_t *mem, uvm_gpu_t *gpu, uvm_page_tree_t *gpu_page_tree)
static NvU64 mem_pick_gpu_page_size(uvm_mem_t *mem, uvm_gpu_t *gpu, uvm_page_tree_t *gpu_page_tree)
{
if (uvm_mem_is_vidmem(mem)) {
// For vidmem allocations the chunk size is picked out of the supported
@ -467,7 +467,7 @@ static NV_STATUS mem_alloc_sysmem_dma_chunks(uvm_mem_t *mem, gfp_t gfp_flags)
NvU64 *dma_addrs;
UVM_ASSERT_MSG(mem->chunk_size == PAGE_SIZE,
"mem->chunk_size is 0x%x. PAGE_SIZE is only supported.",
"mem->chunk_size is 0x%llx. PAGE_SIZE is only supported.",
mem->chunk_size);
UVM_ASSERT(uvm_mem_is_sysmem_dma(mem));
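Several hunks in this commit widen page_size and chunk_size fields from NvU32 to NvU64, and the format strings change from "0x%x" to "0x%llx" to match the wider type, as in the assert above. In portable userspace C the equivalent is PRIx64 from <inttypes.h>; a tiny example:

#include <inttypes.h>
#include <stdio.h>

int main(void)
{
    uint64_t chunk_size = UINT64_C(1) << 21;   /* 2MB */

    /* Portable 64-bit format macro; the driver code above uses %llx instead. */
    printf("chunk_size is 0x%" PRIx64 "\n", chunk_size);
    return 0;
}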
@ -528,10 +528,9 @@ static NV_STATUS mem_alloc_sysmem_chunks(uvm_mem_t *mem, gfp_t gfp_flags)
// In case of failure, the caller is required to handle cleanup by calling
// uvm_mem_free
static NV_STATUS mem_alloc_vidmem_chunks(uvm_mem_t *mem, bool zero, bool is_unprotected)
static NV_STATUS mem_alloc_vidmem_chunks(uvm_mem_t *mem, bool zero)
{
NV_STATUS status;
uvm_pmm_gpu_memory_type_t mem_type;
UVM_ASSERT(uvm_mem_is_vidmem(mem));
@ -548,23 +547,15 @@ static NV_STATUS mem_alloc_vidmem_chunks(uvm_mem_t *mem, bool zero, bool is_unpr
if (!mem->vidmem.chunks)
return NV_ERR_NO_MEMORY;
// When CC is disabled the behavior is identical to that of PMM, and the
// protection flag is ignored (squashed by PMM internally).
if (is_unprotected)
mem_type = UVM_PMM_GPU_MEMORY_TYPE_KERNEL_UNPROTECTED;
else
mem_type = UVM_PMM_GPU_MEMORY_TYPE_KERNEL_PROTECTED;
status = uvm_pmm_gpu_alloc(&mem->backing_gpu->pmm,
mem->chunks_count,
mem->chunk_size,
mem_type,
UVM_PMM_ALLOC_FLAGS_NONE,
mem->vidmem.chunks,
NULL);
status = uvm_pmm_gpu_alloc_kernel(&mem->backing_gpu->pmm,
mem->chunks_count,
mem->chunk_size,
UVM_PMM_ALLOC_FLAGS_NONE,
mem->vidmem.chunks,
NULL);
if (status != NV_OK) {
UVM_ERR_PRINT("uvm_pmm_gpu_alloc (count=%zd, size=0x%x) failed: %s\n",
UVM_ERR_PRINT("uvm_pmm_gpu_alloc_kernel (count=%zd, size=0x%llx) failed: %s\n",
mem->chunks_count,
mem->chunk_size,
nvstatusToString(status));
@ -574,7 +565,7 @@ static NV_STATUS mem_alloc_vidmem_chunks(uvm_mem_t *mem, bool zero, bool is_unpr
return NV_OK;
}
static NV_STATUS mem_alloc_chunks(uvm_mem_t *mem, struct mm_struct *mm, bool zero, bool is_unprotected)
static NV_STATUS mem_alloc_chunks(uvm_mem_t *mem, struct mm_struct *mm, bool zero)
{
if (uvm_mem_is_sysmem(mem)) {
gfp_t gfp_flags;
@ -596,7 +587,7 @@ static NV_STATUS mem_alloc_chunks(uvm_mem_t *mem, struct mm_struct *mm, bool zer
return status;
}
return mem_alloc_vidmem_chunks(mem, zero, is_unprotected);
return mem_alloc_vidmem_chunks(mem, zero);
}
NV_STATUS uvm_mem_map_kernel(uvm_mem_t *mem, const uvm_processor_mask_t *mask)
@ -626,7 +617,6 @@ NV_STATUS uvm_mem_alloc(const uvm_mem_alloc_params_t *params, uvm_mem_t **mem_ou
NV_STATUS status;
NvU64 physical_size;
uvm_mem_t *mem = NULL;
bool is_unprotected = false;
UVM_ASSERT(params->size > 0);
@ -648,12 +638,7 @@ NV_STATUS uvm_mem_alloc(const uvm_mem_alloc_params_t *params, uvm_mem_t **mem_ou
physical_size = UVM_ALIGN_UP(mem->size, mem->chunk_size);
mem->chunks_count = physical_size / mem->chunk_size;
if (params->is_unprotected)
UVM_ASSERT(uvm_mem_is_vidmem(mem));
is_unprotected = params->is_unprotected;
status = mem_alloc_chunks(mem, params->mm, params->zero, is_unprotected);
status = mem_alloc_chunks(mem, params->mm, params->zero);
if (status != NV_OK)
goto error;
@ -1050,7 +1035,7 @@ static NV_STATUS mem_map_gpu(uvm_mem_t *mem,
uvm_page_table_range_vec_t **range_vec)
{
NV_STATUS status;
NvU32 page_size;
NvU64 page_size;
uvm_pmm_alloc_flags_t pmm_flags = UVM_PMM_ALLOC_FLAGS_EVICT;
uvm_mem_pte_maker_data_t pte_maker_data = {
@ -1059,7 +1044,7 @@ static NV_STATUS mem_map_gpu(uvm_mem_t *mem,
};
page_size = mem_pick_gpu_page_size(mem, gpu, tree);
UVM_ASSERT_MSG(uvm_mmu_page_size_supported(tree, page_size), "page_size 0x%x\n", page_size);
UVM_ASSERT_MSG(uvm_mmu_page_size_supported(tree, page_size), "page_size 0x%llx\n", page_size);
// When the Confidential Computing feature is enabled, DMA allocations are
// majoritarily allocated and managed by a per-GPU DMA buffer pool

View File

@ -126,12 +126,7 @@ typedef struct
//
// CPU mappings will always use PAGE_SIZE, so the physical allocation chunk
// has to be aligned to PAGE_SIZE.
NvU32 page_size;
// The protection flag is only observed for vidmem allocations when CC is
// enabled. If set to true, the allocation returns unprotected vidmem;
// otherwise, the allocation returns protected vidmem.
bool is_unprotected;
NvU64 page_size;
// If true, the allocation is zeroed (scrubbed).
bool zero;
@ -199,7 +194,7 @@ struct uvm_mem_struct
size_t chunks_count;
// Size of each physical chunk (vidmem) or CPU page (sysmem)
NvU32 chunk_size;
NvU64 chunk_size;
// Size of the allocation
NvU64 size;

View File

@ -153,7 +153,7 @@ static NV_STATUS check_accessible_from_gpu(uvm_gpu_t *gpu, uvm_mem_t *mem)
for (i = 0; i < verif_size / sizeof(*sys_verif); ++i) {
if (sys_verif[i] != mem->size + i) {
UVM_TEST_PRINT("Verif failed for %zd = 0x%llx instead of 0x%llx, verif_size=0x%llx mem(size=0x%llx, page_size=%u, processor=%u)\n",
UVM_TEST_PRINT("Verif failed for %zd = 0x%llx instead of 0x%llx, verif_size=0x%llx mem(size=0x%llx, page_size=%llu, processor=%u)\n",
i,
sys_verif[i],
(NvU64)(verif_size + i),
@ -241,7 +241,7 @@ static NV_STATUS test_map_cpu(uvm_mem_t *mem)
return NV_OK;
}
static NV_STATUS test_alloc_sysmem(uvm_va_space_t *va_space, NvU32 page_size, size_t size, uvm_mem_t **mem_out)
static NV_STATUS test_alloc_sysmem(uvm_va_space_t *va_space, NvU64 page_size, size_t size, uvm_mem_t **mem_out)
{
NV_STATUS status;
uvm_mem_t *mem;
@ -299,7 +299,7 @@ error:
return status;
}
static NV_STATUS test_alloc_vidmem(uvm_gpu_t *gpu, NvU32 page_size, size_t size, uvm_mem_t **mem_out)
static NV_STATUS test_alloc_vidmem(uvm_gpu_t *gpu, NvU64 page_size, size_t size, uvm_mem_t **mem_out)
{
NV_STATUS status;
uvm_mem_t *mem;
@ -334,7 +334,7 @@ error:
return status;
}
static bool should_test_page_size(size_t alloc_size, NvU32 page_size)
static bool should_test_page_size(size_t alloc_size, NvU64 page_size)
{
if (g_uvm_global.num_simulated_devices == 0)
return true;
@ -359,7 +359,7 @@ static NV_STATUS test_all(uvm_va_space_t *va_space)
// size on pre-Pascal GPUs with 128K big page size.
// Ampere+ also supports 512M PTEs, but since UVM's maximum chunk size is
// 2M, we don't test for this page size.
static const NvU32 cpu_chunk_sizes = PAGE_SIZE | UVM_PAGE_SIZE_64K | UVM_PAGE_SIZE_128K | UVM_PAGE_SIZE_2M;
static const NvU64 cpu_chunk_sizes = PAGE_SIZE | UVM_PAGE_SIZE_64K | UVM_PAGE_SIZE_128K | UVM_PAGE_SIZE_2M;
// All supported page sizes will be tested, CPU has the most with 4 and +1
// for the default.
@ -494,41 +494,6 @@ done:
return status;
}
static NV_STATUS test_basic_vidmem_unprotected(uvm_gpu_t *gpu)
{
NV_STATUS status = NV_OK;
uvm_mem_t *mem = NULL;
uvm_mem_alloc_params_t params = { 0 };
params.size = UVM_PAGE_SIZE_4K;
params.backing_gpu = gpu;
params.page_size = UVM_PAGE_SIZE_4K;
// If CC is enabled, the protection flag is observed. Because currently all
// vidmem is in the protected region, the allocation should succeed.
//
// If CC is disabled, the protection flag is ignored.
params.is_unprotected = false;
TEST_NV_CHECK_RET(uvm_mem_alloc(&params, &mem));
uvm_mem_free(mem);
mem = NULL;
// If CC is enabled, the allocation should fail because currently the
// unprotected region is empty.
//
// If CC is disabled, the behavior should be identical to that of a
// protected allocation.
params.is_unprotected = true;
if (g_uvm_global.conf_computing_enabled)
TEST_CHECK_RET(uvm_mem_alloc(&params, &mem) == NV_ERR_NO_MEMORY);
else
TEST_NV_CHECK_RET(uvm_mem_alloc(&params, &mem));
uvm_mem_free(mem);
return status;
}
static NV_STATUS test_basic_sysmem(void)
{
NV_STATUS status = NV_OK;
@ -613,7 +578,6 @@ static NV_STATUS test_basic(uvm_va_space_t *va_space)
for_each_va_space_gpu(gpu, va_space) {
TEST_NV_CHECK_RET(test_basic_vidmem(gpu));
TEST_NV_CHECK_RET(test_basic_sysmem_dma(gpu));
TEST_NV_CHECK_RET(test_basic_vidmem_unprotected(gpu));
TEST_NV_CHECK_RET(test_basic_dma_pool(gpu));
}

View File

@ -153,20 +153,17 @@ static NV_STATUS phys_mem_allocate_sysmem(uvm_page_tree_t *tree, NvLength size,
// - UVM_APERTURE_VID biggest page size on vidmem mappings
// - UVM_APERTURE_SYS biggest page size on sysmem mappings
// - UVM_APERTURE_PEER_0-7 biggest page size on peer mappings
static NvU32 mmu_biggest_page_size(uvm_page_tree_t *tree, uvm_aperture_t aperture)
static NvU64 mmu_biggest_page_size(uvm_page_tree_t *tree, uvm_aperture_t aperture)
{
UVM_ASSERT(aperture < UVM_APERTURE_DEFAULT);
// There may be scenarios where the GMMU must use a subset of the supported
// page sizes, e.g., to comply with the vMMU supported page sizes due to
// segmentation sizes.
if (aperture == UVM_APERTURE_VID) {
UVM_ASSERT(tree->gpu->mem_info.max_vidmem_page_size <= NV_U32_MAX);
return (NvU32) tree->gpu->mem_info.max_vidmem_page_size;
}
else {
return 1 << __fls(tree->hal->page_sizes());
}
if (aperture == UVM_APERTURE_VID)
return uvm_mmu_biggest_page_size_up_to(tree, tree->gpu->mem_info.max_vidmem_page_size);
return 1ULL << __fls(tree->hal->page_sizes());
}
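mmu_biggest_page_size() now treats page sizes uniformly as bits in an NvU64 mask: the biggest supported size is 1ULL shifted by the index of the highest set bit, and for vidmem the result is further capped at max_vidmem_page_size through uvm_mmu_biggest_page_size_up_to(). The helper below sketches that "highest set bit no larger than a limit" computation in standalone C; it is an illustration under those assumptions, not the driver's implementation.

#include <stdint.h>
#include <stdio.h>

/* Highest power-of-two page size present in 'mask' that is <= 'limit',
 * or 0 if none qualifies. 'limit' is assumed to be a power of two. */
static uint64_t biggest_page_size_up_to(uint64_t mask, uint64_t limit)
{
    /* limit | (limit - 1) keeps every bit up to and including the limit bit. */
    uint64_t capped = mask & (limit | (limit - 1));

    if (capped == 0)
        return 0;
    return UINT64_C(1) << (63 - __builtin_clzll(capped));
}

int main(void)
{
    /* 4K | 64K | 2M | 512M supported, capped at 2M. */
    uint64_t mask = 0x1000 | 0x10000 | 0x200000 | 0x20000000;

    printf("0x%llx\n", (unsigned long long)biggest_page_size_up_to(mask, 0x200000));
    return 0;
}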
static NV_STATUS phys_mem_allocate_vidmem(uvm_page_tree_t *tree,
@ -254,7 +251,7 @@ static void phys_mem_deallocate(uvm_page_tree_t *tree, uvm_mmu_page_table_alloc_
}
static void page_table_range_init(uvm_page_table_range_t *range,
NvU32 page_size,
NvU64 page_size,
uvm_page_directory_t *dir,
NvU32 start_index,
NvU32 end_index)
@ -444,9 +441,9 @@ static void pde_fill(uvm_page_tree_t *tree,
pde_fill_cpu(tree, directory, start_index, pde_count, phys_addr);
}
static void phys_mem_init(uvm_page_tree_t *tree, NvU32 page_size, uvm_page_directory_t *dir, uvm_push_t *push)
static void phys_mem_init(uvm_page_tree_t *tree, NvU64 page_size, uvm_page_directory_t *dir, uvm_push_t *push)
{
NvU32 entries_count = uvm_mmu_page_tree_entries(tree, dir->depth, page_size);
NvU64 entries_count = uvm_mmu_page_tree_entries(tree, dir->depth, page_size);
NvU8 max_pde_depth = tree->hal->page_table_depth(UVM_PAGE_SIZE_AGNOSTIC) - 1;
// Passing in NULL for the phys_allocs will mark the child entries as
@ -497,7 +494,7 @@ static void phys_mem_init(uvm_page_tree_t *tree, NvU32 page_size, uvm_page_direc
}
static uvm_page_directory_t *allocate_directory(uvm_page_tree_t *tree,
NvU32 page_size,
NvU64 page_size,
NvU32 depth,
uvm_pmm_alloc_flags_t pmm_flags)
{
@ -546,7 +543,7 @@ static inline NvU32 entry_index_from_vaddr(NvU64 vaddr, NvU32 addr_bit_shift, Nv
return (NvU32)((vaddr >> addr_bit_shift) & mask);
}
static inline NvU32 index_to_entry(uvm_mmu_mode_hal_t *hal, NvU32 entry_index, NvU32 depth, NvU32 page_size)
static inline NvU32 index_to_entry(uvm_mmu_mode_hal_t *hal, NvU32 entry_index, NvU32 depth, NvU64 page_size)
{
return hal->entries_per_index(depth) * entry_index + hal->entry_offset(depth, page_size);
}
@ -583,7 +580,7 @@ static void pde_write(uvm_page_tree_t *tree,
pde_fill(tree, dir, entry_index, 1, phys_allocs, push);
}
static void host_pde_clear(uvm_page_tree_t *tree, uvm_page_directory_t *dir, NvU32 entry_index, NvU32 page_size)
static void host_pde_clear(uvm_page_tree_t *tree, uvm_page_directory_t *dir, NvU32 entry_index, NvU64 page_size)
{
UVM_ASSERT(dir->ref_count > 0);
@ -594,35 +591,38 @@ static void host_pde_clear(uvm_page_tree_t *tree, uvm_page_directory_t *dir, NvU
static void pde_clear(uvm_page_tree_t *tree,
uvm_page_directory_t *dir,
NvU32 entry_index,
NvU32 page_size,
NvU64 page_size,
uvm_push_t *push)
{
host_pde_clear(tree, dir, entry_index, page_size);
pde_write(tree, dir, entry_index, false, push);
}
static uvm_chunk_sizes_mask_t allocation_sizes_for_big_page_size(uvm_parent_gpu_t *parent_gpu, NvU32 big_page_size)
static uvm_chunk_sizes_mask_t allocation_sizes_for_big_page_size(uvm_parent_gpu_t *parent_gpu, NvU64 big_page_size)
{
uvm_chunk_sizes_mask_t alloc_sizes = 0;
uvm_mmu_mode_hal_t *hal = parent_gpu->arch_hal->mmu_mode_hal(big_page_size);
unsigned long page_sizes, page_size_log2;
uvm_chunk_sizes_mask_t alloc_sizes;
if (hal != NULL) {
unsigned long page_size_log2;
unsigned long page_sizes = hal->page_sizes();
BUILD_BUG_ON(sizeof(hal->page_sizes()) > sizeof(page_sizes));
if (hal == NULL)
return 0;
for_each_set_bit(page_size_log2, &page_sizes, BITS_PER_LONG) {
NvU32 i;
NvU32 page_size = (NvU32)(1ULL << page_size_log2);
for (i = 0; i <= hal->page_table_depth(page_size); i++)
alloc_sizes |= hal->allocation_size(i, page_size);
}
page_sizes = hal->page_sizes();
alloc_sizes = 0;
BUILD_BUG_ON(sizeof(hal->page_sizes()) > sizeof(page_sizes));
for_each_set_bit(page_size_log2, &page_sizes, BITS_PER_LONG) {
NvU32 i;
NvU64 page_size = 1ULL << page_size_log2;
for (i = 0; i <= hal->page_table_depth(page_size); i++)
alloc_sizes |= hal->allocation_size(i, page_size);
}
return alloc_sizes;
}
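allocation_sizes_for_big_page_size() walks every supported page size by iterating over the set bits of hal->page_sizes() (the kernel's for_each_set_bit) and ORs in the allocation size of each page-table depth for that page size. The standalone loop below mimics that bit iteration in plain C; the per-size contribution is a toy stand-in, not the real allocation_size() callback.

#include <stdint.h>
#include <stdio.h>

int main(void)
{
    uint64_t page_sizes = 0x1000 | 0x10000 | 0x200000;  /* 4K, 64K, 2M */
    uint64_t alloc_sizes = 0;
    uint64_t remaining = page_sizes;

    /* Visit each set bit from lowest to highest, like for_each_set_bit(). */
    while (remaining) {
        unsigned bit = (unsigned)__builtin_ctzll(remaining);
        uint64_t page_size = UINT64_C(1) << bit;

        /* Toy stand-in for OR-ing allocation_size(depth, page_size)
         * over every depth of the tree for this page size. */
        alloc_sizes |= page_size;

        remaining &= remaining - 1;                     /* clear lowest set bit */
        printf("visited page size 0x%llx\n", (unsigned long long)page_size);
    }

    printf("accumulated mask 0x%llx\n", (unsigned long long)alloc_sizes);
    return 0;
}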
static NvU32 page_sizes_for_big_page_size(uvm_parent_gpu_t *parent_gpu, NvU32 big_page_size)
static NvU64 page_sizes_for_big_page_size(uvm_parent_gpu_t *parent_gpu, NvU64 big_page_size)
{
uvm_mmu_mode_hal_t *hal = parent_gpu->arch_hal->mmu_mode_hal(big_page_size);
@ -662,7 +662,7 @@ static NV_STATUS page_tree_end_and_wait(uvm_page_tree_t *tree, uvm_push_t *push)
}
static NV_STATUS write_gpu_state_cpu(uvm_page_tree_t *tree,
NvU32 page_size,
NvU64 page_size,
NvS32 invalidate_depth,
NvU32 used_count,
uvm_page_directory_t **dirs_used)
@ -713,7 +713,7 @@ static NV_STATUS write_gpu_state_cpu(uvm_page_tree_t *tree,
}
static NV_STATUS write_gpu_state_gpu(uvm_page_tree_t *tree,
NvU32 page_size,
NvU64 page_size,
NvS32 invalidate_depth,
NvU32 used_count,
uvm_page_directory_t **dirs_used)
@ -805,7 +805,7 @@ static NV_STATUS write_gpu_state_gpu(uvm_page_tree_t *tree,
// initialize new page tables and insert them into the tree
static NV_STATUS write_gpu_state(uvm_page_tree_t *tree,
NvU32 page_size,
NvU64 page_size,
NvS32 invalidate_depth,
NvU32 used_count,
uvm_page_directory_t **dirs_used)
@ -842,7 +842,7 @@ static void free_unused_directories(uvm_page_tree_t *tree,
}
}
static NV_STATUS allocate_page_table(uvm_page_tree_t *tree, NvU32 page_size, uvm_mmu_page_table_alloc_t *out)
static NV_STATUS allocate_page_table(uvm_page_tree_t *tree, NvU64 page_size, uvm_mmu_page_table_alloc_t *out)
{
NvU32 depth = tree->hal->page_table_depth(page_size);
NvLength alloc_size = tree->hal->allocation_size(depth, page_size);
@ -871,7 +871,7 @@ static NV_STATUS page_tree_ats_init(uvm_page_tree_t *tree)
{
NV_STATUS status;
NvU64 min_va_upper, max_va_lower;
NvU32 page_size;
NvU64 page_size;
if (!page_tree_ats_init_required(tree))
return NV_OK;
@ -1090,7 +1090,7 @@ static void page_tree_set_location(uvm_page_tree_t *tree, uvm_aperture_t locatio
NV_STATUS uvm_page_tree_init(uvm_gpu_t *gpu,
uvm_gpu_va_space_t *gpu_va_space,
uvm_page_tree_type_t type,
NvU32 big_page_size,
NvU64 big_page_size,
uvm_aperture_t location,
uvm_page_tree_t *tree)
{
@ -1110,7 +1110,7 @@ NV_STATUS uvm_page_tree_init(uvm_gpu_t *gpu,
tree->gpu_va_space = gpu_va_space;
tree->big_page_size = big_page_size;
UVM_ASSERT(gpu->mem_info.max_vidmem_page_size & tree->hal->page_sizes());
UVM_ASSERT(uvm_mmu_page_size_supported(tree, big_page_size));
page_tree_set_location(tree, location);
@ -1347,7 +1347,7 @@ NV_STATUS uvm_page_tree_wait(uvm_page_tree_t *tree)
}
static NV_STATUS try_get_ptes(uvm_page_tree_t *tree,
NvU32 page_size,
NvU64 page_size,
NvU64 start,
NvLength size,
uvm_page_table_range_t *range,
@ -1379,7 +1379,7 @@ static NV_STATUS try_get_ptes(uvm_page_tree_t *tree,
// This algorithm will work with unaligned ranges, but the caller's intent
// is unclear
UVM_ASSERT_MSG(start % page_size == 0 && size % page_size == 0,
"start 0x%llx size 0x%zx page_size 0x%x\n",
"start 0x%llx size 0x%zx page_size 0x%llx\n",
start,
(size_t)size,
page_size);
@ -1448,7 +1448,7 @@ static NV_STATUS map_remap(uvm_page_tree_t *tree, NvU64 start, NvLength size, uv
{
NV_STATUS status;
uvm_push_t push;
NvU32 page_sizes;
NvU64 page_sizes;
uvm_mmu_page_table_alloc_t *phys_alloc[1];
// TODO: Bug 2734399
@ -1460,7 +1460,7 @@ static NV_STATUS map_remap(uvm_page_tree_t *tree, NvU64 start, NvLength size, uv
status = page_tree_begin_acquire(tree,
&tree->tracker,
&push,
"map remap: [0x%llx, 0x%llx), page_size: %d",
"map remap: [0x%llx, 0x%llx), page_size: %lld",
start,
start + size,
range->page_size);
@ -1500,7 +1500,7 @@ static NV_STATUS map_remap(uvm_page_tree_t *tree, NvU64 start, NvLength size, uv
}
NV_STATUS uvm_page_tree_get_ptes_async(uvm_page_tree_t *tree,
NvU32 page_size,
NvU64 page_size,
NvU64 start,
NvLength size,
uvm_pmm_alloc_flags_t pmm_flags,
@ -1545,7 +1545,7 @@ NV_STATUS uvm_page_tree_get_ptes_async(uvm_page_tree_t *tree,
}
NV_STATUS uvm_page_tree_get_ptes(uvm_page_tree_t *tree,
NvU32 page_size,
NvU64 page_size,
NvU64 start,
NvLength size,
uvm_pmm_alloc_flags_t pmm_flags,
@ -1596,7 +1596,7 @@ void uvm_page_table_range_shrink(uvm_page_tree_t *tree, uvm_page_table_range_t *
}
NV_STATUS uvm_page_tree_get_entry(uvm_page_tree_t *tree,
NvU32 page_size,
NvU64 page_size,
NvU64 start,
uvm_pmm_alloc_flags_t pmm_flags,
uvm_page_table_range_t *single)
@ -1621,7 +1621,7 @@ void uvm_page_tree_clear_pde(uvm_page_tree_t *tree, uvm_page_table_range_t *sing
static NV_STATUS poison_ptes(uvm_page_tree_t *tree,
uvm_page_directory_t *pte_dir,
uvm_page_directory_t *parent,
NvU32 page_size)
NvU64 page_size)
{
NV_STATUS status;
uvm_push_t push;
@ -1633,7 +1633,7 @@ static NV_STATUS poison_ptes(uvm_page_tree_t *tree,
// The flat mappings should always be set up when executing this path
UVM_ASSERT(!uvm_mmu_use_cpu(tree));
status = page_tree_begin_acquire(tree, &tree->tracker, &push, "Poisoning child table of page size %u", page_size);
status = page_tree_begin_acquire(tree, &tree->tracker, &push, "Poisoning child table of page size %llu", page_size);
if (status != NV_OK)
return status;
@ -1660,7 +1660,7 @@ static NV_STATUS poison_ptes(uvm_page_tree_t *tree,
}
NV_STATUS uvm_page_tree_alloc_table(uvm_page_tree_t *tree,
NvU32 page_size,
NvU64 page_size,
uvm_pmm_alloc_flags_t pmm_flags,
uvm_page_table_range_t *single,
uvm_page_table_range_t *children)
@ -1768,7 +1768,7 @@ static size_t range_vec_calc_range_index(uvm_page_table_range_vec_t *range_vec,
NV_STATUS uvm_page_table_range_vec_init(uvm_page_tree_t *tree,
NvU64 start,
NvU64 size,
NvU32 page_size,
NvU64 page_size,
uvm_pmm_alloc_flags_t pmm_flags,
uvm_page_table_range_vec_t *range_vec)
{
@ -1776,8 +1776,8 @@ NV_STATUS uvm_page_table_range_vec_init(uvm_page_tree_t *tree,
size_t i;
UVM_ASSERT(size != 0);
UVM_ASSERT_MSG(IS_ALIGNED(start, page_size), "start 0x%llx page_size 0x%x\n", start, page_size);
UVM_ASSERT_MSG(IS_ALIGNED(size, page_size), "size 0x%llx page_size 0x%x\n", size, page_size);
UVM_ASSERT_MSG(IS_ALIGNED(start, page_size), "start 0x%llx page_size 0x%llx\n", start, page_size);
UVM_ASSERT_MSG(IS_ALIGNED(size, page_size), "size 0x%llx page_size 0x%llx\n", size, page_size);
range_vec->tree = tree;
range_vec->page_size = page_size;
@ -1826,7 +1826,7 @@ out:
NV_STATUS uvm_page_table_range_vec_create(uvm_page_tree_t *tree,
NvU64 start,
NvU64 size,
NvU32 page_size,
NvU64 page_size,
uvm_pmm_alloc_flags_t pmm_flags,
uvm_page_table_range_vec_t **range_vec_out)
{
@ -1952,7 +1952,7 @@ static NV_STATUS uvm_page_table_range_vec_clear_ptes_gpu(uvm_page_table_range_ve
size_t i;
uvm_page_tree_t *tree = range_vec->tree;
uvm_gpu_t *gpu = tree->gpu;
NvU32 page_size = range_vec->page_size;
NvU64 page_size = range_vec->page_size;
NvU32 entry_size = uvm_mmu_pte_size(tree, page_size);
NvU64 invalid_pte = 0;
uvm_push_t push;
@ -2237,7 +2237,7 @@ static NV_STATUS create_identity_mapping(uvm_gpu_t *gpu,
NvU64 size,
uvm_aperture_t aperture,
NvU64 phys_offset,
NvU32 page_size,
NvU64 page_size,
uvm_pmm_alloc_flags_t pmm_flags)
{
NV_STATUS status;
@ -2312,7 +2312,7 @@ bool uvm_mmu_parent_gpu_needs_dynamic_sysmem_mapping(uvm_parent_gpu_t *parent_gp
NV_STATUS create_static_vidmem_mapping(uvm_gpu_t *gpu)
{
NvU32 page_size;
NvU64 page_size;
NvU64 size;
uvm_aperture_t aperture = UVM_APERTURE_VID;
NvU64 phys_offset = 0;
@ -2351,7 +2351,7 @@ static void destroy_static_vidmem_mapping(uvm_gpu_t *gpu)
NV_STATUS uvm_mmu_create_peer_identity_mappings(uvm_gpu_t *gpu, uvm_gpu_t *peer)
{
NvU32 page_size;
NvU64 page_size;
NvU64 size;
uvm_aperture_t aperture;
NvU64 phys_offset;
@ -2535,7 +2535,7 @@ static void root_chunk_mapping_destroy(uvm_gpu_t *gpu, uvm_gpu_root_chunk_mappin
uvm_push_t push;
NvU32 entry_size;
uvm_pte_batch_t pte_batch;
NvU32 page_size;
NvU64 page_size;
NvU64 size;
NvU64 invalid_pte;
uvm_page_table_range_t *range = root_chunk_mapping->range;
@ -2585,7 +2585,7 @@ static NV_STATUS root_chunk_mapping_create(uvm_gpu_t *gpu, uvm_gpu_root_chunk_ma
uvm_push_t push;
NvU64 pte_bits;
NvU32 entry_size;
NvU32 page_size = UVM_CHUNK_SIZE_MAX;
NvU64 page_size = UVM_CHUNK_SIZE_MAX;
NvU64 size = UVM_CHUNK_SIZE_MAX;
range = uvm_kvmalloc_zero(sizeof(*range));
@ -2852,7 +2852,7 @@ NV_STATUS uvm_mmu_sysmem_map(uvm_gpu_t *gpu, NvU64 pa, NvU64 size)
if (sysmem_mapping->range_vec == NULL) {
uvm_gpu_address_t virtual_address = uvm_parent_gpu_address_virtual_from_sysmem_phys(gpu->parent, curr_pa);
NvU64 phys_offset = curr_pa;
NvU32 page_size = mmu_biggest_page_size(&gpu->address_space_tree, UVM_APERTURE_SYS);
NvU64 page_size = mmu_biggest_page_size(&gpu->address_space_tree, UVM_APERTURE_SYS);
uvm_pmm_alloc_flags_t pmm_flags;
// No eviction is requested when allocating the page tree storage,

View File

@ -208,7 +208,7 @@ struct uvm_mmu_mode_hal_struct
// This is an optimization which reduces TLB pressure, reduces the number of
// TLB invalidates we must issue, and means we don't have to initialize the
// 4k PTEs which are covered by big PTEs since the MMU will never read them.
NvU64 (*unmapped_pte)(NvU32 page_size);
NvU64 (*unmapped_pte)(NvU64 page_size);
// Bit pattern used for debug purposes to clobber PTEs which ought to be
// unused. In practice this will generate a PRIV violation or a physical
@ -234,23 +234,23 @@ struct uvm_mmu_mode_hal_struct
// For dual PDEs, this is either 1 or 0, depending on the page size.
// This is used to index the host copy only. GPU PDEs are always entirely
// re-written using make_pde.
NvLength (*entry_offset)(NvU32 depth, NvU32 page_size);
NvLength (*entry_offset)(NvU32 depth, NvU64 page_size);
// number of virtual address bits used to index the directory/table at a
// given depth
NvU32 (*index_bits)(NvU32 depth, NvU32 page_size);
NvU32 (*index_bits)(NvU32 depth, NvU64 page_size);
// total number of bits that represent the virtual address space
NvU32 (*num_va_bits)(void);
// the size, in bytes, of a directory/table at a given depth.
NvLength (*allocation_size)(NvU32 depth, NvU32 page_size);
NvLength (*allocation_size)(NvU32 depth, NvU64 page_size);
// the depth which corresponds to the page tables
NvU32 (*page_table_depth)(NvU32 page_size);
NvU32 (*page_table_depth)(NvU64 page_size);
// bitwise-or of supported page sizes
NvU32 (*page_sizes)(void);
NvU64 (*page_sizes)(void);
};
struct uvm_page_table_range_struct
@ -258,7 +258,7 @@ struct uvm_page_table_range_struct
uvm_page_directory_t *table;
NvU32 start_index;
NvU32 entry_count;
NvU32 page_size;
NvU64 page_size;
};
typedef enum
@ -275,7 +275,7 @@ struct uvm_page_tree_struct
uvm_page_directory_t *root;
uvm_mmu_mode_hal_t *hal;
uvm_page_tree_type_t type;
NvU32 big_page_size;
NvU64 big_page_size;
// Pointer to the GPU VA space containing the page tree.
// This pointer is set only for page trees of type
@ -325,7 +325,7 @@ struct uvm_page_table_range_vec_struct
NvU64 size;
// Page size used for all the page table ranges
NvU32 page_size;
NvU64 page_size;
// Page table ranges covering the VA
uvm_page_table_range_t *ranges;
@ -352,7 +352,7 @@ void uvm_mmu_init_gpu_peer_addresses(uvm_gpu_t *gpu);
NV_STATUS uvm_page_tree_init(uvm_gpu_t *gpu,
uvm_gpu_va_space_t *gpu_va_space,
uvm_page_tree_type_t type,
NvU32 big_page_size,
NvU64 big_page_size,
uvm_aperture_t location,
uvm_page_tree_t *tree_out);
@ -374,7 +374,7 @@ void uvm_page_tree_deinit(uvm_page_tree_t *tree);
// an existing range or change the size of an existing range, use
// uvm_page_table_range_get_upper() and/or uvm_page_table_range_shrink().
NV_STATUS uvm_page_tree_get_ptes(uvm_page_tree_t *tree,
NvU32 page_size,
NvU64 page_size,
NvU64 start,
NvLength size,
uvm_pmm_alloc_flags_t pmm_flags,
@ -384,7 +384,7 @@ NV_STATUS uvm_page_tree_get_ptes(uvm_page_tree_t *tree,
//
// All pending operations can be waited on with uvm_page_tree_wait().
NV_STATUS uvm_page_tree_get_ptes_async(uvm_page_tree_t *tree,
NvU32 page_size,
NvU64 page_size,
NvU64 start,
NvLength size,
uvm_pmm_alloc_flags_t pmm_flags,
@ -395,7 +395,7 @@ NV_STATUS uvm_page_tree_get_ptes_async(uvm_page_tree_t *tree,
// This is equivalent to calling uvm_page_tree_get_ptes() with size equal to
// page_size.
NV_STATUS uvm_page_tree_get_entry(uvm_page_tree_t *tree,
NvU32 page_size,
NvU64 page_size,
NvU64 start,
uvm_pmm_alloc_flags_t pmm_flags,
uvm_page_table_range_t *single);
@ -426,7 +426,7 @@ void uvm_page_tree_clear_pde(uvm_page_tree_t *tree, uvm_page_table_range_t *sing
// It is the caller's responsibility to initialize the returned table before
// calling uvm_page_tree_write_pde.
NV_STATUS uvm_page_tree_alloc_table(uvm_page_tree_t *tree,
NvU32 page_size,
NvU64 page_size,
uvm_pmm_alloc_flags_t pmm_flags,
uvm_page_table_range_t *single,
uvm_page_table_range_t *children);
@ -480,7 +480,7 @@ static uvm_mmu_page_table_alloc_t *uvm_page_tree_pdb(uvm_page_tree_t *tree)
NV_STATUS uvm_page_table_range_vec_init(uvm_page_tree_t *tree,
NvU64 start,
NvU64 size,
NvU32 page_size,
NvU64 page_size,
uvm_pmm_alloc_flags_t pmm_flags,
uvm_page_table_range_vec_t *range_vec);
@ -489,7 +489,7 @@ NV_STATUS uvm_page_table_range_vec_init(uvm_page_tree_t *tree,
NV_STATUS uvm_page_table_range_vec_create(uvm_page_tree_t *tree,
NvU64 start,
NvU64 size,
NvU32 page_size,
NvU64 page_size,
uvm_pmm_alloc_flags_t pmm_flags,
uvm_page_table_range_vec_t **range_vec_out);
@ -601,12 +601,12 @@ void uvm_mmu_chunk_unmap(uvm_gpu_chunk_t *chunk, uvm_tracker_t *tracker);
// uvm_parent_gpu_map_cpu_pages for the given GPU.
NV_STATUS uvm_mmu_sysmem_map(uvm_gpu_t *gpu, NvU64 pa, NvU64 size);
static NvU64 uvm_mmu_page_tree_entries(uvm_page_tree_t *tree, NvU32 depth, NvU32 page_size)
static NvU64 uvm_mmu_page_tree_entries(uvm_page_tree_t *tree, NvU32 depth, NvU64 page_size)
{
return 1ull << tree->hal->index_bits(depth, page_size);
}
static NvU64 uvm_mmu_pde_coverage(uvm_page_tree_t *tree, NvU32 page_size)
static NvU64 uvm_mmu_pde_coverage(uvm_page_tree_t *tree, NvU64 page_size)
{
NvU32 depth = tree->hal->page_table_depth(page_size);
return uvm_mmu_page_tree_entries(tree, depth, page_size) * page_size;
@ -615,21 +615,21 @@ static NvU64 uvm_mmu_pde_coverage(uvm_page_tree_t *tree, NvU32 page_size)
// Page sizes supported by the GPU. Use uvm_mmu_biggest_page_size() to retrieve
// the largest page size supported in a given system, which considers the GMMU
// and vMMU page sizes and segment sizes.
static bool uvm_mmu_page_size_supported(uvm_page_tree_t *tree, NvU32 page_size)
static bool uvm_mmu_page_size_supported(uvm_page_tree_t *tree, NvU64 page_size)
{
UVM_ASSERT_MSG(is_power_of_2(page_size), "0x%x\n", page_size);
UVM_ASSERT_MSG(is_power_of_2(page_size), "0x%llx\n", page_size);
return (tree->hal->page_sizes() & page_size) != 0;
}
static NvU32 uvm_mmu_biggest_page_size_up_to(uvm_page_tree_t *tree, NvU32 max_page_size)
static NvU64 uvm_mmu_biggest_page_size_up_to(uvm_page_tree_t *tree, NvU64 max_page_size)
{
NvU32 gpu_page_sizes = tree->hal->page_sizes();
NvU32 smallest_gpu_page_size = gpu_page_sizes & ~(gpu_page_sizes - 1);
NvU32 page_sizes;
NvU32 page_size;
NvU64 gpu_page_sizes = tree->hal->page_sizes();
NvU64 smallest_gpu_page_size = gpu_page_sizes & ~(gpu_page_sizes - 1);
NvU64 page_sizes;
NvU64 page_size;
UVM_ASSERT_MSG(is_power_of_2(max_page_size), "0x%x\n", max_page_size);
UVM_ASSERT_MSG(is_power_of_2(max_page_size), "0x%llx\n", max_page_size);
if (max_page_size < smallest_gpu_page_size)
return 0;
@ -638,14 +638,14 @@ static NvU32 uvm_mmu_biggest_page_size_up_to(uvm_page_tree_t *tree, NvU32 max_pa
page_sizes = gpu_page_sizes & (max_page_size | (max_page_size - 1));
// And pick the biggest one of them
page_size = 1 << __fls(page_sizes);
page_size = 1ULL << __fls(page_sizes);
UVM_ASSERT_MSG(uvm_mmu_page_size_supported(tree, page_size), "page_size 0x%x", page_size);
UVM_ASSERT_MSG(uvm_mmu_page_size_supported(tree, page_size), "page_size 0x%llx", page_size);
return page_size;
}
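The helper above keeps only the supported sizes at or below max_page_size and then takes the highest remaining bit; the 1ULL shift is what keeps that correct now that the page-size mask is 64 bits wide. A standalone sketch of the same selection logic, using plain uint64_t and __builtin_clzll as stand-ins for the driver's NvU64 and __fls (illustration only, not driver code):

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

/* Pick the largest power-of-two page size in 'supported' (a bitmask of
 * sizes) that does not exceed 'max_page_size'. Standalone sketch of the
 * selection logic in the hunk above. */
static uint64_t biggest_page_size_up_to(uint64_t supported, uint64_t max_page_size)
{
    uint64_t smallest = supported & ~(supported - 1);   /* lowest set bit */
    uint64_t candidates;

    assert(max_page_size && (max_page_size & (max_page_size - 1)) == 0);

    if (max_page_size < smallest)
        return 0;

    /* Keep only sizes <= max_page_size, then take the highest remaining bit.
     * The 1ULL keeps the shift 64-bit wide. */
    candidates = supported & (max_page_size | (max_page_size - 1));
    return 1ULL << (63 - __builtin_clzll(candidates));
}

int main(void)
{
    uint64_t sizes = (4ULL << 10) | (64ULL << 10) | (2ULL << 20) | (512ULL << 20);

    printf("0x%llx\n", (unsigned long long)biggest_page_size_up_to(sizes, 2ULL << 20));
    return 0;
}

Running this prints 0x200000: with a 2MB cap, the 2MB size is chosen even though 512MB is also in the supported mask.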
static NvU32 uvm_mmu_pte_size(uvm_page_tree_t *tree, NvU32 page_size)
static NvU32 uvm_mmu_pte_size(uvm_page_tree_t *tree, NvU64 page_size)
{
return tree->hal->entry_size(tree->hal->page_table_depth(page_size));
}

View File

@ -96,7 +96,7 @@ typedef struct
{
NvU64 base;
NvU64 size;
NvU32 page_size;
NvU64 page_size;
NvU32 depth;
uvm_membar_t membar;
} fake_tlb_invalidate_t;
@ -153,7 +153,7 @@ static void fake_tlb_invalidate_va(uvm_push_t *push,
NvU32 depth,
NvU64 base,
NvU64 size,
NvU32 page_size,
NvU64 page_size,
uvm_membar_t membar)
{
if (!g_fake_tlb_invals_tracking_enabled)
@ -249,7 +249,11 @@ static bool assert_last_invalidate_all(NvU32 expected_depth, bool expected_memba
}
static bool assert_invalidate_range_specific(fake_tlb_invalidate_t *inval,
NvU64 base, NvU64 size, NvU32 page_size, NvU32 expected_depth, bool expected_membar)
NvU64 base,
NvU64 size,
NvU64 page_size,
NvU32 expected_depth,
bool expected_membar)
{
UVM_ASSERT(g_fake_tlb_invals_tracking_enabled);
@ -271,7 +275,7 @@ static bool assert_invalidate_range_specific(fake_tlb_invalidate_t *inval,
return false;
}
if (inval->page_size != page_size && inval->base != 0 && inval->size != -1) {
UVM_TEST_PRINT("Expected page size %u, got %u instead\n", page_size, inval->page_size);
UVM_TEST_PRINT("Expected page size %llu, got %llu instead\n", page_size, inval->page_size);
return false;
}
@ -280,7 +284,7 @@ static bool assert_invalidate_range_specific(fake_tlb_invalidate_t *inval,
static bool assert_invalidate_range(NvU64 base,
NvU64 size,
NvU32 page_size,
NvU64 page_size,
bool allow_inval_all,
NvU32 range_depth,
NvU32 all_depth,
@ -325,7 +329,7 @@ static NV_STATUS test_page_tree_init_kernel(uvm_gpu_t *gpu, NvU32 big_page_size,
}
static NV_STATUS test_page_tree_get_ptes(uvm_page_tree_t *tree,
NvU32 page_size,
NvU64 page_size,
NvU64 start,
NvLength size,
uvm_page_table_range_t *range)
@ -341,7 +345,7 @@ static NV_STATUS test_page_tree_get_ptes(uvm_page_tree_t *tree,
}
static NV_STATUS test_page_tree_get_entry(uvm_page_tree_t *tree,
NvU32 page_size,
NvU64 page_size,
NvU64 start,
uvm_page_table_range_t *single)
{
@ -355,14 +359,14 @@ static NV_STATUS test_page_tree_get_entry(uvm_page_tree_t *tree,
}
static NV_STATUS test_page_tree_alloc_table(uvm_page_tree_t *tree,
NvU32 page_size,
NvU64 page_size,
uvm_page_table_range_t *single,
uvm_page_table_range_t *children)
{
return uvm_page_tree_alloc_table(tree, page_size, UVM_PMM_ALLOC_FLAGS_NONE, single, children);
}
static bool assert_entry_no_invalidate(uvm_page_tree_t *tree, NvU32 page_size, NvU64 start)
static bool assert_entry_no_invalidate(uvm_page_tree_t *tree, NvU64 page_size, NvU64 start)
{
uvm_page_table_range_t entry;
bool result = true;
@ -378,7 +382,7 @@ static bool assert_entry_no_invalidate(uvm_page_tree_t *tree, NvU32 page_size, N
return assert_no_invalidate() && result;
}
static bool assert_entry_invalidate(uvm_page_tree_t *tree, NvU32 page_size, NvU64 start, NvU32 depth, bool membar)
static bool assert_entry_invalidate(uvm_page_tree_t *tree, NvU64 page_size, NvU64 start, NvU32 depth, bool membar)
{
uvm_page_table_range_t entry;
bool result = true;
@ -932,8 +936,8 @@ static NV_STATUS split_and_free(uvm_gpu_t *gpu)
static NV_STATUS check_sizes(uvm_gpu_t *gpu)
{
NvU32 user_sizes = UVM_PAGE_SIZE_2M;
NvU32 kernel_sizes = UVM_PAGE_SIZE_4K | 256;
NvU64 user_sizes = UVM_PAGE_SIZE_2M;
NvU64 kernel_sizes = UVM_PAGE_SIZE_4K | 256;
if (UVM_PAGE_SIZE_64K >= PAGE_SIZE)
user_sizes |= UVM_PAGE_SIZE_64K;
@ -1161,7 +1165,7 @@ static NV_STATUS test_tlb_batch_invalidates_case(uvm_page_tree_t *tree,
return status;
}
static NV_STATUS test_tlb_batch_invalidates(uvm_gpu_t *gpu, const NvU32 *page_sizes, const NvU32 page_sizes_count)
static NV_STATUS test_tlb_batch_invalidates(uvm_gpu_t *gpu, const NvU64 *page_sizes, const NvU32 page_sizes_count)
{
NV_STATUS status = NV_OK;
uvm_page_tree_t tree;
@ -1177,8 +1181,8 @@ static NV_STATUS test_tlb_batch_invalidates(uvm_gpu_t *gpu, const NvU32 *page_si
for (min_index = 0; min_index < page_sizes_count; ++min_index) {
for (max_index = min_index; max_index < page_sizes_count; ++max_index) {
for (size_index = 0; size_index < ARRAY_SIZE(sizes_in_max_pages); ++size_index) {
NvU32 min_page_size = page_sizes[min_index];
NvU32 max_page_size = page_sizes[max_index];
NvU64 min_page_size = page_sizes[min_index];
NvU64 max_page_size = page_sizes[max_index];
NvU64 size = (NvU64)sizes_in_max_pages[size_index] * max_page_size;
TEST_CHECK_GOTO(test_tlb_batch_invalidates_case(&tree,
@ -1282,7 +1286,7 @@ static NV_STATUS test_range_vec_clear_ptes(uvm_page_table_range_vec_t *range_vec
static NV_STATUS test_range_vec_create(uvm_page_tree_t *tree,
NvU64 start,
NvU64 size,
NvU32 page_size,
NvU64 page_size,
uvm_page_table_range_vec_t **range_vec_out)
{
uvm_page_table_range_vec_t *range_vec;
@ -1303,7 +1307,7 @@ static NV_STATUS test_range_vec_create(uvm_page_tree_t *tree,
// Test page table range vector APIs.
// Notably the test leaks the page_tree and range_vec on error as it's hard to
// clean up on failure and the destructors would likely assert.
static NV_STATUS test_range_vec(uvm_gpu_t *gpu, NvU32 big_page_size, NvU32 page_size)
static NV_STATUS test_range_vec(uvm_gpu_t *gpu, NvU32 big_page_size, NvU64 page_size)
{
NV_STATUS status = NV_OK;
uvm_page_tree_t tree;
@ -1511,7 +1515,7 @@ static uvm_mmu_page_table_alloc_t fake_table_alloc(uvm_aperture_t aperture, NvU6
// Queries the supported page sizes of the GPU (uvm_gpu_t) and fills the
// page_sizes array up to MAX_NUM_PAGE_SIZES. Returns the number of elements
// in page_sizes.
size_t get_page_sizes(uvm_gpu_t *gpu, NvU32 *page_sizes)
size_t get_page_sizes(uvm_gpu_t *gpu, NvU64 *page_sizes)
{
unsigned long page_size_log2;
unsigned long page_sizes_bitvec;
@ -1524,7 +1528,7 @@ size_t get_page_sizes(uvm_gpu_t *gpu, NvU32 *page_sizes)
page_sizes_bitvec = hal->page_sizes();
for_each_set_bit(page_size_log2, &page_sizes_bitvec, BITS_PER_LONG) {
NvU32 page_size = (NvU32)(1ULL << page_size_log2);
NvU64 page_size = 1ULL << page_size_log2;
UVM_ASSERT(count < MAX_NUM_PAGE_SIZES);
page_sizes[count++] = page_size;
}
@ -1572,7 +1576,7 @@ typedef NV_STATUS (*entry_test_page_size_func)(uvm_gpu_t *gpu, size_t page_size)
static NV_STATUS entry_test_maxwell(uvm_gpu_t *gpu)
{
static const NvU32 big_page_sizes[] = {UVM_PAGE_SIZE_64K, UVM_PAGE_SIZE_128K};
static const NvU64 big_page_sizes[] = {UVM_PAGE_SIZE_64K, UVM_PAGE_SIZE_128K};
NvU64 pde_bits;
uvm_mmu_page_table_alloc_t *phys_allocs[2];
uvm_mmu_page_table_alloc_t alloc_sys = fake_table_alloc(UVM_APERTURE_SYS, 0x9999999000LL);
@ -1663,7 +1667,7 @@ static NV_STATUS entry_test_maxwell(uvm_gpu_t *gpu)
static NV_STATUS entry_test_pascal(uvm_gpu_t *gpu, entry_test_page_size_func entry_test_page_size)
{
NvU32 page_sizes[MAX_NUM_PAGE_SIZES];
NvU64 page_sizes[MAX_NUM_PAGE_SIZES];
NvU64 pde_bits[2];
size_t i, num_page_sizes;
uvm_mmu_page_table_alloc_t *phys_allocs[2] = {NULL, NULL};
@ -1759,7 +1763,7 @@ static NV_STATUS entry_test_pascal(uvm_gpu_t *gpu, entry_test_page_size_func ent
static NV_STATUS entry_test_volta(uvm_gpu_t *gpu, entry_test_page_size_func entry_test_page_size)
{
NvU32 page_sizes[MAX_NUM_PAGE_SIZES];
NvU64 page_sizes[MAX_NUM_PAGE_SIZES];
NvU64 pde_bits[2];
size_t i, num_page_sizes;
uvm_mmu_page_table_alloc_t *phys_allocs[2] = {NULL, NULL};
@ -1833,7 +1837,7 @@ static NV_STATUS entry_test_volta(uvm_gpu_t *gpu, entry_test_page_size_func entr
static NV_STATUS entry_test_ampere(uvm_gpu_t *gpu, entry_test_page_size_func entry_test_page_size)
{
NvU32 page_sizes[MAX_NUM_PAGE_SIZES];
NvU64 page_sizes[MAX_NUM_PAGE_SIZES];
NvU32 i, num_page_sizes;
num_page_sizes = get_page_sizes(gpu, page_sizes);
@ -1847,7 +1851,7 @@ static NV_STATUS entry_test_ampere(uvm_gpu_t *gpu, entry_test_page_size_func ent
static NV_STATUS entry_test_hopper(uvm_gpu_t *gpu, entry_test_page_size_func entry_test_page_size)
{
NV_STATUS status = NV_OK;
NvU32 page_sizes[MAX_NUM_PAGE_SIZES];
NvU64 page_sizes[MAX_NUM_PAGE_SIZES];
NvU64 pde_bits[2];
uvm_page_directory_t *dirs[5];
size_t i, num_page_sizes;
@ -2290,8 +2294,8 @@ static NV_STATUS fake_gpu_init_hopper(uvm_gpu_t *fake_gpu)
static NV_STATUS maxwell_test_page_tree(uvm_gpu_t *maxwell)
{
// create a fake Maxwell GPU for this test.
static const NvU32 big_page_sizes[] = {UVM_PAGE_SIZE_64K, UVM_PAGE_SIZE_128K};
NvU32 i, j, big_page_size, page_size;
static const NvU64 big_page_sizes[] = {UVM_PAGE_SIZE_64K, UVM_PAGE_SIZE_128K};
NvU64 i, j, big_page_size, page_size;
TEST_CHECK_RET(fake_gpu_init_maxwell(maxwell) == NV_OK);
@ -2320,7 +2324,7 @@ static NV_STATUS pascal_test_page_tree(uvm_gpu_t *pascal)
// create a fake Pascal GPU for this test.
NvU32 tlb_batch_saved_max_pages;
NvU32 i;
NvU32 page_sizes[MAX_NUM_PAGE_SIZES];
NvU64 page_sizes[MAX_NUM_PAGE_SIZES];
size_t num_page_sizes;
TEST_CHECK_RET(fake_gpu_init_pascal(pascal) == NV_OK);
@ -2381,7 +2385,7 @@ static NV_STATUS volta_test_page_tree(uvm_gpu_t *volta)
static NV_STATUS ampere_test_page_tree(uvm_gpu_t *ampere)
{
NvU32 i, tlb_batch_saved_max_pages;
NvU32 page_sizes[MAX_NUM_PAGE_SIZES];
NvU64 page_sizes[MAX_NUM_PAGE_SIZES];
size_t num_page_sizes;
TEST_CHECK_RET(fake_gpu_init_ampere(ampere) == NV_OK);

View File

@ -92,7 +92,13 @@ void uvm_hal_pascal_host_tlb_invalidate_all(uvm_push_t *push, uvm_gpu_phys_addre
uvm_hal_tlb_invalidate_membar(push, membar);
}
void uvm_hal_pascal_host_tlb_invalidate_va(uvm_push_t *push, uvm_gpu_phys_address_t pdb, NvU32 depth, NvU64 base, NvU64 size, NvU32 page_size, uvm_membar_t membar)
void uvm_hal_pascal_host_tlb_invalidate_va(uvm_push_t *push,
uvm_gpu_phys_address_t pdb,
NvU32 depth,
NvU64 base,
NvU64 size,
NvU64 page_size,
uvm_membar_t membar)
{
NvU32 aperture_value;
NvU32 page_table_level;
@ -127,9 +133,9 @@ void uvm_hal_pascal_host_tlb_invalidate_va(uvm_push_t *push, uvm_gpu_phys_addres
ack_value = HWCONST(C06F, MEM_OP_C, TLB_INVALIDATE_ACK_TYPE, GLOBALLY);
}
UVM_ASSERT_MSG(IS_ALIGNED(page_size, 1 << 12), "page_size 0x%x\n", page_size);
UVM_ASSERT_MSG(IS_ALIGNED(base, page_size), "base 0x%llx page_size 0x%x\n", base, page_size);
UVM_ASSERT_MSG(IS_ALIGNED(size, page_size), "size 0x%llx page_size 0x%x\n", size, page_size);
UVM_ASSERT_MSG(IS_ALIGNED(page_size, 1 << 12), "page_size 0x%llx\n", page_size);
UVM_ASSERT_MSG(IS_ALIGNED(base, page_size), "base 0x%llx page_size 0x%llx\n", base, page_size);
UVM_ASSERT_MSG(IS_ALIGNED(size, page_size), "size 0x%llx page_size 0x%llx\n", size, page_size);
UVM_ASSERT_MSG(size > 0, "size 0x%llx\n", size);
base >>= 12;
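The assert-format changes in this hunk follow from page_size widening to NvU64: once the argument is 64-bit, the "0x%x" conversions no longer match the argument type (a format/argument mismatch is undefined behavior), so they become "0x%llx". A minimal standalone illustration in userspace C (not driver code):

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

int main(void)
{
    uint64_t page_size = 2ULL << 20;   /* 2MB, now carried in a 64-bit type */

    /* printf("0x%x\n", page_size);                      mismatched conversion */
    printf("0x%llx\n", (unsigned long long)page_size);   /* what the asserts switch to */
    printf("0x%" PRIx64 "\n", page_size);                /* portable alternative */
    return 0;
}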

View File

@ -54,7 +54,7 @@ static NvU32 entries_per_index_pascal(NvU32 depth)
return 1;
}
static NvLength entry_offset_pascal(NvU32 depth, NvU32 page_size)
static NvLength entry_offset_pascal(NvU32 depth, NvU64 page_size)
{
UVM_ASSERT(depth < 5);
if (page_size == UVM_PAGE_SIZE_4K && depth == 3)
@ -178,7 +178,7 @@ static NvLength entry_size_pascal(NvU32 depth)
return 8;
}
static NvU32 index_bits_pascal(NvU32 depth, NvU32 page_size)
static NvU32 index_bits_pascal(NvU32 depth, NvU64 page_size)
{
static const NvU32 bit_widths[] = {2, 9, 9, 8};
// some code paths keep on querying this until they get a 0, meaning only the page offset remains.
@ -204,7 +204,7 @@ static NvU32 num_va_bits_pascal(void)
return 49;
}
static NvLength allocation_size_pascal(NvU32 depth, NvU32 page_size)
static NvLength allocation_size_pascal(NvU32 depth, NvU64 page_size)
{
UVM_ASSERT(depth < 5);
if (depth == 4 && page_size == UVM_PAGE_SIZE_64K)
@ -213,7 +213,7 @@ static NvLength allocation_size_pascal(NvU32 depth, NvU32 page_size)
return 4096;
}
static NvU32 page_table_depth_pascal(NvU32 page_size)
static NvU32 page_table_depth_pascal(NvU64 page_size)
{
if (page_size == UVM_PAGE_SIZE_2M)
return 3;
@ -221,12 +221,12 @@ static NvU32 page_table_depth_pascal(NvU32 page_size)
return 4;
}
static NvU32 page_sizes_pascal(void)
static NvU64 page_sizes_pascal(void)
{
return UVM_PAGE_SIZE_2M | UVM_PAGE_SIZE_64K | UVM_PAGE_SIZE_4K;
}
static NvU64 unmapped_pte_pascal(NvU32 page_size)
static NvU64 unmapped_pte_pascal(NvU64 page_size)
{
// Setting the privilege bit on an otherwise-zeroed big PTE causes the
// corresponding 4k PTEs to be ignored. This allows the invalidation of a
@ -362,7 +362,7 @@ static uvm_mmu_mode_hal_t pascal_mmu_mode_hal =
.page_sizes = page_sizes_pascal
};
uvm_mmu_mode_hal_t *uvm_hal_mmu_mode_pascal(NvU32 big_page_size)
uvm_mmu_mode_hal_t *uvm_hal_mmu_mode_pascal(NvU64 big_page_size)
{
UVM_ASSERT(big_page_size == UVM_PAGE_SIZE_64K || big_page_size == UVM_PAGE_SIZE_128K);

View File

@ -162,7 +162,7 @@ static void grow_fault_granularity_if_no_thrashing(uvm_perf_prefetch_bitmap_tree
}
static void grow_fault_granularity(uvm_perf_prefetch_bitmap_tree_t *bitmap_tree,
NvU32 big_page_size,
NvU64 big_page_size,
uvm_va_block_region_t big_pages_region,
uvm_va_block_region_t max_prefetch_region,
const uvm_page_mask_t *faulted_pages,
@ -245,7 +245,7 @@ static void update_bitmap_tree_from_va_block(uvm_perf_prefetch_bitmap_tree_t *bi
uvm_va_block_region_t max_prefetch_region)
{
NvU32 big_page_size;
NvU64 big_page_size;
uvm_va_block_region_t big_pages_region;
uvm_va_space_t *va_space;
const uvm_page_mask_t *thrashing_pages;

View File

@ -1987,21 +1987,12 @@ NV_STATUS uvm_perf_thrashing_init(void)
UVM_PERF_THRASHING_PIN_THRESHOLD_DEFAULT,
UVM_PERF_THRASHING_PIN_THRESHOLD_MAX);
// In Confidential Computing, the DMA path is slower due to cryptographic
// operations & other associated overhead. Enforce a larger window to allow
// the thrashing mitigation mechanisms to work properly.
if (g_uvm_global.conf_computing_enabled)
INIT_THRASHING_PARAMETER_NONZERO(uvm_perf_thrashing_lapse_usec, UVM_PERF_THRASHING_LAPSE_USEC_DEFAULT * 10);
else
INIT_THRASHING_PARAMETER_NONZERO(uvm_perf_thrashing_lapse_usec, UVM_PERF_THRASHING_LAPSE_USEC_DEFAULT);
INIT_THRASHING_PARAMETER_NONZERO(uvm_perf_thrashing_lapse_usec, UVM_PERF_THRASHING_LAPSE_USEC_DEFAULT);
INIT_THRASHING_PARAMETER_NONZERO_MAX(uvm_perf_thrashing_nap,
UVM_PERF_THRASHING_NAP_DEFAULT,
UVM_PERF_THRASHING_NAP_MAX);
INIT_THRASHING_PARAMETER_NONZERO(uvm_perf_thrashing_epoch, UVM_PERF_THRASHING_EPOCH_DEFAULT);
INIT_THRASHING_PARAMETER(uvm_perf_thrashing_pin, UVM_PERF_THRASHING_PIN_DEFAULT);

View File

@ -1890,8 +1890,11 @@ static uvm_gpu_chunk_t *claim_free_chunk(uvm_pmm_gpu_t *pmm, uvm_pmm_gpu_memory_
if (!chunk)
goto out;
UVM_ASSERT_MSG(uvm_gpu_chunk_get_size(chunk) == chunk_size, "chunk size %u expected %u\n",
uvm_gpu_chunk_get_size(chunk), chunk_size);
UVM_ASSERT_MSG(uvm_gpu_chunk_get_size(chunk) == chunk_size,
"chunk size %u expected %u\n",
uvm_gpu_chunk_get_size(chunk),
chunk_size);
UVM_ASSERT(chunk->type == type);
UVM_ASSERT(chunk->state == UVM_PMM_GPU_CHUNK_STATE_FREE);
UVM_ASSERT(!chunk_is_in_eviction(pmm, chunk));
@ -2756,7 +2759,7 @@ static bool uvm_pmm_should_inject_pma_eviction_error(uvm_pmm_gpu_t *pmm)
// See the documentation of pmaEvictPagesCb_t in pma.h for details of the
// expected semantics.
static NV_STATUS uvm_pmm_gpu_pma_evict_pages(void *void_pmm,
NvU32 page_size,
NvU64 page_size,
NvU64 *pages,
NvU32 num_pages_to_evict,
NvU64 phys_start,
@ -2861,7 +2864,7 @@ error:
}
static NV_STATUS uvm_pmm_gpu_pma_evict_pages_wrapper(void *void_pmm,
NvU32 page_size,
NvU64 page_size,
NvU64 *pages,
NvU32 num_pages_to_evict,
NvU64 phys_start,

View File

@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2015-2023 NVIDIA Corporation
Copyright (c) 2015-2024 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@ -65,30 +65,30 @@
typedef enum
{
UVM_CHUNK_SIZE_1 = 1ULL,
UVM_CHUNK_SIZE_2 = 2ULL,
UVM_CHUNK_SIZE_4 = 4ULL,
UVM_CHUNK_SIZE_8 = 8ULL,
UVM_CHUNK_SIZE_16 = 16ULL,
UVM_CHUNK_SIZE_32 = 32ULL,
UVM_CHUNK_SIZE_64 = 64ULL,
UVM_CHUNK_SIZE_128 = 128ULL,
UVM_CHUNK_SIZE_256 = 256ULL,
UVM_CHUNK_SIZE_512 = 512ULL,
UVM_CHUNK_SIZE_1K = 1024ULL,
UVM_CHUNK_SIZE_2K = 2*1024ULL,
UVM_CHUNK_SIZE_4K = 4*1024ULL,
UVM_CHUNK_SIZE_8K = 8*1024ULL,
UVM_CHUNK_SIZE_16K = 16*1024ULL,
UVM_CHUNK_SIZE_32K = 32*1024ULL,
UVM_CHUNK_SIZE_64K = 64*1024ULL,
UVM_CHUNK_SIZE_128K = 128*1024ULL,
UVM_CHUNK_SIZE_256K = 256*1024ULL,
UVM_CHUNK_SIZE_512K = 512*1024ULL,
UVM_CHUNK_SIZE_1M = 1024*1024ULL,
UVM_CHUNK_SIZE_2M = 2*1024*1024ULL,
UVM_CHUNK_SIZE_1 = 1,
UVM_CHUNK_SIZE_2 = 2,
UVM_CHUNK_SIZE_4 = 4,
UVM_CHUNK_SIZE_8 = 8,
UVM_CHUNK_SIZE_16 = 16,
UVM_CHUNK_SIZE_32 = 32,
UVM_CHUNK_SIZE_64 = 64,
UVM_CHUNK_SIZE_128 = 128,
UVM_CHUNK_SIZE_256 = 256,
UVM_CHUNK_SIZE_512 = 512,
UVM_CHUNK_SIZE_1K = 1024,
UVM_CHUNK_SIZE_2K = 2*1024,
UVM_CHUNK_SIZE_4K = 4*1024,
UVM_CHUNK_SIZE_8K = 8*1024,
UVM_CHUNK_SIZE_16K = 16*1024,
UVM_CHUNK_SIZE_32K = 32*1024,
UVM_CHUNK_SIZE_64K = 64*1024,
UVM_CHUNK_SIZE_128K = 128*1024,
UVM_CHUNK_SIZE_256K = 256*1024,
UVM_CHUNK_SIZE_512K = 512*1024,
UVM_CHUNK_SIZE_1M = 1024*1024,
UVM_CHUNK_SIZE_2M = 2*1024*1024,
UVM_CHUNK_SIZE_MAX = UVM_CHUNK_SIZE_2M,
UVM_CHUNK_SIZE_INVALID = UVM_CHUNK_SIZE_MAX * 2ULL
UVM_CHUNK_SIZE_INVALID = UVM_CHUNK_SIZE_MAX * 2
} uvm_chunk_size_t;
#define UVM_CHUNK_SIZES_MASK (uvm_chunk_sizes_mask_t)(UVM_CHUNK_SIZE_MAX | (UVM_CHUNK_SIZE_MAX-1))

View File

@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2017-2023 NVIDIA Corporation
Copyright (c) 2017-2024 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@ -43,7 +43,7 @@ NV_STATUS uvm_pmm_sysmem_init(void)
// Ensure that only supported CPU chunk sizes are enabled.
uvm_cpu_chunk_allocation_sizes &= UVM_CPU_CHUNK_SIZES;
if (!uvm_cpu_chunk_allocation_sizes || !(uvm_cpu_chunk_allocation_sizes & PAGE_SIZE)) {
pr_info("Invalid value for uvm_cpu_chunk_allocation_sizes = 0x%x, using 0x%lx instead\n",
pr_info("Invalid value for uvm_cpu_chunk_allocation_sizes = 0x%x, using 0x%llx instead\n",
uvm_cpu_chunk_allocation_sizes,
UVM_CPU_CHUNK_SIZES);
uvm_cpu_chunk_allocation_sizes = UVM_CPU_CHUNK_SIZES;
@ -461,69 +461,12 @@ static NvU32 compute_gpu_mappings_entry_index(uvm_parent_processor_mask_t *dma_a
return uvm_parent_processor_mask_get_gpu_count(&subset_mask);
}
static void cpu_chunk_release(nv_kref_t *kref)
{
uvm_cpu_chunk_t *chunk = container_of(kref, uvm_cpu_chunk_t, refcount);
uvm_parent_processor_mask_t *mapping_mask;
uvm_parent_processor_id_t id;
uvm_cpu_physical_chunk_t *phys_chunk = NULL;
uvm_cpu_logical_chunk_t *logical_chunk = NULL;
if (uvm_cpu_chunk_is_physical(chunk)) {
phys_chunk = uvm_cpu_chunk_to_physical(chunk);
uvm_assert_mutex_unlocked(&phys_chunk->lock);
mapping_mask = &phys_chunk->gpu_mappings.dma_addrs_mask;
}
else {
logical_chunk = uvm_cpu_chunk_to_logical(chunk);
mapping_mask = &logical_chunk->mapped_gpus;
}
for_each_parent_id_in_mask(id, mapping_mask) {
uvm_parent_gpu_t *parent_gpu = uvm_parent_gpu_get(id);
uvm_cpu_chunk_unmap_parent_gpu_phys(chunk, parent_gpu);
}
if (uvm_cpu_chunk_is_physical(chunk)) {
if (phys_chunk->gpu_mappings.max_entries > 1)
uvm_kvfree(phys_chunk->gpu_mappings.dynamic_entries);
if (uvm_cpu_chunk_get_size(chunk) > PAGE_SIZE &&
!bitmap_empty(phys_chunk->dirty_bitmap, uvm_cpu_chunk_num_pages(chunk)))
SetPageDirty(phys_chunk->common.page);
uvm_kvfree(phys_chunk->dirty_bitmap);
if (chunk->type != UVM_CPU_CHUNK_TYPE_HMM)
put_page(phys_chunk->common.page);
}
else {
uvm_cpu_chunk_free(logical_chunk->parent);
}
uvm_kvfree(chunk);
}
static void uvm_cpu_chunk_get(uvm_cpu_chunk_t *chunk)
{
UVM_ASSERT(chunk);
nv_kref_get(&chunk->refcount);
}
void uvm_cpu_chunk_free(uvm_cpu_chunk_t *chunk)
{
if (!chunk)
return;
nv_kref_put(&chunk->refcount, cpu_chunk_release);
}
static uvm_cpu_physical_chunk_t *get_physical_parent(uvm_cpu_chunk_t *chunk)
{
UVM_ASSERT(chunk);
UVM_ASSERT(chunk->page);
while (!uvm_cpu_chunk_is_physical(chunk))
while (uvm_cpu_chunk_is_logical(chunk))
chunk = uvm_cpu_chunk_to_logical(chunk)->parent;
return uvm_cpu_chunk_to_physical(chunk);
@ -581,6 +524,7 @@ static uvm_cpu_phys_mapping_t *chunk_phys_mapping_alloc(uvm_cpu_physical_chunk_t
static uvm_cpu_phys_mapping_t *chunk_phys_mapping_get(uvm_cpu_physical_chunk_t *chunk, uvm_parent_gpu_id_t id)
{
uvm_assert_mutex_locked(&chunk->lock);
if (uvm_parent_processor_mask_test(&chunk->gpu_mappings.dma_addrs_mask, id)) {
if (chunk->gpu_mappings.max_entries == 1) {
return &chunk->gpu_mappings.static_entry;
@ -598,7 +542,6 @@ static void chunk_inc_gpu_mapping(uvm_cpu_physical_chunk_t *chunk, uvm_parent_gp
{
uvm_cpu_phys_mapping_t *mapping;
uvm_assert_mutex_locked(&chunk->lock);
mapping = chunk_phys_mapping_get(chunk, id);
UVM_ASSERT(mapping);
mapping->map_count++;
@ -608,7 +551,6 @@ static void chunk_dec_gpu_mapping(uvm_cpu_physical_chunk_t *chunk, uvm_parent_gp
{
uvm_cpu_phys_mapping_t *mapping;
uvm_assert_mutex_locked(&chunk->lock);
mapping = chunk_phys_mapping_get(chunk, id);
UVM_ASSERT(mapping);
UVM_ASSERT(mapping->dma_addr && mapping->map_count);
@ -616,6 +558,8 @@ static void chunk_dec_gpu_mapping(uvm_cpu_physical_chunk_t *chunk, uvm_parent_gp
if (mapping->map_count == 0) {
uvm_parent_gpu_t *parent_gpu = uvm_parent_gpu_get(id);
UVM_ASSERT(uvm_sub_processor_mask_empty(&mapping->sub_processors));
uvm_parent_gpu_unmap_cpu_pages(parent_gpu, mapping->dma_addr, uvm_cpu_chunk_get_size(&chunk->common));
mapping->dma_addr = 0;
if (chunk->gpu_mappings.max_entries > 1) {
@ -631,7 +575,7 @@ static void chunk_dec_gpu_mapping(uvm_cpu_physical_chunk_t *chunk, uvm_parent_gp
}
}
NvU64 uvm_cpu_chunk_get_parent_gpu_phys_addr(uvm_cpu_chunk_t *chunk, uvm_parent_gpu_t *parent_gpu)
NvU64 uvm_cpu_chunk_get_gpu_phys_addr(uvm_cpu_chunk_t *chunk, uvm_gpu_t *gpu)
{
uvm_cpu_physical_chunk_t *phys_chunk = get_physical_parent(chunk);
uvm_cpu_phys_mapping_t *mapping;
@ -641,36 +585,41 @@ NvU64 uvm_cpu_chunk_get_parent_gpu_phys_addr(uvm_cpu_chunk_t *chunk, uvm_parent_
if (uvm_cpu_chunk_is_logical(chunk)) {
uvm_cpu_logical_chunk_t *logical_chunk = uvm_cpu_chunk_to_logical(chunk);
if (!uvm_parent_processor_mask_test(&logical_chunk->mapped_gpus, parent_gpu->id))
if (!uvm_processor_mask_test(&logical_chunk->mapped_gpus, gpu->id))
return 0;
parent_offset = cpu_chunk_get_phys_index(logical_chunk);
}
uvm_mutex_lock(&phys_chunk->lock);
mapping = chunk_phys_mapping_get(phys_chunk, parent_gpu->id);
if (mapping)
mapping = chunk_phys_mapping_get(phys_chunk, gpu->parent->id);
if (mapping &&
(uvm_cpu_chunk_is_logical(chunk) ||
uvm_sub_processor_mask_test(&mapping->sub_processors, uvm_id_sub_processor_index(gpu->id))))
dma_addr = mapping->dma_addr + (parent_offset * PAGE_SIZE);
uvm_mutex_unlock(&phys_chunk->lock);
return dma_addr;
}
// Create a DMA mapping for the chunk on the given parent GPU. This will map the
// entire parent physical chunk on the GPU.
// Create a DMA mapping for the chunk on the given GPU. This will map the
// entire physical chunk on the parent GPU and record that a given MIG
// partition is using the mapping.
//
// Returns NV_OK on success. On error, any of the errors returned by
// uvm_parent_gpu_map_cpu_pages() can be returned. In the case that the DMA
// mapping structure could not be allocated, NV_ERR_NO_MEMORY is returned.
static NV_STATUS cpu_chunk_map_parent_gpu_phys(uvm_cpu_chunk_t *chunk, uvm_parent_gpu_t *parent_gpu)
static NV_STATUS cpu_chunk_map_gpu_phys(uvm_cpu_chunk_t *chunk, uvm_gpu_t *gpu)
{
uvm_parent_gpu_t *parent_gpu = gpu->parent;
uvm_cpu_physical_chunk_t *phys_chunk;
uvm_cpu_logical_chunk_t *logical_chunk = NULL;
uvm_cpu_phys_mapping_t *mapping;
NV_STATUS status = NV_OK;
if (uvm_cpu_chunk_is_logical(chunk)) {
logical_chunk = uvm_cpu_chunk_to_logical(chunk);
if (uvm_parent_processor_mask_test(&logical_chunk->mapped_gpus, parent_gpu->id))
if (uvm_processor_mask_test(&logical_chunk->mapped_gpus, gpu->id))
return status;
}
@ -679,7 +628,6 @@ static NV_STATUS cpu_chunk_map_parent_gpu_phys(uvm_cpu_chunk_t *chunk, uvm_paren
if (!uvm_parent_processor_mask_test(&phys_chunk->gpu_mappings.dma_addrs_mask, parent_gpu->id)) {
uvm_chunk_size_t chunk_size = uvm_cpu_chunk_get_size(&phys_chunk->common);
uvm_cpu_phys_mapping_t *mapping;
NvU64 dma_addr;
status = uvm_parent_gpu_map_cpu_pages(parent_gpu, phys_chunk->common.page, chunk_size, &dma_addr);
@ -695,39 +643,59 @@ static NV_STATUS cpu_chunk_map_parent_gpu_phys(uvm_cpu_chunk_t *chunk, uvm_paren
mapping->dma_addr = dma_addr;
mapping->map_count = 1;
uvm_sub_processor_mask_zero(&mapping->sub_processors);
if (!logical_chunk)
uvm_sub_processor_mask_set(&mapping->sub_processors, uvm_id_sub_processor_index(gpu->id));
uvm_parent_processor_mask_set(&phys_chunk->gpu_mappings.dma_addrs_mask, parent_gpu->id);
}
else {
// The mapping count on the physical chunk is only increased when
// mapping logical chunks.
if (uvm_cpu_chunk_is_logical(chunk))
chunk_inc_gpu_mapping(phys_chunk, parent_gpu->id);
mapping = chunk_phys_mapping_get(phys_chunk, parent_gpu->id);
UVM_ASSERT(mapping);
// Increment the map_count for logical chunks or the first time a
// MIG partition is sharing a physical chunk.
if (logical_chunk ||
!uvm_sub_processor_mask_test_and_set(&mapping->sub_processors, uvm_id_sub_processor_index(gpu->id)))
mapping->map_count++;
}
if (logical_chunk) {
uvm_processor_mask_set(&logical_chunk->mapped_gpus, gpu->id);
UVM_ASSERT(uvm_sub_processor_mask_empty(&mapping->sub_processors));
}
else {
UVM_ASSERT(!uvm_sub_processor_mask_empty(&mapping->sub_processors));
UVM_ASSERT(uvm_sub_processor_mask_get_count(&mapping->sub_processors) == mapping->map_count);
}
done:
uvm_mutex_unlock(&phys_chunk->lock);
if (status == NV_OK && uvm_cpu_chunk_is_logical(chunk))
uvm_parent_processor_mask_set(&logical_chunk->mapped_gpus, parent_gpu->id);
return status;
}
void uvm_cpu_chunk_unmap_parent_gpu_phys(uvm_cpu_chunk_t *chunk, uvm_parent_gpu_t *parent_gpu)
static void cpu_chunk_unmap_gpu_phys(uvm_cpu_chunk_t *chunk, uvm_gpu_id_t gpu_id)
{
uvm_cpu_physical_chunk_t *phys_chunk;
uvm_cpu_logical_chunk_t *logical_chunk;
uvm_cpu_physical_chunk_t *phys_chunk = get_physical_parent(chunk);
uvm_parent_gpu_id_t id = uvm_parent_gpu_id_from_gpu_id(gpu_id);
uvm_mutex_lock(&phys_chunk->lock);
if (uvm_cpu_chunk_is_logical(chunk)) {
logical_chunk = uvm_cpu_chunk_to_logical(chunk);
if (!uvm_parent_processor_mask_test_and_clear(&logical_chunk->mapped_gpus, parent_gpu->id))
return;
}
uvm_processor_mask_t *mapping_mask = &uvm_cpu_chunk_to_logical(chunk)->mapped_gpus;
phys_chunk = get_physical_parent(chunk);
uvm_mutex_lock(&phys_chunk->lock);
if (uvm_parent_processor_mask_test(&phys_chunk->gpu_mappings.dma_addrs_mask, parent_gpu->id))
chunk_dec_gpu_mapping(phys_chunk, parent_gpu->id);
if (uvm_processor_mask_test_and_clear(mapping_mask, gpu_id))
chunk_dec_gpu_mapping(phys_chunk, id);
}
else {
if (uvm_parent_processor_mask_test(&phys_chunk->gpu_mappings.dma_addrs_mask, id)) {
uvm_cpu_phys_mapping_t *mapping = chunk_phys_mapping_get(phys_chunk, id);
if (uvm_sub_processor_mask_test_and_clear(&mapping->sub_processors, uvm_id_sub_processor_index(gpu_id)))
chunk_dec_gpu_mapping(phys_chunk, id);
}
}
uvm_mutex_unlock(&phys_chunk->lock);
}
@ -737,17 +705,112 @@ NV_STATUS uvm_cpu_chunk_map_gpu(uvm_cpu_chunk_t *chunk, uvm_gpu_t *gpu)
NV_STATUS status;
uvm_chunk_size_t chunk_size = uvm_cpu_chunk_get_size(chunk);
status = cpu_chunk_map_parent_gpu_phys(chunk, gpu->parent);
status = cpu_chunk_map_gpu_phys(chunk, gpu);
if (status != NV_OK)
return status;
status = uvm_mmu_sysmem_map(gpu, uvm_cpu_chunk_get_parent_gpu_phys_addr(chunk, gpu->parent), chunk_size);
status = uvm_mmu_sysmem_map(gpu, uvm_cpu_chunk_get_gpu_phys_addr(chunk, gpu), chunk_size);
if (status != NV_OK)
uvm_cpu_chunk_unmap_parent_gpu_phys(chunk, gpu->parent);
cpu_chunk_unmap_gpu_phys(chunk, gpu->id);
return status;
}
void uvm_cpu_chunk_unmap_gpu(uvm_cpu_chunk_t *chunk, uvm_gpu_t *gpu)
{
cpu_chunk_unmap_gpu_phys(chunk, gpu->id);
// Note: there is no corresponding uvm_mmu_sysmem_unmap() for
// uvm_mmu_sysmem_map().
}
static void cpu_logical_chunk_release(uvm_cpu_logical_chunk_t *logical_chunk)
{
uvm_cpu_physical_chunk_t *phys_chunk = get_physical_parent(logical_chunk->parent);
uvm_processor_id_t gpu_id;
uvm_mutex_lock(&phys_chunk->lock);
for_each_id_in_mask(gpu_id, &logical_chunk->mapped_gpus)
chunk_dec_gpu_mapping(phys_chunk, uvm_parent_gpu_id_from_gpu_id(gpu_id));
uvm_mutex_unlock(&phys_chunk->lock);
uvm_cpu_chunk_free(logical_chunk->parent);
}
static void cpu_physical_chunk_release(uvm_cpu_chunk_t *chunk)
{
uvm_cpu_physical_chunk_t *phys_chunk = uvm_cpu_chunk_to_physical(chunk);
uvm_parent_processor_id_t id;
uvm_assert_mutex_unlocked(&phys_chunk->lock);
// There should be no other threads using this chunk but we lock it because
// of assertions in chunk_phys_mapping_get() and chunk_dec_gpu_mapping().
uvm_mutex_lock(&phys_chunk->lock);
for_each_parent_id_in_mask(id, &phys_chunk->gpu_mappings.dma_addrs_mask) {
uvm_cpu_phys_mapping_t *mapping = chunk_phys_mapping_get(phys_chunk, id);
NvU32 count;
UVM_ASSERT(mapping);
UVM_ASSERT(!uvm_sub_processor_mask_empty(&mapping->sub_processors));
// Get a count of set bits in the sub_processors mask then clear it so
// that chunk_dec_gpu_mapping() sees an empty mask when map_count == 0.
// Using for_each_sub_processor_in_mask could try to dereference
// mapping after map_count == 0 in the loop below.
count = uvm_sub_processor_mask_get_count(&mapping->sub_processors);
uvm_sub_processor_mask_zero(&mapping->sub_processors);
for (; count; count--)
chunk_dec_gpu_mapping(phys_chunk, id);
}
uvm_mutex_unlock(&phys_chunk->lock);
UVM_ASSERT(uvm_parent_processor_mask_empty(&phys_chunk->gpu_mappings.dma_addrs_mask));
if (phys_chunk->gpu_mappings.max_entries > 1)
uvm_kvfree(phys_chunk->gpu_mappings.dynamic_entries);
if (uvm_cpu_chunk_get_size(chunk) > PAGE_SIZE &&
!bitmap_empty(phys_chunk->dirty_bitmap, uvm_cpu_chunk_num_pages(chunk)))
SetPageDirty(chunk->page);
uvm_kvfree(phys_chunk->dirty_bitmap);
if (chunk->type != UVM_CPU_CHUNK_TYPE_HMM)
put_page(chunk->page);
}
static void cpu_chunk_release(nv_kref_t *kref)
{
uvm_cpu_chunk_t *chunk = container_of(kref, uvm_cpu_chunk_t, refcount);
if (uvm_cpu_chunk_is_logical(chunk))
cpu_logical_chunk_release(uvm_cpu_chunk_to_logical(chunk));
else
cpu_physical_chunk_release(chunk);
uvm_kvfree(chunk);
}
static void uvm_cpu_chunk_get(uvm_cpu_chunk_t *chunk)
{
UVM_ASSERT(chunk);
nv_kref_get(&chunk->refcount);
}
void uvm_cpu_chunk_free(uvm_cpu_chunk_t *chunk)
{
if (!chunk)
return;
nv_kref_put(&chunk->refcount, cpu_chunk_release);
}
static struct page *uvm_cpu_chunk_alloc_page(uvm_chunk_size_t alloc_size,
int nid,
uvm_cpu_chunk_alloc_flags_t alloc_flags)
@ -876,14 +939,37 @@ int uvm_cpu_chunk_get_numa_node(uvm_cpu_chunk_t *chunk)
return page_to_nid(chunk->page);
}
// Convert the mask of DMA mapped parent GPUs and the sub-processor mask into
// one uvm_processor_mask_t in 'dma_map_mask'.
static void get_dma_map_mask(uvm_cpu_physical_chunk_t *chunk, uvm_processor_mask_t *dma_map_mask)
{
uvm_parent_processor_id_t id;
NvU32 sub_index;
uvm_assert_mutex_locked(&chunk->lock);
for_each_parent_id_in_mask(id, &chunk->gpu_mappings.dma_addrs_mask) {
uvm_cpu_phys_mapping_t *mapping = chunk_phys_mapping_get(chunk, id);
for_each_sub_processor_index_in_mask(sub_index, &mapping->sub_processors) {
uvm_processor_id_t gpu_id = uvm_gpu_id_from_sub_processor(id, sub_index);
uvm_sub_processor_mask_clear(&mapping->sub_processors, sub_index);
uvm_processor_mask_set(dma_map_mask, gpu_id);
}
UVM_ASSERT(uvm_sub_processor_mask_empty(&mapping->sub_processors));
}
}
NV_STATUS uvm_cpu_chunk_split(uvm_cpu_chunk_t *chunk, uvm_cpu_chunk_t **new_chunks)
{
NV_STATUS status = NV_OK;
uvm_cpu_logical_chunk_t *new_chunk;
uvm_cpu_physical_chunk_t *phys_chunk = get_physical_parent(chunk);
uvm_cpu_logical_chunk_t *logical_chunk = NULL;
uvm_parent_processor_id_t id;
uvm_parent_processor_mask_t *dma_map_mask;
uvm_processor_id_t gpu_id;
uvm_processor_mask_t *dma_map_mask = NULL;
uvm_chunk_size_t new_size;
size_t num_new_chunks;
size_t num_subchunk_pages;
@ -902,21 +988,20 @@ NV_STATUS uvm_cpu_chunk_split(uvm_cpu_chunk_t *chunk, uvm_cpu_chunk_t **new_chun
// Get the largest size below the size of the input chunk.
new_size = uvm_chunk_find_prev_size(uvm_cpu_chunk_get_allocation_sizes(), uvm_cpu_chunk_get_size(chunk));
UVM_ASSERT(new_size);
UVM_ASSERT(new_size != UVM_CHUNK_SIZE_INVALID);
num_new_chunks = uvm_cpu_chunk_get_size(chunk) / new_size;
num_subchunk_pages = new_size / PAGE_SIZE;
if (uvm_cpu_chunk_is_physical(chunk)) {
dma_map_mask = &phys_chunk->gpu_mappings.dma_addrs_mask;
}
else {
if (uvm_cpu_chunk_is_logical(chunk)) {
logical_chunk = uvm_cpu_chunk_to_logical(chunk);
dma_map_mask = &logical_chunk->mapped_gpus;
}
uvm_mutex_lock(&phys_chunk->lock);
for (i = 0; i < num_new_chunks; i++) {
new_chunk = uvm_kvmalloc_zero(sizeof(*logical_chunk));
new_chunk = uvm_kvmalloc_zero(sizeof(*new_chunk));
if (!new_chunk) {
uvm_mutex_unlock(&phys_chunk->lock);
status = NV_ERR_NO_MEMORY;
@ -929,19 +1014,25 @@ NV_STATUS uvm_cpu_chunk_split(uvm_cpu_chunk_t *chunk, uvm_cpu_chunk_t **new_chun
nv_kref_init(&new_chunk->common.refcount);
new_chunk->parent = chunk;
uvm_cpu_chunk_get(new_chunk->parent);
for_each_parent_id_in_mask(id, dma_map_mask)
chunk_inc_gpu_mapping(phys_chunk, id);
uvm_parent_processor_mask_copy(&new_chunk->mapped_gpus, dma_map_mask);
if (i == 0 && !logical_chunk) {
dma_map_mask = &new_chunk->mapped_gpus;
get_dma_map_mask(phys_chunk, dma_map_mask);
}
else {
uvm_processor_mask_copy(&new_chunk->mapped_gpus, dma_map_mask);
}
for_each_id_in_mask(gpu_id, dma_map_mask)
chunk_inc_gpu_mapping(phys_chunk, uvm_parent_gpu_id_from_gpu_id(gpu_id));
new_chunks[i] = &new_chunk->common;
}
// Release the references that are held by the chunk being split.
for_each_parent_id_in_mask(id, dma_map_mask)
chunk_dec_gpu_mapping(phys_chunk, id);
for_each_id_in_mask(gpu_id, dma_map_mask)
chunk_dec_gpu_mapping(phys_chunk, uvm_parent_gpu_id_from_gpu_id(gpu_id));
// If the chunk being split is a logical chunk, clear its mapped_gpus mask.
if (uvm_cpu_chunk_is_logical(chunk))
uvm_parent_processor_mask_zero(&logical_chunk->mapped_gpus);
if (logical_chunk)
uvm_processor_mask_zero(&logical_chunk->mapped_gpus);
uvm_mutex_unlock(&phys_chunk->lock);
@ -963,7 +1054,7 @@ static bool verify_merging_chunks(uvm_cpu_chunk_t **chunks, size_t num_chunks)
{
uvm_cpu_logical_chunk_t *logical_chunk;
uvm_cpu_chunk_t *first_chunk_parent;
uvm_parent_processor_mask_t *first_chunk_mapped_gpus;
uvm_processor_mask_t *first_chunk_mapped_gpus;
uvm_chunk_size_t first_chunk_size;
size_t i;
@ -994,7 +1085,7 @@ static bool verify_merging_chunks(uvm_cpu_chunk_t **chunks, size_t num_chunks)
// 2.1 All mappings to GPUs in each of child chunks' masks that are
// not also present in the parent chunk's mask are destroyed.
// 2.2 mapped_gpus mask of the parent chunk remains unmodified.
UVM_ASSERT(uvm_parent_processor_mask_equal(&logical_chunk->mapped_gpus, first_chunk_mapped_gpus));
UVM_ASSERT(uvm_processor_mask_equal(&logical_chunk->mapped_gpus, first_chunk_mapped_gpus));
}
return true;
@ -1005,14 +1096,14 @@ uvm_cpu_chunk_t *uvm_cpu_chunk_merge(uvm_cpu_chunk_t **chunks)
uvm_cpu_chunk_t *parent;
uvm_cpu_logical_chunk_t *logical_chunk;
uvm_cpu_physical_chunk_t *phys_chunk;
uvm_parent_processor_id_t id;
uvm_processor_id_t gpu_id;
uvm_chunk_size_t chunk_size;
uvm_chunk_size_t parent_chunk_size;
size_t num_merge_chunks;
size_t i;
UVM_ASSERT(chunks);
UVM_ASSERT(!uvm_cpu_chunk_is_physical(chunks[0]));
UVM_ASSERT(uvm_cpu_chunk_is_logical(chunks[0]));
logical_chunk = uvm_cpu_chunk_to_logical(chunks[0]);
parent = logical_chunk->parent;
@ -1033,11 +1124,22 @@ uvm_cpu_chunk_t *uvm_cpu_chunk_merge(uvm_cpu_chunk_t **chunks)
phys_chunk = get_physical_parent(chunks[0]);
uvm_mutex_lock(&phys_chunk->lock);
for_each_parent_id_in_mask(id, &logical_chunk->mapped_gpus)
chunk_inc_gpu_mapping(phys_chunk, id);
if (!uvm_cpu_chunk_is_physical(parent))
uvm_parent_processor_mask_copy(&uvm_cpu_chunk_to_logical(parent)->mapped_gpus, &logical_chunk->mapped_gpus);
for_each_id_in_mask(gpu_id, &logical_chunk->mapped_gpus)
chunk_inc_gpu_mapping(phys_chunk, uvm_parent_gpu_id_from_gpu_id(gpu_id));
if (uvm_cpu_chunk_is_logical(parent)) {
uvm_processor_mask_copy(&uvm_cpu_chunk_to_logical(parent)->mapped_gpus, &logical_chunk->mapped_gpus);
}
else {
// Restore the mapping->sub_processors mask for each mapped GPU.
for_each_id_in_mask(gpu_id, &logical_chunk->mapped_gpus) {
uvm_cpu_phys_mapping_t *mapping = chunk_phys_mapping_get(phys_chunk, uvm_parent_gpu_id_from_gpu_id(gpu_id));
UVM_ASSERT(mapping);
uvm_sub_processor_mask_set(&mapping->sub_processors, uvm_id_sub_processor_index(gpu_id));
}
}
uvm_mutex_unlock(&phys_chunk->lock);

View File

@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2017-2023 NVIDIA Corporation
Copyright (c) 2017-2024 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@ -246,8 +246,19 @@ struct uvm_cpu_chunk_struct
typedef struct
{
// Physical GPU DMA address of the CPU chunk.
NvU64 dma_addr;
// Reference count of all sub_processors using this mapping across logical
// and physical chunks.
NvU32 map_count;
// Mask of MIG instances or physical GPU.
// This is only valid for physical CPU chunks that have not been split into
// logical chunks. When the chunk is split, all the
// uvm_cpu_logical_chunk_t::mapped_gpus masks have a bit set for each
// count in map_count and sub_processors is set to zero.
uvm_sub_processor_mask_t sub_processors;
} uvm_cpu_phys_mapping_t;
typedef struct
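The comments on uvm_cpu_phys_mapping_t above describe the invariant for an unsplit physical chunk: map_count holds one reference per MIG instance, so it matches the number of bits set in sub_processors (the same relationship the UVM_ASSERT in cpu_chunk_map_gpu_phys checks). A toy standalone model of just that rule, with simplified types and names that are illustrative only, not the driver's:

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

/* Toy model (not driver code) of the rule described above for a physical
 * chunk that has not been split: one reference per MIG instance, so
 * map_count always equals the number of bits set in sub_processors. */
typedef struct {
    uint32_t map_count;
    uint32_t sub_processors;   /* stand-in for uvm_sub_processor_mask_t */
} toy_phys_mapping_t;

static void toy_map(toy_phys_mapping_t *m, unsigned sub_index)
{
    uint32_t bit = 1u << sub_index;

    /* Only the first map from a given MIG instance takes a reference. */
    if (!(m->sub_processors & bit)) {
        m->sub_processors |= bit;
        m->map_count++;
    }
    assert(m->map_count == (uint32_t)__builtin_popcount(m->sub_processors));
}

static void toy_unmap(toy_phys_mapping_t *m, unsigned sub_index)
{
    uint32_t bit = 1u << sub_index;

    /* Dropping an instance's reference clears its bit. */
    if (m->sub_processors & bit) {
        m->sub_processors &= ~bit;
        m->map_count--;
    }
    assert(m->map_count == (uint32_t)__builtin_popcount(m->sub_processors));
}

int main(void)
{
    toy_phys_mapping_t m = {0};

    toy_map(&m, 0);     /* first MIG instance maps the chunk */
    toy_map(&m, 1);     /* a second instance shares the same DMA mapping */
    toy_map(&m, 1);     /* repeated map from the same instance is a no-op */
    toy_unmap(&m, 0);
    printf("map_count=%u\n", m.map_count);   /* prints 1 */
    return 0;
}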
@ -304,7 +315,9 @@ typedef struct
// Pointer to the parent chunk (which could also be a logical chunk).
uvm_cpu_chunk_t *parent;
uvm_parent_processor_mask_t mapped_gpus;
// Each set bit holds a reference that is also counted in mapping->map_count.
uvm_processor_mask_t mapped_gpus;
} uvm_cpu_logical_chunk_t;
// Return the set of allowed CPU chunk allocation sizes.
@ -417,15 +430,15 @@ void uvm_cpu_chunk_free(uvm_cpu_chunk_t *chunk);
// For more details see uvm_mmu_sysmem_map().
NV_STATUS uvm_cpu_chunk_map_gpu(uvm_cpu_chunk_t *chunk, uvm_gpu_t *gpu);
// Destroy a CPU chunk's DMA mapping for the parent GPU.
// Destroy a CPU chunk's DMA mapping for the given GPU.
// If chunk is a logical chunk, this call may not necessarily destroy the DMA
// mapping of the parent physical chunk since all logical chunks share the
// parent's DMA mapping.
void uvm_cpu_chunk_unmap_parent_gpu_phys(uvm_cpu_chunk_t *chunk, uvm_parent_gpu_t *parent_gpu);
// mapping of the parent physical chunk since all logical chunks and MIG
// partitions share the parent's DMA mapping.
void uvm_cpu_chunk_unmap_gpu(uvm_cpu_chunk_t *chunk, uvm_gpu_t *gpu);
// Get the CPU chunk's DMA mapping address for the specified GPU ID.
// If there is no mapping for the GPU, 0 is returned.
NvU64 uvm_cpu_chunk_get_parent_gpu_phys_addr(uvm_cpu_chunk_t *chunk, uvm_parent_gpu_t *parent_gpu);
NvU64 uvm_cpu_chunk_get_gpu_phys_addr(uvm_cpu_chunk_t *chunk, uvm_gpu_t *gpu);
// Split a CPU chunk into a set of CPU chunks of the next size down from the set
// of enabled CPU chunk sizes.

View File

@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2017-2023 NVIDIA Corporation
Copyright (c) 2017-2024 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@ -626,7 +626,7 @@ static NV_STATUS test_cpu_chunk_mapping_access(uvm_cpu_chunk_t *chunk, uvm_gpu_t
TEST_NV_CHECK_RET(cpu_chunk_map_on_cpu(chunk, (void **)&cpu_addr));
memset(cpu_addr, 0, chunk_size);
dma_addr = uvm_cpu_chunk_get_parent_gpu_phys_addr(chunk, gpu->parent);
dma_addr = uvm_cpu_chunk_get_gpu_phys_addr(chunk, gpu);
gpu_addr = uvm_gpu_address_copy(gpu, uvm_gpu_phys_address(UVM_APERTURE_SYS, dma_addr));
TEST_NV_CHECK_GOTO(uvm_push_begin_acquire(gpu->channel_manager,
@ -733,21 +733,21 @@ static NV_STATUS test_cpu_chunk_mapping_basic_verify(uvm_gpu_t *gpu,
// - no GPU mapping address.
TEST_CHECK_GOTO(phys_chunk->gpu_mappings.max_entries == 1, done);
TEST_CHECK_GOTO(uvm_parent_processor_mask_get_gpu_count(&phys_chunk->gpu_mappings.dma_addrs_mask) == 0, done);
TEST_CHECK_GOTO(uvm_cpu_chunk_get_parent_gpu_phys_addr(chunk, gpu->parent) == 0, done);
TEST_CHECK_GOTO(uvm_cpu_chunk_get_gpu_phys_addr(chunk, gpu) == 0, done);
TEST_NV_CHECK_GOTO(uvm_cpu_chunk_map_gpu(chunk, gpu), done);
// Test basic access.
TEST_NV_CHECK_GOTO(test_cpu_chunk_mapping_access(chunk, gpu), done);
// Test double map is harmless.
dma_addr = uvm_cpu_chunk_get_parent_gpu_phys_addr(chunk, gpu->parent);
dma_addr = uvm_cpu_chunk_get_gpu_phys_addr(chunk, gpu);
TEST_NV_CHECK_GOTO(uvm_cpu_chunk_map_gpu(chunk, gpu), done);
TEST_CHECK_GOTO(uvm_cpu_chunk_get_parent_gpu_phys_addr(chunk, gpu->parent) == dma_addr, done);
TEST_CHECK_GOTO(uvm_cpu_chunk_get_gpu_phys_addr(chunk, gpu) == dma_addr, done);
TEST_NV_CHECK_GOTO(test_cpu_chunk_mapping_access(chunk, gpu), done);
// Test unmap, remap.
uvm_cpu_chunk_unmap_parent_gpu_phys(chunk, gpu->parent);
TEST_CHECK_GOTO(uvm_cpu_chunk_get_parent_gpu_phys_addr(chunk, gpu->parent) == 0, done);
uvm_cpu_chunk_unmap_gpu(chunk, gpu);
TEST_CHECK_GOTO(uvm_cpu_chunk_get_gpu_phys_addr(chunk, gpu) == 0, done);
TEST_NV_CHECK_GOTO(uvm_cpu_chunk_map_gpu(chunk, gpu), done);
TEST_NV_CHECK_GOTO(test_cpu_chunk_mapping_access(chunk, gpu), done);
@ -768,6 +768,39 @@ static NV_STATUS test_cpu_chunk_mapping_basic(uvm_gpu_t *gpu, uvm_cpu_chunk_allo
return NV_OK;
}
// TODO: Bug 4351121: This won't actually test anything until uvm_test
// enumerates multiple MIG instances.
static NV_STATUS test_cpu_chunk_mig(uvm_gpu_t *gpu0, uvm_gpu_t *gpu1)
{
NV_STATUS status = NV_OK;
uvm_cpu_chunk_t *chunk;
uvm_cpu_physical_chunk_t *phys_chunk;
NvU64 dma_addr_gpu0;
UVM_ASSERT(gpu0->parent == gpu1->parent);
TEST_NV_CHECK_RET(test_cpu_chunk_alloc(PAGE_SIZE, UVM_CPU_CHUNK_ALLOC_FLAGS_NONE, NUMA_NO_NODE, &chunk));
phys_chunk = uvm_cpu_chunk_to_physical(chunk);
TEST_NV_CHECK_GOTO(uvm_cpu_chunk_map_gpu(chunk, gpu0), done);
TEST_NV_CHECK_GOTO(uvm_cpu_chunk_map_gpu(chunk, gpu1), done);
TEST_NV_CHECK_GOTO(test_cpu_chunk_mapping_access(chunk, gpu0), done);
TEST_NV_CHECK_GOTO(test_cpu_chunk_mapping_access(chunk, gpu1), done);
// MIG instances in the same physical GPU share the same DMA addresses.
dma_addr_gpu0 = uvm_cpu_chunk_get_gpu_phys_addr(chunk, gpu0);
TEST_CHECK_GOTO(uvm_cpu_chunk_get_gpu_phys_addr(chunk, gpu1) == dma_addr_gpu0, done);
// Unmapping one GPU shouldn't affect the other.
uvm_cpu_chunk_unmap_gpu(chunk, gpu0);
TEST_CHECK_GOTO(uvm_cpu_chunk_get_gpu_phys_addr(chunk, gpu0) == 0, done);
TEST_NV_CHECK_GOTO(test_cpu_chunk_mapping_access(chunk, gpu1), done);
done:
uvm_cpu_chunk_free(chunk);
return status;
}
static NV_STATUS test_cpu_chunk_mapping_array(uvm_gpu_t *gpu0, uvm_gpu_t *gpu1, uvm_gpu_t *gpu2)
{
NV_STATUS status = NV_OK;
@ -783,8 +816,8 @@ static NV_STATUS test_cpu_chunk_mapping_array(uvm_gpu_t *gpu0, uvm_gpu_t *gpu1,
TEST_NV_CHECK_GOTO(uvm_cpu_chunk_map_gpu(chunk, gpu2), done);
TEST_NV_CHECK_GOTO(test_cpu_chunk_mapping_access(chunk, gpu1), done);
TEST_NV_CHECK_GOTO(test_cpu_chunk_mapping_access(chunk, gpu2), done);
dma_addr_gpu1 = uvm_cpu_chunk_get_parent_gpu_phys_addr(chunk, gpu1->parent);
uvm_cpu_chunk_unmap_parent_gpu_phys(chunk, gpu2->parent);
dma_addr_gpu1 = uvm_cpu_chunk_get_gpu_phys_addr(chunk, gpu1);
uvm_cpu_chunk_unmap_gpu(chunk, gpu2);
TEST_NV_CHECK_GOTO(test_cpu_chunk_mapping_access(chunk, gpu1), done);
TEST_NV_CHECK_GOTO(uvm_cpu_chunk_map_gpu(chunk, gpu0), done);
TEST_NV_CHECK_GOTO(test_cpu_chunk_mapping_access(chunk, gpu0), done);
@ -798,7 +831,9 @@ static NV_STATUS test_cpu_chunk_mapping_array(uvm_gpu_t *gpu0, uvm_gpu_t *gpu1,
// GPU1. It's true that we may get a false negative if both addresses
// happened to alias and we had a bug in how the addresses are shifted in
// the dense array, but that's better than intermittent failure.
TEST_CHECK_GOTO(uvm_cpu_chunk_get_parent_gpu_phys_addr(chunk, gpu1->parent) == dma_addr_gpu1, done);
// Also note that multiple MIG instances in the same physical GPU share the
// parent's physical DMA mapping.
TEST_CHECK_GOTO(uvm_cpu_chunk_get_gpu_phys_addr(chunk, gpu1) == dma_addr_gpu1, done);
done:
uvm_cpu_chunk_free(chunk);
@ -828,7 +863,7 @@ static NV_STATUS do_test_cpu_chunk_split_and_merge(uvm_cpu_chunk_t *chunk, uvm_g
TEST_NV_CHECK_GOTO(uvm_cpu_chunk_map_gpu(chunk, gpu), done_free);
TEST_NV_CHECK_GOTO(test_cpu_chunk_mapping_access(chunk, gpu), done_free);
uvm_cpu_chunk_unmap_parent_gpu_phys(chunk, gpu->parent);
uvm_cpu_chunk_unmap_gpu(chunk, gpu);
TEST_NV_CHECK_GOTO(uvm_cpu_chunk_split(chunk, split_chunks), done_free);
TEST_CHECK_GOTO(nv_kref_read(&chunk->refcount) == num_split_chunks, done);
@ -845,13 +880,14 @@ static NV_STATUS do_test_cpu_chunk_split_and_merge(uvm_cpu_chunk_t *chunk, uvm_g
merged_chunk = uvm_cpu_chunk_merge(split_chunks);
TEST_CHECK_GOTO(uvm_cpu_chunk_get_size(merged_chunk) == size, done_free);
TEST_CHECK_GOTO(merged_chunk == chunk, done_free);
TEST_CHECK_GOTO(nv_kref_read(&chunk->refcount) == 1, done_free);
// Since all logical chunks were mapped, the entire merged chunk should
// be accessible without needing to map it.
TEST_NV_CHECK_GOTO(test_cpu_chunk_mapping_access(merged_chunk, gpu), done_free);
// Test that GPU mappings are transferred after a split
phys_dma_addr = uvm_cpu_chunk_get_parent_gpu_phys_addr(chunk, gpu->parent);
phys_dma_addr = uvm_cpu_chunk_get_gpu_phys_addr(chunk, gpu);
TEST_NV_CHECK_GOTO(uvm_cpu_chunk_split(chunk, split_chunks), done_free);
@ -859,9 +895,9 @@ static NV_STATUS do_test_cpu_chunk_split_and_merge(uvm_cpu_chunk_t *chunk, uvm_g
NvU64 dma_addr;
TEST_NV_CHECK_GOTO(test_cpu_chunk_mapping_access(split_chunks[i], gpu), done);
dma_addr = uvm_cpu_chunk_get_parent_gpu_phys_addr(split_chunks[i], gpu->parent);
dma_addr = uvm_cpu_chunk_get_gpu_phys_addr(split_chunks[i], gpu);
TEST_CHECK_GOTO(dma_addr == phys_dma_addr + (i * split_size), done);
uvm_cpu_chunk_unmap_parent_gpu_phys(split_chunks[i], gpu->parent);
uvm_cpu_chunk_unmap_gpu(split_chunks[i], gpu);
}
// Test that mapping one logical chunk does not affect others.
@ -871,7 +907,7 @@ static NV_STATUS do_test_cpu_chunk_split_and_merge(uvm_cpu_chunk_t *chunk, uvm_g
for (i = 0; i < num_split_chunks; i++) {
if (i != map_chunk)
TEST_CHECK_GOTO(uvm_cpu_chunk_get_parent_gpu_phys_addr(split_chunks[i], gpu->parent) == 0, done);
TEST_CHECK_GOTO(uvm_cpu_chunk_get_gpu_phys_addr(split_chunks[i], gpu) == 0, done);
}
if (split_size > PAGE_SIZE) {
@ -927,6 +963,118 @@ static NV_STATUS test_cpu_chunk_split_and_merge(uvm_gpu_t *gpu)
return NV_OK;
}
static NV_STATUS do_test_cpu_chunk_split_and_merge_2(uvm_cpu_chunk_t *chunk, uvm_gpu_t *gpu0, uvm_gpu_t *gpu1)
{
NV_STATUS status = NV_OK;
uvm_chunk_size_t size = uvm_cpu_chunk_get_size(chunk);
uvm_chunk_sizes_mask_t alloc_sizes = uvm_cpu_chunk_get_allocation_sizes();
size_t num_split_chunks;
uvm_cpu_chunk_t **split_chunks;
uvm_cpu_chunk_t *merged_chunk;
uvm_chunk_size_t split_size;
size_t i;
split_size = uvm_chunk_find_prev_size(alloc_sizes, size);
UVM_ASSERT(split_size != UVM_CHUNK_SIZE_INVALID);
num_split_chunks = size / split_size;
split_chunks = uvm_kvmalloc_zero(num_split_chunks * sizeof(*split_chunks));
if (!split_chunks)
return NV_ERR_NO_MEMORY;
// Map both GPUs.
TEST_NV_CHECK_GOTO(uvm_cpu_chunk_map_gpu(chunk, gpu0), done_free);
TEST_NV_CHECK_GOTO(uvm_cpu_chunk_map_gpu(chunk, gpu1), done_free);
TEST_NV_CHECK_GOTO(test_cpu_chunk_mapping_access(chunk, gpu0), done_free);
TEST_NV_CHECK_GOTO(test_cpu_chunk_mapping_access(chunk, gpu1), done_free);
// Then split.
TEST_NV_CHECK_GOTO(uvm_cpu_chunk_split(chunk, split_chunks), done_free);
TEST_CHECK_GOTO(nv_kref_read(&chunk->refcount) == num_split_chunks, done);
// Unmap gpu0 from all split chunks.
for (i = 0; i < num_split_chunks; i++) {
TEST_CHECK_GOTO(split_chunks[i], done);
TEST_CHECK_GOTO(uvm_cpu_chunk_is_logical(split_chunks[i]), done);
TEST_CHECK_GOTO(uvm_cpu_chunk_get_size(split_chunks[i]) == split_size, done);
uvm_cpu_chunk_unmap_gpu(split_chunks[i], gpu0);
TEST_CHECK_GOTO(uvm_cpu_chunk_get_gpu_phys_addr(split_chunks[i], gpu0) == 0, done);
// Test that gpu1 still has access.
TEST_NV_CHECK_GOTO(test_cpu_chunk_mapping_access(split_chunks[i], gpu1), done);
}
// Test CPU chunk merging.
merged_chunk = uvm_cpu_chunk_merge(split_chunks);
TEST_CHECK_GOTO(uvm_cpu_chunk_get_size(merged_chunk) == size, done_free);
TEST_CHECK_GOTO(merged_chunk == chunk, done_free);
TEST_CHECK_GOTO(nv_kref_read(&chunk->refcount) == 1, done_free);
    // Since the logical chunks were unmapped on gpu0 but left mapped on gpu1,
    // the merged chunk should have no gpu0 mapping and should remain
    // accessible on gpu1 without needing to map it again.
TEST_CHECK_GOTO(uvm_cpu_chunk_get_gpu_phys_addr(merged_chunk, gpu0) == 0, done_free);
TEST_NV_CHECK_GOTO(test_cpu_chunk_mapping_access(merged_chunk, gpu1), done_free);
// Unmap gpu1 so we start with a fully unmapped physical chunk.
uvm_cpu_chunk_unmap_gpu(chunk, gpu1);
// Split the physical chunk.
TEST_NV_CHECK_GOTO(uvm_cpu_chunk_split(chunk, split_chunks), done_free);
// Now map everything.
for (i = 0; i < num_split_chunks; i++) {
TEST_NV_CHECK_GOTO(uvm_cpu_chunk_map_gpu(split_chunks[i], gpu0), done);
TEST_NV_CHECK_GOTO(uvm_cpu_chunk_map_gpu(split_chunks[i], gpu1), done);
}
// Test CPU chunk merging with everything mapped.
merged_chunk = uvm_cpu_chunk_merge(split_chunks);
// At this point, all split chunks have been merged.
num_split_chunks = 0;
TEST_CHECK_GOTO(uvm_cpu_chunk_get_size(merged_chunk) == size, done_free);
TEST_CHECK_GOTO(merged_chunk == chunk, done_free);
// Since all logical chunks were mapped, the entire merged chunk should
// be accessible without needing to map it.
TEST_NV_CHECK_GOTO(test_cpu_chunk_mapping_access(merged_chunk, gpu0), done_free);
TEST_NV_CHECK_GOTO(test_cpu_chunk_mapping_access(merged_chunk, gpu1), done_free);
done:
for (i = 0; i < num_split_chunks; i++)
uvm_cpu_chunk_free(split_chunks[i]);
done_free:
uvm_kvfree(split_chunks);
return status;
}
static NV_STATUS test_cpu_chunk_split_and_merge_2(uvm_gpu_t *gpu0, uvm_gpu_t *gpu1)
{
uvm_chunk_sizes_mask_t alloc_sizes = uvm_cpu_chunk_get_allocation_sizes();
uvm_chunk_size_t size;
size = uvm_chunk_find_next_size(alloc_sizes, PAGE_SIZE);
for_each_chunk_size_from(size, alloc_sizes) {
uvm_cpu_chunk_t *chunk;
NV_STATUS status;
// It is possible that the allocation fails due to lack of large pages
// rather than an API issue, which will result in a false negative.
// However, that should be very rare.
TEST_NV_CHECK_RET(test_cpu_chunk_alloc(size, UVM_CPU_CHUNK_ALLOC_FLAGS_NONE, NUMA_NO_NODE, &chunk));
status = do_test_cpu_chunk_split_and_merge_2(chunk, gpu0, gpu1);
uvm_cpu_chunk_free(chunk);
if (status != NV_OK)
return status;
}
return NV_OK;
}
static NV_STATUS test_cpu_chunk_dirty_split(uvm_cpu_chunk_t *chunk)
{
uvm_chunk_size_t size = uvm_cpu_chunk_get_size(chunk);
@ -1072,7 +1220,9 @@ done:
return status;
}
NV_STATUS do_test_cpu_chunk_free(uvm_cpu_chunk_t *chunk, uvm_va_space_t *va_space, uvm_processor_mask_t *test_gpus)
NV_STATUS do_test_cpu_chunk_free(uvm_cpu_chunk_t *chunk,
uvm_va_space_t *va_space,
const uvm_processor_mask_t *test_gpus)
{
NV_STATUS status = NV_OK;
uvm_cpu_chunk_t **split_chunks;
@ -1099,7 +1249,7 @@ NV_STATUS do_test_cpu_chunk_free(uvm_cpu_chunk_t *chunk, uvm_va_space_t *va_spac
chunk = NULL;
// Map every other chunk.
// The call to uvm_cpu_chunk_unmap_parent_gpu_phys() is here in case this
// The call to uvm_cpu_chunk_unmap_gpu() is here in case this
// is part of a double split (see below). In that case, the parent chunk
// would be either mapped or unmapped.
//
@ -1111,7 +1261,7 @@ NV_STATUS do_test_cpu_chunk_free(uvm_cpu_chunk_t *chunk, uvm_va_space_t *va_spac
if (i & (1 << uvm_id_gpu_index(gpu->id)))
TEST_NV_CHECK_GOTO(uvm_cpu_chunk_map_gpu(split_chunks[i], gpu), done);
else
uvm_cpu_chunk_unmap_parent_gpu_phys(split_chunks[i], gpu->parent);
uvm_cpu_chunk_unmap_gpu(split_chunks[i], gpu);
}
}
@ -1147,9 +1297,9 @@ NV_STATUS do_test_cpu_chunk_free(uvm_cpu_chunk_t *chunk, uvm_va_space_t *va_spac
TEST_CHECK_GOTO(uvm_cpu_chunk_get_size(split_chunks[j]) == split_size, done);
for_each_va_space_gpu_in_mask(gpu, va_space, test_gpus) {
if (j & (1 << uvm_id_gpu_index(gpu->id)))
TEST_CHECK_GOTO(uvm_cpu_chunk_get_parent_gpu_phys_addr(split_chunks[j], gpu->parent), done);
TEST_CHECK_GOTO(uvm_cpu_chunk_get_gpu_phys_addr(split_chunks[j], gpu), done);
else
TEST_CHECK_GOTO(!uvm_cpu_chunk_get_parent_gpu_phys_addr(split_chunks[j], gpu->parent), done);
TEST_CHECK_GOTO(!uvm_cpu_chunk_get_gpu_phys_addr(split_chunks[j], gpu), done);
}
}
}
@ -1168,7 +1318,8 @@ done_free:
return status;
}
NV_STATUS test_cpu_chunk_free(uvm_va_space_t *va_space, uvm_processor_mask_t *test_gpus)
NV_STATUS test_cpu_chunk_free(uvm_va_space_t *va_space,
const uvm_processor_mask_t *test_gpus)
{
uvm_cpu_chunk_t *chunk;
uvm_chunk_sizes_mask_t alloc_sizes = uvm_cpu_chunk_get_allocation_sizes();
@ -1204,6 +1355,50 @@ static NV_STATUS test_cpu_chunk_numa_alloc(uvm_va_space_t *va_space)
return NV_OK;
}
static uvm_gpu_t *find_first_parent_gpu(const uvm_processor_mask_t *test_gpus,
uvm_va_space_t *va_space)
{
return uvm_processor_mask_find_first_va_space_gpu(test_gpus, va_space);
}
static uvm_gpu_t *find_next_parent_gpu(const uvm_processor_mask_t *test_gpus,
uvm_va_space_t *va_space,
uvm_gpu_t *gpu)
{
uvm_gpu_t *next_gpu = gpu;
while (next_gpu) {
next_gpu = uvm_processor_mask_find_next_va_space_gpu(test_gpus, va_space, next_gpu);
if (!next_gpu || next_gpu->parent != gpu->parent)
break;
}
return next_gpu;
}
static void find_shared_gpu_pair(const uvm_processor_mask_t *test_gpus,
uvm_va_space_t *va_space,
uvm_gpu_t **out_gpu0,
uvm_gpu_t **out_gpu1)
{
uvm_gpu_t *gpu0 = uvm_processor_mask_find_first_va_space_gpu(test_gpus, va_space);
uvm_gpu_t *gpu1 = uvm_processor_mask_find_next_va_space_gpu(test_gpus, va_space, gpu0);
while (gpu1) {
if (gpu0->parent == gpu1->parent) {
*out_gpu0 = gpu0;
*out_gpu1 = gpu1;
return;
}
gpu0 = gpu1;
gpu1 = uvm_processor_mask_find_next_va_space_gpu(test_gpus, va_space, gpu0);
}
*out_gpu0 = NULL;
*out_gpu1 = NULL;
}
NV_STATUS uvm_test_cpu_chunk_api(UVM_TEST_CPU_CHUNK_API_PARAMS *params, struct file *filp)
{
uvm_va_space_t *va_space = uvm_va_space_get(filp);
@ -1228,13 +1423,29 @@ NV_STATUS uvm_test_cpu_chunk_api(UVM_TEST_CPU_CHUNK_API_PARAMS *params, struct f
TEST_NV_CHECK_GOTO(test_cpu_chunk_free(va_space, test_gpus), done);
TEST_NV_CHECK_GOTO(test_cpu_chunk_numa_alloc(va_space), done);
if (uvm_processor_mask_get_gpu_count(test_gpus) >= 3) {
uvm_gpu_t *gpu2, *gpu3;
if (uvm_processor_mask_get_gpu_count(test_gpus) >= 2) {
uvm_gpu_t *gpu2, *gpu3 = NULL;
gpu = uvm_processor_mask_find_first_va_space_gpu(test_gpus, va_space);
gpu2 = uvm_processor_mask_find_next_va_space_gpu(test_gpus, va_space, gpu);
gpu3 = uvm_processor_mask_find_next_va_space_gpu(test_gpus, va_space, gpu2);
TEST_NV_CHECK_GOTO(test_cpu_chunk_mapping_array(gpu, gpu2, gpu3), done);
// Look for a pair of GPUs that don't share a common parent.
gpu = find_first_parent_gpu(test_gpus, va_space);
gpu2 = find_next_parent_gpu(test_gpus, va_space, gpu);
if (gpu2) {
TEST_NV_CHECK_GOTO(test_cpu_chunk_split_and_merge_2(gpu, gpu2), done);
// Look for a third physical GPU.
gpu3 = find_next_parent_gpu(test_gpus, va_space, gpu2);
if (gpu3)
TEST_NV_CHECK_GOTO(test_cpu_chunk_mapping_array(gpu, gpu2, gpu3), done);
}
// Look for a pair of GPUs that share a common parent.
find_shared_gpu_pair(test_gpus, va_space, &gpu, &gpu2);
if (gpu) {
// Test MIG instances within the same parent GPU.
TEST_NV_CHECK_GOTO(test_cpu_chunk_split_and_merge_2(gpu, gpu2), done);
TEST_NV_CHECK_GOTO(test_cpu_chunk_mig(gpu, gpu2), done);
}
}
done:

View File

@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2023 NVIDIA Corporation
Copyright (c) 2023-2024 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@ -30,6 +30,8 @@ const uvm_processor_mask_t g_uvm_processor_mask_empty = { };
NV_STATUS uvm_processor_mask_cache_init(void)
{
BUILD_BUG_ON((8 * sizeof(((uvm_sub_processor_mask_t *)0)->bitmap)) < UVM_PARENT_ID_MAX_SUB_PROCESSORS);
g_uvm_processor_mask_cache = NV_KMEM_CACHE_CREATE("uvm_processor_mask_t", uvm_processor_mask_t);
if (!g_uvm_processor_mask_cache)
return NV_ERR_NO_MEMORY;

View File

@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2016-2023 NVIDIA Corporation
Copyright (c) 2016-2024 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@ -277,8 +277,6 @@ typedef uvm_processor_id_t uvm_gpu_id_t;
#define UVM_PARENT_ID_MAX_GPUS NV_MAX_DEVICES
#define UVM_PARENT_ID_MAX_PROCESSORS (UVM_PARENT_ID_MAX_GPUS + 1)
#define UVM_PARENT_ID_MAX_SUB_PROCESSORS 8
#define UVM_ID_MAX_GPUS (UVM_PARENT_ID_MAX_GPUS * UVM_PARENT_ID_MAX_SUB_PROCESSORS)
#define UVM_ID_MAX_PROCESSORS (UVM_ID_MAX_GPUS + 1)
#define UVM_MAX_UNIQUE_GPU_PAIRS SUM_FROM_0_TO_N(UVM_ID_MAX_GPUS - 1)
@ -292,6 +290,9 @@ typedef uvm_processor_id_t uvm_gpu_id_t;
#define UVM_ID_CHECK_BOUNDS(id) UVM_ASSERT_MSG(id.val <= UVM_ID_MAX_PROCESSORS, "id %u\n", id.val)
#define UVM_SUB_PROCESSOR_INDEX_CHECK_BOUNDS(sub_index) \
UVM_ASSERT_MSG((sub_index) < UVM_PARENT_ID_MAX_SUB_PROCESSORS, "sub_index %u\n", (sub_index))
static int uvm_parent_id_cmp(uvm_parent_processor_id_t id1, uvm_parent_processor_id_t id2)
{
UVM_PARENT_ID_CHECK_BOUNDS(id1);
@ -493,11 +494,16 @@ static uvm_gpu_id_t uvm_gpu_id_from_parent_gpu_id(const uvm_parent_gpu_id_t id)
static uvm_gpu_id_t uvm_gpu_id_from_sub_processor_index(NvU32 index, NvU32 sub_index)
{
UVM_ASSERT(index < UVM_PARENT_ID_MAX_GPUS);
UVM_ASSERT(sub_index < UVM_PARENT_ID_MAX_SUB_PROCESSORS);
UVM_SUB_PROCESSOR_INDEX_CHECK_BOUNDS(sub_index);
return uvm_gpu_id_from_index(index * UVM_PARENT_ID_MAX_SUB_PROCESSORS + sub_index);
}
static uvm_gpu_id_t uvm_gpu_id_from_sub_processor(uvm_parent_gpu_id_t id, NvU32 sub_index)
{
return uvm_gpu_id_from_sub_processor_index(uvm_parent_id_gpu_index(id), sub_index);
}
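As a quick illustration of the dense GPU ID layout (hypothetical indexes, not taken from this change): with UVM_PARENT_ID_MAX_SUB_PROCESSORS == 8, the parent GPU at index 2 and MIG sub_index 3 land on dense index 2 * 8 + 3 == 19, which uvm_gpu_id_from_index() then converts into the corresponding uvm_gpu_id_t.
// Sketch only: parent index 2, sub-processor 3.
// uvm_gpu_id_from_sub_processor_index(2, 3) is equivalent to
// uvm_gpu_id_from_index(2 * UVM_PARENT_ID_MAX_SUB_PROCESSORS + 3), i.e. index 19.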
static uvm_parent_gpu_id_t uvm_parent_gpu_id_from_gpu_id(const uvm_gpu_id_t id)
{
UVM_ASSERT(UVM_ID_IS_GPU(id));
@ -525,6 +531,71 @@ UVM_PROCESSOR_MASK(uvm_processor_mask_t, \
extern const uvm_processor_mask_t g_uvm_processor_mask_cpu;
extern const uvm_processor_mask_t g_uvm_processor_mask_empty;
// This is similar to uvm_parent_processor_mask_t and uvm_processor_mask_t
// but defined as an NvU8 in order to save memory since DECLARE_BITMAP() uses
// unsigned long. It also means we need to define our own bitops.
// Note that these are not atomic operations.
typedef struct
{
NvU8 bitmap;
} uvm_sub_processor_mask_t;
static bool uvm_sub_processor_mask_test(const uvm_sub_processor_mask_t *mask, NvU32 sub_index)
{
UVM_SUB_PROCESSOR_INDEX_CHECK_BOUNDS(sub_index);
return mask->bitmap & (1 << sub_index);
}
static void uvm_sub_processor_mask_set(uvm_sub_processor_mask_t *mask, NvU32 sub_index)
{
UVM_SUB_PROCESSOR_INDEX_CHECK_BOUNDS(sub_index);
mask->bitmap |= 1 << sub_index;
}
static void uvm_sub_processor_mask_clear(uvm_sub_processor_mask_t *mask, NvU32 sub_index)
{
UVM_SUB_PROCESSOR_INDEX_CHECK_BOUNDS(sub_index);
mask->bitmap &= ~(1 << sub_index);
}
static bool uvm_sub_processor_mask_test_and_set(uvm_sub_processor_mask_t *mask, NvU32 sub_index)
{
bool result = uvm_sub_processor_mask_test(mask, sub_index);
if (!result)
uvm_sub_processor_mask_set(mask, sub_index);
return result;
}
static bool uvm_sub_processor_mask_test_and_clear(uvm_sub_processor_mask_t *mask, NvU32 sub_index)
{
bool result = uvm_sub_processor_mask_test(mask, sub_index);
if (result)
uvm_sub_processor_mask_clear(mask, sub_index);
return result;
}
static void uvm_sub_processor_mask_zero(uvm_sub_processor_mask_t *mask)
{
mask->bitmap = 0;
}
static bool uvm_sub_processor_mask_empty(const uvm_sub_processor_mask_t *mask)
{
return mask->bitmap == 0;
}
static NvU32 uvm_sub_processor_mask_get_count(const uvm_sub_processor_mask_t *mask)
{
return hweight8(mask->bitmap);
}
// Like uvm_processor_mask_subset() but ignores the CPU in the subset mask.
// Returns whether the GPUs in subset are a subset of the GPUs in mask.
bool uvm_processor_mask_gpu_subset(const uvm_processor_mask_t *subset,
@ -571,8 +642,28 @@ void uvm_parent_gpus_from_processor_mask(uvm_parent_processor_mask_t *parent_mas
i = uvm_gpu_id_next(i))
// Helper to iterate over all sub processor indexes.
#define for_each_sub_processor_index(i) \
for (i = 0; i < UVM_PARENT_ID_MAX_SUB_PROCESSORS; i++)
#define for_each_sub_processor_index(sub_index) \
for ((sub_index) = 0; (sub_index) < UVM_PARENT_ID_MAX_SUB_PROCESSORS; (sub_index)++)
static NvU32 uvm_sub_processor_mask_find_first_index(const uvm_sub_processor_mask_t *mask)
{
unsigned long bitmap = mask->bitmap;
return find_first_bit(&bitmap, UVM_PARENT_ID_MAX_SUB_PROCESSORS);
}
static NvU32 uvm_sub_processor_mask_find_next_index(const uvm_sub_processor_mask_t *mask, NvU32 min_index)
{
unsigned long bitmap = mask->bitmap;
return find_next_bit(&bitmap, UVM_PARENT_ID_MAX_SUB_PROCESSORS, min_index);
}
// Helper to iterate over all sub processor indexes in a given mask.
#define for_each_sub_processor_index_in_mask(sub_index, sub_mask) \
for ((sub_index) = uvm_sub_processor_mask_find_first_index((sub_mask)); \
(sub_index) < UVM_PARENT_ID_MAX_SUB_PROCESSORS; \
(sub_index) = uvm_sub_processor_mask_find_next_index((sub_mask), (sub_index) + 1))
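To show how the NvU8-backed mask and its iterator fit together, here is a minimal usage sketch built only from the helpers defined above; the function name is illustrative and not part of the driver.
static void example_sub_processor_mask_usage(void)
{
    uvm_sub_processor_mask_t mask;
    NvU32 sub_index;

    uvm_sub_processor_mask_zero(&mask);
    uvm_sub_processor_mask_set(&mask, 1);
    uvm_sub_processor_mask_set(&mask, 5);

    // Two bits are set, so the population count is 2.
    UVM_ASSERT(uvm_sub_processor_mask_get_count(&mask) == 2);

    // Visits sub_index 1, then 5; cleared indexes are skipped.
    for_each_sub_processor_index_in_mask(sub_index, &mask)
        UVM_ASSERT(uvm_sub_processor_mask_test(&mask, sub_index));

    // These helpers are not atomic, so concurrent updates to a shared mask
    // need external serialization.
}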
// Helper to iterate over all valid processor ids.
#define for_each_id(i) for (i = UVM_ID_CPU; UVM_ID_IS_VALID(i); i = uvm_id_next(i))

View File

@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2015-2022 NVidia Corporation
Copyright (c) 2015-2024 NVidia Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@ -191,7 +191,7 @@ typedef struct
NvU32 read_duplication; // Out (UVM_TEST_READ_DUPLICATION_POLICY)
NvProcessorUuid preferred_location; // Out
NvS32 preferred_cpu_nid; // Out
NvProcessorUuid accessed_by[UVM_MAX_PROCESSORS_V2]; // Out
NvProcessorUuid accessed_by[UVM_MAX_PROCESSORS]; // Out
NvU32 accessed_by_count; // Out
NvU32 type; // Out (UVM_TEST_VA_RANGE_TYPE)
union
@ -624,7 +624,7 @@ typedef struct
// Array of processors which have a resident copy of the page containing
// lookup_address.
NvProcessorUuid resident_on[UVM_MAX_PROCESSORS_V2]; // Out
NvProcessorUuid resident_on[UVM_MAX_PROCESSORS]; // Out
NvU32 resident_on_count; // Out
// If the memory is resident on the CPU, the NUMA node on which the page
@ -635,24 +635,24 @@ typedef struct
// system-page-sized portion of this allocation which contains
// lookup_address is guaranteed to be resident on the corresponding
// processor.
NvU32 resident_physical_size[UVM_MAX_PROCESSORS_V2]; // Out
NvU32 resident_physical_size[UVM_MAX_PROCESSORS]; // Out
// The physical address of the physical allocation backing lookup_address.
NvU64 resident_physical_address[UVM_MAX_PROCESSORS_V2] NV_ALIGN_BYTES(8); // Out
NvU64 resident_physical_address[UVM_MAX_PROCESSORS] NV_ALIGN_BYTES(8); // Out
// Array of processors which have a virtual mapping covering lookup_address.
NvProcessorUuid mapped_on[UVM_MAX_PROCESSORS_V2]; // Out
NvU32 mapping_type[UVM_MAX_PROCESSORS_V2]; // Out
NvU64 mapping_physical_address[UVM_MAX_PROCESSORS_V2] NV_ALIGN_BYTES(8); // Out
NvProcessorUuid mapped_on[UVM_MAX_PROCESSORS]; // Out
NvU32 mapping_type[UVM_MAX_PROCESSORS]; // Out
NvU64 mapping_physical_address[UVM_MAX_PROCESSORS] NV_ALIGN_BYTES(8); // Out
NvU32 mapped_on_count; // Out
// The size of the virtual mapping covering lookup_address on each
// mapped_on processor.
NvU32 page_size[UVM_MAX_PROCESSORS_V2]; // Out
NvU32 page_size[UVM_MAX_PROCESSORS]; // Out
// Array of processors which have physical memory populated that would back
// lookup_address if it was resident.
NvProcessorUuid populated_on[UVM_MAX_PROCESSORS_V2]; // Out
NvProcessorUuid populated_on[UVM_MAX_PROCESSORS]; // Out
NvU32 populated_on_count; // Out
NV_STATUS rmStatus; // Out

View File

@ -30,18 +30,18 @@ void uvm_tlb_batch_begin(uvm_page_tree_t *tree, uvm_tlb_batch_t *batch)
batch->tree = tree;
}
static NvU32 smallest_page_size(NvU32 page_sizes)
static NvU64 smallest_page_size(NvU64 page_sizes)
{
UVM_ASSERT(page_sizes != 0);
return 1u << __ffs(page_sizes);
return 1ULL << __ffs(page_sizes);
}
static NvU32 biggest_page_size(NvU32 page_sizes)
static NvU64 biggest_page_size(NvU64 page_sizes)
{
UVM_ASSERT(page_sizes != 0);
return 1u << __fls(page_sizes);
return 1ULL << __fls(page_sizes);
}
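A short worked example of the widened helpers; the page-size constants below are the usual UVM values and are assumed here rather than taken from this hunk.
// NvU64 sizes = UVM_PAGE_SIZE_4K | UVM_PAGE_SIZE_2M;   // 0x1000 | 0x200000 == 0x201000
// smallest_page_size(sizes) == 1ULL << __ffs(sizes) == 0x1000    (4K, bit 12)
// biggest_page_size(sizes)  == 1ULL << __fls(sizes) == 0x200000  (2M, bit 21)
// With NvU64 operands and 1ULL shifts, page-size masks wider than 32 bits can
// be represented without truncation.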
static void tlb_batch_flush_invalidate_per_va(uvm_tlb_batch_t *batch, uvm_push_t *push)
@ -53,8 +53,8 @@ static void tlb_batch_flush_invalidate_per_va(uvm_tlb_batch_t *batch, uvm_push_t
for (i = 0; i < batch->count; ++i) {
uvm_tlb_batch_range_t *entry = &batch->ranges[i];
NvU32 min_page_size = smallest_page_size(entry->page_sizes);
NvU32 max_page_size = biggest_page_size(entry->page_sizes);
NvU64 min_page_size = smallest_page_size(entry->page_sizes);
NvU64 max_page_size = biggest_page_size(entry->page_sizes);
// Use the depth of the max page size as it's the broadest
NvU32 depth = tree->hal->page_table_depth(max_page_size);
@ -113,7 +113,7 @@ void uvm_tlb_batch_end(uvm_tlb_batch_t *batch, uvm_push_t *push, uvm_membar_t tl
tlb_batch_flush_invalidate_per_va(batch, push);
}
void uvm_tlb_batch_invalidate(uvm_tlb_batch_t *batch, NvU64 start, NvU64 size, NvU32 page_sizes, uvm_membar_t tlb_membar)
void uvm_tlb_batch_invalidate(uvm_tlb_batch_t *batch, NvU64 start, NvU64 size, NvU64 page_sizes, uvm_membar_t tlb_membar)
{
uvm_tlb_batch_range_t *new_entry;

View File

@ -41,7 +41,7 @@ typedef struct
NvU64 size;
    // Min and max page sizes ORed together
NvU32 page_sizes;
NvU64 page_sizes;
} uvm_tlb_batch_range_t;
struct uvm_tlb_batch_struct
@ -63,7 +63,7 @@ struct uvm_tlb_batch_struct
NvU32 count;
// Biggest page size across all queued up invalidates
NvU32 biggest_page_size;
NvU64 biggest_page_size;
// Max membar across all queued up invalidates
uvm_membar_t membar;
@ -81,7 +81,7 @@ void uvm_tlb_batch_begin(uvm_page_tree_t *tree, uvm_tlb_batch_t *batch);
// If the membar parameter is not UVM_MEMBAR_NONE, the specified membar will
// be performed logically after the TLB invalidate such that all physical memory
// accesses using the old translations are ordered to the scope of the membar.
void uvm_tlb_batch_invalidate(uvm_tlb_batch_t *batch, NvU64 start, NvU64 size, NvU32 page_sizes, uvm_membar_t tlb_membar);
void uvm_tlb_batch_invalidate(uvm_tlb_batch_t *batch, NvU64 start, NvU64 size, NvU64 page_sizes, uvm_membar_t tlb_membar);
// End a TLB invalidate batch
//
@ -97,8 +97,12 @@ void uvm_tlb_batch_end(uvm_tlb_batch_t *batch, uvm_push_t *push, uvm_membar_t tl
// Helper for invalidating a single range immediately.
//
// Internally begins and ends a TLB batch.
static void uvm_tlb_batch_single_invalidate(uvm_page_tree_t *tree, uvm_push_t *push,
NvU64 start, NvU64 size, NvU32 page_sizes, uvm_membar_t tlb_membar)
static void uvm_tlb_batch_single_invalidate(uvm_page_tree_t *tree,
uvm_push_t *push,
NvU64 start,
NvU64 size,
NvU64 page_sizes,
uvm_membar_t tlb_membar)
{
uvm_tlb_batch_t batch;

View File

@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2016-2023 NVIDIA Corporation
Copyright (c) 2016-2024 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@ -57,20 +57,12 @@ typedef struct
struct list_head queue_nodes[UvmEventNumTypesAll];
struct page **queue_buffer_pages;
union
{
UvmEventEntry_V1 *queue_v1;
UvmEventEntry_V2 *queue_v2;
};
void *queue_buffer;
NvU32 queue_buffer_count;
NvU32 notification_threshold;
struct page **control_buffer_pages;
union
{
UvmToolsEventControlData_V1 *control_v1;
UvmToolsEventControlData_V2 *control_v2;
};
UvmToolsEventControlData *control;
wait_queue_head_t wait_queue;
bool is_wakeup_get_valid;
@ -398,16 +390,12 @@ static void destroy_event_tracker(uvm_tools_event_tracker_t *event_tracker)
if (event_tracker->is_queue) {
uvm_tools_queue_t *queue = &event_tracker->queue;
NvU64 buffer_size, control_size;
NvU64 buffer_size;
if (event_tracker->version == UvmToolsEventQueueVersion_V1) {
if (event_tracker->version == UvmToolsEventQueueVersion_V1)
buffer_size = queue->queue_buffer_count * sizeof(UvmEventEntry_V1);
control_size = sizeof(UvmToolsEventControlData_V1);
}
else {
else
buffer_size = queue->queue_buffer_count * sizeof(UvmEventEntry_V2);
control_size = sizeof(UvmToolsEventControlData_V2);
}
remove_event_tracker(va_space,
queue->queue_nodes,
@ -415,16 +403,16 @@ static void destroy_event_tracker(uvm_tools_event_tracker_t *event_tracker)
queue->subscribed_queues,
&queue->subscribed_queues);
if (queue->queue_v2 != NULL) {
if (queue->queue_buffer != NULL) {
unmap_user_pages(queue->queue_buffer_pages,
queue->queue_v2,
queue->queue_buffer,
buffer_size);
}
if (queue->control_v2 != NULL) {
if (queue->control != NULL) {
unmap_user_pages(queue->control_buffer_pages,
queue->control_v2,
control_size);
queue->control,
sizeof(UvmToolsEventControlData));
}
}
else {
@ -456,9 +444,9 @@ static void destroy_event_tracker(uvm_tools_event_tracker_t *event_tracker)
kmem_cache_free(g_tools_event_tracker_cache, event_tracker);
}
static void enqueue_event_v1(const UvmEventEntry_V1 *entry, uvm_tools_queue_t *queue)
static void enqueue_event(const void *entry, size_t entry_size, NvU8 eventType, uvm_tools_queue_t *queue)
{
UvmToolsEventControlData_V1 *ctrl = queue->control_v1;
UvmToolsEventControlData *ctrl = queue->control;
uvm_tools_queue_snapshot_t sn;
NvU32 queue_size = queue->queue_buffer_count;
NvU32 queue_mask = queue_size - 1;
@ -481,11 +469,11 @@ static void enqueue_event_v1(const UvmEventEntry_V1 *entry, uvm_tools_queue_t *q
// one free element means that the queue is full
if (((queue_size + sn.get_behind - sn.put_behind) & queue_mask) == 1) {
atomic64_inc((atomic64_t *)&ctrl->dropped + entry->eventData.eventType);
atomic64_inc((atomic64_t *)&ctrl->dropped + eventType);
goto unlock;
}
memcpy(queue->queue_v1 + sn.put_behind, entry, sizeof(*entry));
memcpy((char *)queue->queue_buffer + sn.put_behind * entry_size, entry, entry_size);
sn.put_behind = sn.put_ahead;
@ -509,79 +497,45 @@ unlock:
uvm_spin_unlock(&queue->lock);
}
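To make the shared ring-buffer bookkeeping above easier to follow, here is the full-queue check with illustrative numbers (all values hypothetical).
// queue_size == 8, so queue_mask == 7.
// Suppose get_behind == 3 and put_behind == 2:
//   (queue_size + get_behind - put_behind) & queue_mask == (8 + 3 - 2) & 7 == 1
// One slot is always left free to distinguish a full queue from an empty one,
// so the entry is not copied; only ctrl->dropped[eventType] is incremented.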
static void enqueue_event_v1(const UvmEventEntry_V1 *entry, uvm_tools_queue_t *queue)
{
enqueue_event(entry, sizeof(*entry), entry->eventData.eventType, queue);
}
static void enqueue_event_v2(const UvmEventEntry_V2 *entry, uvm_tools_queue_t *queue)
{
UvmToolsEventControlData_V2 *ctrl = queue->control_v2;
uvm_tools_queue_snapshot_t sn;
NvU32 queue_size = queue->queue_buffer_count;
NvU32 queue_mask = queue_size - 1;
enqueue_event(entry, sizeof(*entry), entry->eventData.eventType, queue);
}
// Prevent processor speculation prior to accessing user-mapped memory to
// avoid leaking information from side-channel attacks. There are many
// possible paths leading to this point and it would be difficult and error-
// prone to audit all of them to determine whether user mode could guide
// this access to kernel memory under speculative execution, so to be on the
// safe side we'll just always block speculation.
nv_speculation_barrier();
static void uvm_tools_record_event(struct list_head *head,
const void *entry,
size_t entry_size,
NvU8 eventType)
{
uvm_tools_queue_t *queue;
uvm_spin_lock(&queue->lock);
UVM_ASSERT(eventType < UvmEventNumTypesAll);
// ctrl is mapped into user space with read and write permissions,
// so its values cannot be trusted.
sn.get_behind = atomic_read((atomic_t *)&ctrl->get_behind) & queue_mask;
sn.put_behind = atomic_read((atomic_t *)&ctrl->put_behind) & queue_mask;
sn.put_ahead = (sn.put_behind + 1) & queue_mask;
// one free element means that the queue is full
if (((queue_size + sn.get_behind - sn.put_behind) & queue_mask) == 1) {
atomic64_inc((atomic64_t *)&ctrl->dropped + entry->eventData.eventType);
goto unlock;
}
memcpy(queue->queue_v2 + sn.put_behind, entry, sizeof(*entry));
sn.put_behind = sn.put_ahead;
// put_ahead and put_behind will always be the same outside of queue->lock
// this allows the user-space consumer to choose either a 2 or 4 pointer synchronization approach
atomic_set((atomic_t *)&ctrl->put_ahead, sn.put_behind);
atomic_set((atomic_t *)&ctrl->put_behind, sn.put_behind);
sn.get_ahead = atomic_read((atomic_t *)&ctrl->get_ahead);
// if the queue needs to be woken up, only signal if we haven't signaled before for this value of get_ahead
if (queue_needs_wakeup(queue, &sn) && !(queue->is_wakeup_get_valid && queue->wakeup_get == sn.get_ahead)) {
queue->is_wakeup_get_valid = true;
queue->wakeup_get = sn.get_ahead;
wake_up_all(&queue->wait_queue);
}
unlock:
uvm_spin_unlock(&queue->lock);
list_for_each_entry(queue, head + eventType, queue_nodes[eventType])
enqueue_event(entry, entry_size, eventType, queue);
}
static void uvm_tools_record_event_v1(uvm_va_space_t *va_space, const UvmEventEntry_V1 *entry)
{
NvU8 eventType = entry->eventData.eventType;
uvm_tools_queue_t *queue;
UVM_ASSERT(eventType < UvmEventNumTypesAll);
uvm_assert_rwsem_locked(&va_space->tools.lock);
list_for_each_entry(queue, va_space->tools.queues_v1 + eventType, queue_nodes[eventType])
enqueue_event_v1(entry, queue);
uvm_tools_record_event(va_space->tools.queues_v1, entry, sizeof(*entry), eventType);
}
static void uvm_tools_record_event_v2(uvm_va_space_t *va_space, const UvmEventEntry_V2 *entry)
{
NvU8 eventType = entry->eventData.eventType;
uvm_tools_queue_t *queue;
UVM_ASSERT(eventType < UvmEventNumTypesAll);
uvm_assert_rwsem_locked(&va_space->tools.lock);
list_for_each_entry(queue, va_space->tools.queues_v2 + eventType, queue_nodes[eventType])
enqueue_event_v2(entry, queue);
uvm_tools_record_event(va_space->tools.queues_v2, entry, sizeof(*entry), eventType);
}
static bool counter_matches_processor(UvmCounterName counter, const NvProcessorUuid *processor)
@ -751,7 +705,7 @@ static unsigned uvm_tools_poll(struct file *filp, poll_table *wait)
int flags = 0;
uvm_tools_queue_snapshot_t sn;
uvm_tools_event_tracker_t *event_tracker;
UvmToolsEventControlData_V2 *ctrl;
UvmToolsEventControlData *ctrl;
if (uvm_global_get_status() != NV_OK)
return POLLERR;
@ -763,7 +717,7 @@ static unsigned uvm_tools_poll(struct file *filp, poll_table *wait)
uvm_spin_lock(&event_tracker->queue.lock);
event_tracker->queue.is_wakeup_get_valid = false;
ctrl = event_tracker->queue.control_v2;
ctrl = event_tracker->queue.control;
sn.get_ahead = atomic_read((atomic_t *)&ctrl->get_ahead);
sn.put_behind = atomic_read((atomic_t *)&ctrl->put_behind);
@ -878,6 +832,24 @@ static void record_gpu_fault_instance(uvm_gpu_t *gpu,
}
}
static void record_cpu_fault(UvmEventCpuFaultInfo *info, uvm_perf_event_data_t *event_data)
{
info->eventType = UvmEventTypeCpuFault;
if (event_data->fault.cpu.is_write)
info->accessType = UvmEventMemoryAccessTypeWrite;
else
info->accessType = UvmEventMemoryAccessTypeRead;
info->address = event_data->fault.cpu.fault_va;
info->timeStamp = NV_GETTIME();
// assume that current owns va_space
info->pid = uvm_get_stale_process_id();
info->threadId = uvm_get_stale_thread_id();
info->pc = event_data->fault.cpu.pc;
// TODO: Bug 4515381: set info->nid when we decide if it's NUMA node ID or
// CPU ID.
}
static void uvm_tools_record_fault(uvm_perf_event_t event_id, uvm_perf_event_data_t *event_data)
{
uvm_va_space_t *va_space = event_data->fault.space;
@ -895,41 +867,17 @@ static void uvm_tools_record_fault(uvm_perf_event_t event_id, uvm_perf_event_dat
if (UVM_ID_IS_CPU(event_data->fault.proc_id)) {
if (tools_is_event_enabled_version(va_space, UvmEventTypeCpuFault, UvmToolsEventQueueVersion_V1)) {
UvmEventEntry_V1 entry;
UvmEventCpuFaultInfo_V1 *info = &entry.eventData.cpuFault;
memset(&entry, 0, sizeof(entry));
info->eventType = UvmEventTypeCpuFault;
if (event_data->fault.cpu.is_write)
info->accessType = UvmEventMemoryAccessTypeWrite;
else
info->accessType = UvmEventMemoryAccessTypeRead;
info->address = event_data->fault.cpu.fault_va;
info->timeStamp = NV_GETTIME();
// assume that current owns va_space
info->pid = uvm_get_stale_process_id();
info->threadId = uvm_get_stale_thread_id();
info->pc = event_data->fault.cpu.pc;
record_cpu_fault(&entry.eventData.cpuFault, event_data);
uvm_tools_record_event_v1(va_space, &entry);
}
if (tools_is_event_enabled_version(va_space, UvmEventTypeCpuFault, UvmToolsEventQueueVersion_V2)) {
UvmEventEntry_V2 entry;
UvmEventCpuFaultInfo_V2 *info = &entry.eventData.cpuFault;
memset(&entry, 0, sizeof(entry));
info->eventType = UvmEventTypeCpuFault;
if (event_data->fault.cpu.is_write)
info->accessType = UvmEventMemoryAccessTypeWrite;
else
info->accessType = UvmEventMemoryAccessTypeRead;
info->address = event_data->fault.cpu.fault_va;
info->timeStamp = NV_GETTIME();
// assume that current owns va_space
info->pid = uvm_get_stale_process_id();
info->threadId = uvm_get_stale_thread_id();
info->pc = event_data->fault.cpu.pc;
record_cpu_fault(&entry.eventData.cpuFault, event_data);
uvm_tools_record_event_v2(va_space, &entry);
}
@ -1834,7 +1782,7 @@ void uvm_tools_record_thrashing(uvm_va_space_t *va_space,
info->size = region_size;
info->timeStamp = NV_GETTIME();
BUILD_BUG_ON(UVM_MAX_PROCESSORS_V2 < UVM_ID_MAX_PROCESSORS);
BUILD_BUG_ON(UVM_MAX_PROCESSORS < UVM_ID_MAX_PROCESSORS);
bitmap_copy((long unsigned *)&info->processors, processors->bitmap, UVM_ID_MAX_PROCESSORS);
uvm_tools_record_event_v2(va_space, &entry);
@ -2151,7 +2099,7 @@ NV_STATUS uvm_api_tools_init_event_tracker(UVM_TOOLS_INIT_EVENT_TRACKER_PARAMS *
event_tracker->is_queue = params->queueBufferSize != 0;
if (event_tracker->is_queue) {
uvm_tools_queue_t *queue = &event_tracker->queue;
NvU64 buffer_size, control_size;
NvU64 buffer_size;
uvm_spin_lock_init(&queue->lock, UVM_LOCK_ORDER_LEAF);
init_waitqueue_head(&queue->wait_queue);
@ -2170,25 +2118,21 @@ NV_STATUS uvm_api_tools_init_event_tracker(UVM_TOOLS_INIT_EVENT_TRACKER_PARAMS *
goto fail;
}
if (event_tracker->version == UvmToolsEventQueueVersion_V1) {
if (event_tracker->version == UvmToolsEventQueueVersion_V1)
buffer_size = queue->queue_buffer_count * sizeof(UvmEventEntry_V1);
control_size = sizeof(UvmToolsEventControlData_V1);
}
else {
else
buffer_size = queue->queue_buffer_count * sizeof(UvmEventEntry_V2);
control_size = sizeof(UvmToolsEventControlData_V2);
}
status = map_user_pages(params->queueBuffer,
buffer_size,
(void **)&queue->queue_v2,
&queue->queue_buffer,
&queue->queue_buffer_pages);
if (status != NV_OK)
goto fail;
status = map_user_pages(params->controlBuffer,
control_size,
(void **)&queue->control_v2,
sizeof(UvmToolsEventControlData),
(void **)&queue->control,
&queue->control_buffer_pages);
if (status != NV_OK)
@ -2224,6 +2168,7 @@ NV_STATUS uvm_api_tools_set_notification_threshold(UVM_TOOLS_SET_NOTIFICATION_TH
{
uvm_tools_queue_snapshot_t sn;
uvm_tools_event_tracker_t *event_tracker = tools_event_tracker(filp);
UvmToolsEventControlData *ctrl;
if (!tracker_is_queue(event_tracker))
return NV_ERR_INVALID_ARGUMENT;
@ -2232,18 +2177,9 @@ NV_STATUS uvm_api_tools_set_notification_threshold(UVM_TOOLS_SET_NOTIFICATION_TH
event_tracker->queue.notification_threshold = params->notificationThreshold;
if (event_tracker->version == UvmToolsEventQueueVersion_V1) {
UvmToolsEventControlData_V1 *ctrl = event_tracker->queue.control_v1;
sn.put_behind = atomic_read((atomic_t *)&ctrl->put_behind);
sn.get_ahead = atomic_read((atomic_t *)&ctrl->get_ahead);
}
else {
UvmToolsEventControlData_V2 *ctrl = event_tracker->queue.control_v2;
sn.put_behind = atomic_read((atomic_t *)&ctrl->put_behind);
sn.get_ahead = atomic_read((atomic_t *)&ctrl->get_ahead);
}
ctrl = event_tracker->queue.control;
sn.put_behind = atomic_read((atomic_t *)&ctrl->put_behind);
sn.get_ahead = atomic_read((atomic_t *)&ctrl->get_ahead);
if (queue_needs_wakeup(&event_tracker->queue, &sn))
wake_up_all(&event_tracker->queue.wait_queue);

View File

@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2017-2021 NVIDIA Corporation
Copyright (c) 2017-2024 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@ -104,3 +104,248 @@ void uvm_hal_turing_host_set_gpfifo_entry(NvU64 *fifo_entry,
*fifo_entry = fifo_entry_value;
}
void uvm_hal_turing_host_tlb_invalidate_all(uvm_push_t *push,
uvm_gpu_phys_address_t pdb,
NvU32 depth,
uvm_membar_t membar)
{
NvU32 aperture_value;
NvU32 page_table_level;
NvU32 pdb_lo;
NvU32 pdb_hi;
NvU32 ack_value = 0;
NvU32 sysmembar_value = 0;
UVM_ASSERT_MSG(pdb.aperture == UVM_APERTURE_VID || pdb.aperture == UVM_APERTURE_SYS, "aperture: %u", pdb.aperture);
if (pdb.aperture == UVM_APERTURE_VID)
aperture_value = HWCONST(C46F, MEM_OP_C, TLB_INVALIDATE_PDB_APERTURE, VID_MEM);
else
aperture_value = HWCONST(C46F, MEM_OP_C, TLB_INVALIDATE_PDB_APERTURE, SYS_MEM_COHERENT);
UVM_ASSERT_MSG(IS_ALIGNED(pdb.address, 1 << 12), "pdb 0x%llx\n", pdb.address);
pdb.address >>= 12;
pdb_lo = pdb.address & HWMASK(C46F, MEM_OP_C, TLB_INVALIDATE_PDB_ADDR_LO);
pdb_hi = pdb.address >> HWSIZE(C46F, MEM_OP_C, TLB_INVALIDATE_PDB_ADDR_LO);
// PDE3 is the highest level on Pascal-Turing, see the comment in
// uvm_pascal_mmu.c for details.
UVM_ASSERT_MSG(depth < NVC46F_MEM_OP_C_TLB_INVALIDATE_PAGE_TABLE_LEVEL_UP_TO_PDE3, "depth %u", depth);
page_table_level = NVC46F_MEM_OP_C_TLB_INVALIDATE_PAGE_TABLE_LEVEL_UP_TO_PDE3 - depth;
if (membar != UVM_MEMBAR_NONE) {
// If a GPU or SYS membar is needed, ACK_TYPE needs to be set to
// GLOBALLY to make sure all the pending accesses can be picked up by
// the membar.
ack_value = HWCONST(C46F, MEM_OP_C, TLB_INVALIDATE_ACK_TYPE, GLOBALLY);
}
if (membar == UVM_MEMBAR_SYS)
sysmembar_value = HWCONST(C46F, MEM_OP_A, TLB_INVALIDATE_SYSMEMBAR, EN);
else
sysmembar_value = HWCONST(C46F, MEM_OP_A, TLB_INVALIDATE_SYSMEMBAR, DIS);
NV_PUSH_4U(C46F, MEM_OP_A, sysmembar_value,
MEM_OP_B, 0,
MEM_OP_C, HWCONST(C46F, MEM_OP_C, TLB_INVALIDATE_PDB, ONE) |
HWVALUE(C46F, MEM_OP_C, TLB_INVALIDATE_PDB_ADDR_LO, pdb_lo) |
HWCONST(C46F, MEM_OP_C, TLB_INVALIDATE_GPC, ENABLE) |
HWCONST(C46F, MEM_OP_C, TLB_INVALIDATE_REPLAY, NONE) |
HWVALUE(C46F, MEM_OP_C, TLB_INVALIDATE_PAGE_TABLE_LEVEL, page_table_level) |
aperture_value |
ack_value,
MEM_OP_D, HWCONST(C46F, MEM_OP_D, OPERATION, MMU_TLB_INVALIDATE) |
HWVALUE(C46F, MEM_OP_D, TLB_INVALIDATE_PDB_ADDR_HI, pdb_hi));
// GPU membar still requires an explicit membar method.
if (membar == UVM_MEMBAR_GPU)
uvm_push_get_gpu(push)->parent->host_hal->membar_gpu(push);
}
void uvm_hal_turing_host_tlb_invalidate_va(uvm_push_t *push,
uvm_gpu_phys_address_t pdb,
NvU32 depth,
NvU64 base,
NvU64 size,
NvU64 page_size,
uvm_membar_t membar)
{
NvU32 aperture_value;
NvU32 page_table_level;
NvU32 pdb_lo;
NvU32 pdb_hi;
NvU32 ack_value = 0;
NvU32 sysmembar_value = 0;
NvU32 va_lo;
NvU32 va_hi;
NvU64 end;
NvU64 actual_base;
NvU64 actual_size;
NvU64 actual_end;
NvU32 log2_invalidation_size;
uvm_gpu_t *gpu = uvm_push_get_gpu(push);
UVM_ASSERT_MSG(IS_ALIGNED(page_size, 1 << 12), "page_size 0x%llx\n", page_size);
UVM_ASSERT_MSG(IS_ALIGNED(base, page_size), "base 0x%llx page_size 0x%llx\n", base, page_size);
UVM_ASSERT_MSG(IS_ALIGNED(size, page_size), "size 0x%llx page_size 0x%llx\n", size, page_size);
UVM_ASSERT_MSG(size > 0, "size 0x%llx\n", size);
// The invalidation size must be a power-of-two number of pages containing
// the passed interval
end = base + size - 1;
log2_invalidation_size = __fls((unsigned long)(end ^ base)) + 1;
if (log2_invalidation_size == 64) {
// Invalidate everything
gpu->parent->host_hal->tlb_invalidate_all(push, pdb, depth, membar);
return;
}
// The hardware aligns the target address down to the invalidation size.
actual_size = 1ULL << log2_invalidation_size;
actual_base = UVM_ALIGN_DOWN(base, actual_size);
actual_end = actual_base + actual_size - 1;
UVM_ASSERT(actual_end >= end);
// The invalidation size field expects log2(invalidation size in 4K), not
// log2(invalidation size in bytes)
log2_invalidation_size -= 12;
// Address to invalidate, as a multiple of 4K.
base >>= 12;
va_lo = base & HWMASK(C46F, MEM_OP_A, TLB_INVALIDATE_TARGET_ADDR_LO);
va_hi = base >> HWSIZE(C46F, MEM_OP_A, TLB_INVALIDATE_TARGET_ADDR_LO);
UVM_ASSERT_MSG(pdb.aperture == UVM_APERTURE_VID || pdb.aperture == UVM_APERTURE_SYS, "aperture: %u", pdb.aperture);
if (pdb.aperture == UVM_APERTURE_VID)
aperture_value = HWCONST(C46F, MEM_OP_C, TLB_INVALIDATE_PDB_APERTURE, VID_MEM);
else
aperture_value = HWCONST(C46F, MEM_OP_C, TLB_INVALIDATE_PDB_APERTURE, SYS_MEM_COHERENT);
UVM_ASSERT_MSG(IS_ALIGNED(pdb.address, 1 << 12), "pdb 0x%llx\n", pdb.address);
pdb.address >>= 12;
pdb_lo = pdb.address & HWMASK(C46F, MEM_OP_C, TLB_INVALIDATE_PDB_ADDR_LO);
pdb_hi = pdb.address >> HWSIZE(C46F, MEM_OP_C, TLB_INVALIDATE_PDB_ADDR_LO);
// PDE3 is the highest level on Pascal-Turing, see the comment in
// uvm_pascal_mmu.c for details.
UVM_ASSERT_MSG(depth < NVC46F_MEM_OP_C_TLB_INVALIDATE_PAGE_TABLE_LEVEL_UP_TO_PDE3, "depth %u", depth);
page_table_level = NVC46F_MEM_OP_C_TLB_INVALIDATE_PAGE_TABLE_LEVEL_UP_TO_PDE3 - depth;
if (membar != UVM_MEMBAR_NONE) {
// If a GPU or SYS membar is needed, ACK_TYPE needs to be set to
// GLOBALLY to make sure all the pending accesses can be picked up by
// the membar.
ack_value = HWCONST(C46F, MEM_OP_C, TLB_INVALIDATE_ACK_TYPE, GLOBALLY);
}
if (membar == UVM_MEMBAR_SYS)
sysmembar_value = HWCONST(C46F, MEM_OP_A, TLB_INVALIDATE_SYSMEMBAR, EN);
else
sysmembar_value = HWCONST(C46F, MEM_OP_A, TLB_INVALIDATE_SYSMEMBAR, DIS);
NV_PUSH_4U(C46F, MEM_OP_A, HWVALUE(C46F, MEM_OP_A, TLB_INVALIDATE_INVALIDATION_SIZE, log2_invalidation_size) |
sysmembar_value |
HWVALUE(C46F, MEM_OP_A, TLB_INVALIDATE_TARGET_ADDR_LO, va_lo),
MEM_OP_B, HWVALUE(C46F, MEM_OP_B, TLB_INVALIDATE_TARGET_ADDR_HI, va_hi),
MEM_OP_C, HWCONST(C46F, MEM_OP_C, TLB_INVALIDATE_PDB, ONE) |
HWVALUE(C46F, MEM_OP_C, TLB_INVALIDATE_PDB_ADDR_LO, pdb_lo) |
HWCONST(C46F, MEM_OP_C, TLB_INVALIDATE_GPC, ENABLE) |
HWCONST(C46F, MEM_OP_C, TLB_INVALIDATE_REPLAY, NONE) |
HWVALUE(C46F, MEM_OP_C, TLB_INVALIDATE_PAGE_TABLE_LEVEL, page_table_level) |
aperture_value |
ack_value,
MEM_OP_D, HWCONST(C46F, MEM_OP_D, OPERATION, MMU_TLB_INVALIDATE_TARGETED) |
HWVALUE(C46F, MEM_OP_D, TLB_INVALIDATE_PDB_ADDR_HI, pdb_hi));
// GPU membar still requires an explicit membar method.
if (membar == UVM_MEMBAR_GPU)
gpu->parent->host_hal->membar_gpu(push);
}
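A worked example of the power-of-two sizing logic above, using made-up addresses.
// base == 0x200000, size == 0x3000 (three 4K pages)  =>  end == 0x202fff
// end ^ base == 0x2fff, __fls(0x2fff) == 13           =>  log2_invalidation_size == 14
// actual_size == 1ULL << 14 == 16K, actual_base == 0x200000, actual_end == 0x203fff,
// which covers the requested range. MEM_OP_A's size field gets 14 - 12 == 2,
// i.e. log2 of the invalidation size measured in 4K pages.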
void uvm_hal_turing_host_tlb_invalidate_test(uvm_push_t *push,
uvm_gpu_phys_address_t pdb,
UVM_TEST_INVALIDATE_TLB_PARAMS *params)
{
NvU32 ack_value = 0;
NvU32 sysmembar_value = 0;
NvU32 invalidate_gpc_value = 0;
NvU32 aperture_value = 0;
NvU32 pdb_lo = 0;
NvU32 pdb_hi = 0;
NvU32 page_table_level = 0;
UVM_ASSERT_MSG(pdb.aperture == UVM_APERTURE_VID || pdb.aperture == UVM_APERTURE_SYS, "aperture: %u", pdb.aperture);
if (pdb.aperture == UVM_APERTURE_VID)
aperture_value = HWCONST(C46F, MEM_OP_C, TLB_INVALIDATE_PDB_APERTURE, VID_MEM);
else
aperture_value = HWCONST(C46F, MEM_OP_C, TLB_INVALIDATE_PDB_APERTURE, SYS_MEM_COHERENT);
UVM_ASSERT_MSG(IS_ALIGNED(pdb.address, 1 << 12), "pdb 0x%llx\n", pdb.address);
pdb.address >>= 12;
pdb_lo = pdb.address & HWMASK(C46F, MEM_OP_C, TLB_INVALIDATE_PDB_ADDR_LO);
pdb_hi = pdb.address >> HWSIZE(C46F, MEM_OP_C, TLB_INVALIDATE_PDB_ADDR_LO);
if (params->page_table_level != UvmInvalidatePageTableLevelAll) {
// PDE3 is the highest level on Pascal-Turing, see the comment in
// uvm_pascal_mmu.c for details.
page_table_level = min((NvU32)UvmInvalidatePageTableLevelPde3, params->page_table_level) - 1;
}
if (params->membar != UvmInvalidateTlbMemBarNone) {
// If a GPU or SYS membar is needed, ack_value needs to be set to
// GLOBALLY to make sure all the pending accesses can be picked up by
// the membar.
ack_value = HWCONST(C46F, MEM_OP_C, TLB_INVALIDATE_ACK_TYPE, GLOBALLY);
}
if (params->membar == UvmInvalidateTlbMemBarSys)
sysmembar_value = HWCONST(C46F, MEM_OP_A, TLB_INVALIDATE_SYSMEMBAR, EN);
else
sysmembar_value = HWCONST(C46F, MEM_OP_A, TLB_INVALIDATE_SYSMEMBAR, DIS);
if (params->disable_gpc_invalidate)
invalidate_gpc_value = HWCONST(C46F, MEM_OP_C, TLB_INVALIDATE_GPC, DISABLE);
else
invalidate_gpc_value = HWCONST(C46F, MEM_OP_C, TLB_INVALIDATE_GPC, ENABLE);
if (params->target_va_mode == UvmTargetVaModeTargeted) {
NvU64 va = params->va >> 12;
NvU32 va_lo = va & HWMASK(C46F, MEM_OP_A, TLB_INVALIDATE_TARGET_ADDR_LO);
NvU32 va_hi = va >> HWSIZE(C46F, MEM_OP_A, TLB_INVALIDATE_TARGET_ADDR_LO);
NV_PUSH_4U(C46F, MEM_OP_A, sysmembar_value |
HWVALUE(C46F, MEM_OP_A, TLB_INVALIDATE_TARGET_ADDR_LO, va_lo),
MEM_OP_B, HWVALUE(C46F, MEM_OP_B, TLB_INVALIDATE_TARGET_ADDR_HI, va_hi),
MEM_OP_C, HWCONST(C46F, MEM_OP_C, TLB_INVALIDATE_REPLAY, NONE) |
HWVALUE(C46F, MEM_OP_C, TLB_INVALIDATE_PAGE_TABLE_LEVEL, page_table_level) |
HWCONST(C46F, MEM_OP_C, TLB_INVALIDATE_PDB, ONE) |
HWVALUE(C46F, MEM_OP_C, TLB_INVALIDATE_PDB_ADDR_LO, pdb_lo) |
invalidate_gpc_value |
aperture_value |
ack_value,
MEM_OP_D, HWCONST(C46F, MEM_OP_D, OPERATION, MMU_TLB_INVALIDATE_TARGETED) |
HWVALUE(C46F, MEM_OP_D, TLB_INVALIDATE_PDB_ADDR_HI, pdb_hi));
}
else {
NV_PUSH_4U(C46F, MEM_OP_A, sysmembar_value,
MEM_OP_B, 0,
MEM_OP_C, HWCONST(C46F, MEM_OP_C, TLB_INVALIDATE_REPLAY, NONE) |
HWVALUE(C46F, MEM_OP_C, TLB_INVALIDATE_PAGE_TABLE_LEVEL, page_table_level) |
HWCONST(C46F, MEM_OP_C, TLB_INVALIDATE_PDB, ONE) |
HWVALUE(C46F, MEM_OP_C, TLB_INVALIDATE_PDB_ADDR_LO, pdb_lo) |
invalidate_gpc_value |
aperture_value |
ack_value,
MEM_OP_D, HWCONST(C46F, MEM_OP_D, OPERATION, MMU_TLB_INVALIDATE) |
HWVALUE(C46F, MEM_OP_D, TLB_INVALIDATE_PDB_ADDR_HI, pdb_hi));
}
// GPU membar still requires an explicit membar method.
if (params->membar == UvmInvalidateTlbMemBarLocal)
uvm_push_get_gpu(push)->parent->host_hal->membar_gpu(push);
}

View File

@ -138,7 +138,7 @@ static NvU64 poisoned_pte_turing(void)
static uvm_mmu_mode_hal_t turing_mmu_mode_hal;
uvm_mmu_mode_hal_t *uvm_hal_mmu_mode_turing(NvU32 big_page_size)
uvm_mmu_mode_hal_t *uvm_hal_mmu_mode_turing(NvU64 big_page_size)
{
static bool initialized = false;

View File

@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2013-2023 NVidia Corporation
Copyright (c) 2013-2024 NVidia Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@ -52,19 +52,18 @@ typedef enum
typedef unsigned long long UvmStream;
// The maximum number of sub-processors per parent GPU.
#define UVM_PARENT_ID_MAX_SUB_PROCESSORS 8
// The maximum number of GPUs changed when multiple MIG instances per
// uvm_parent_gpu_t were added. See UvmEventQueueCreate().
// uvm_parent_gpu_t were added. The old version is kept as a convenience
// for code that needs to maintain forward compatibility.
#define UVM_MAX_GPUS_V1 NV_MAX_DEVICES
#define UVM_MAX_PROCESSORS_V1 (UVM_MAX_GPUS_V1 + 1)
#define UVM_MAX_GPUS_V2 (NV_MAX_DEVICES * NV_MAX_SUBDEVICES)
#define UVM_MAX_PROCESSORS_V2 (UVM_MAX_GPUS_V2 + 1)
#define UVM_MAX_GPUS (NV_MAX_DEVICES * UVM_PARENT_ID_MAX_SUB_PROCESSORS)
#define UVM_MAX_PROCESSORS (UVM_MAX_GPUS + 1)
// For backward compatibility:
// TODO: Bug 4465348: remove these after replacing old references.
#define UVM_MAX_GPUS UVM_MAX_GPUS_V1
#define UVM_MAX_PROCESSORS UVM_MAX_PROCESSORS_V1
#define UVM_PROCESSOR_MASK_SIZE ((UVM_MAX_PROCESSORS_V2 + (sizeof(NvU64) * 8) - 1) / (sizeof(NvU64) * 8))
#define UVM_PROCESSOR_MASK_SIZE ((UVM_MAX_PROCESSORS + (sizeof(NvU64) * 8) - 1) / (sizeof(NvU64) * 8))
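For concreteness (assuming NV_MAX_DEVICES is 32, which is defined elsewhere in the driver and not shown here), the new limits work out as follows.
// UVM_MAX_GPUS            == 32 * 8  == 256
// UVM_MAX_PROCESSORS      == 256 + 1 == 257   (every possible GPU plus the CPU)
// UVM_PROCESSOR_MASK_SIZE == (257 + 63) / 64 == 5 NvU64 words (320 bits)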
#define UVM_INIT_FLAGS_DISABLE_HMM ((NvU64)0x1)
#define UVM_INIT_FLAGS_MULTI_PROCESS_SHARING_MODE ((NvU64)0x2)
@ -423,29 +422,7 @@ typedef struct
NvU32 pid; // process id causing the fault
NvU32 threadId; // thread id causing the fault
NvU64 pc; // address of the instruction causing the fault
} UvmEventCpuFaultInfo_V1;
typedef struct
{
//
// eventType has to be 1st argument of this structure. Setting eventType to
// UvmEventTypeMemoryViolation helps to identify event data in a queue.
//
NvU8 eventType;
NvU8 accessType; // read/write violation (UvmEventMemoryAccessType)
//
// This structure is shared between UVM kernel and tools.
// Manually padding the structure so that compiler options like pragma pack
// or malign-double will have no effect on the field offsets.
//
NvU16 padding16Bits;
NvS32 nid; // NUMA node ID of faulting CPU
NvU64 address; // faulting address
NvU64 timeStamp; // cpu time when the fault occurred
NvU32 pid; // process id causing the fault
NvU32 threadId; // thread id causing the fault
NvU64 pc; // address of the instruction causing the fault
} UvmEventCpuFaultInfo_V2;
} UvmEventCpuFaultInfo;
typedef enum
{
@ -721,13 +698,7 @@ typedef struct
//
NvU8 eventType;
NvU8 faultType; // type of gpu fault, refer UvmEventFaultType
NvU8 accessType; // memory access type, refer UvmEventMemoryAccessType
//
// This structure is shared between UVM kernel and tools.
// Manually padding the structure so that compiler options like pragma pack
// or malign-double will have no effect on the field offsets
//
NvU8 padding8Bits_1;
NvU16 gpuIndex; // GPU that experienced the fault
union
{
NvU16 gpcId; // If this is a replayable fault, this field contains
@ -759,14 +730,13 @@ typedef struct
// UvmEventFaultClientTypeGpc indicates replayable
// fault, while UvmEventFaultClientTypeHub indicates
// non-replayable fault.
NvU8 accessType; // memory access type, refer UvmEventMemoryAccessType
//
// This structure is shared between UVM kernel and tools.
// Manually padding the structure so that compiler options like pragma pack
// or malign-double will have no effect on the field offsets
//
NvU8 padding8Bits_2;
NvU16 gpuIndex; // GPU that experienced the fault
NvU16 padding16bits;
} UvmEventGpuFaultInfo_V2;
//------------------------------------------------------------------------------
@ -1108,8 +1078,8 @@ typedef struct
// or malign-double will have no effect on the field offsets
//
NvU8 padding8bits;
NvU16 padding16bits[2];
NvU16 processorIndex; // index of the cpu/gpu that was throttled
NvU32 padding32bits;
NvU64 address; // address of the page whose servicing is being
// throttled
NvU64 timeStamp; // cpu start time stamp for the throttling operation
@ -1150,8 +1120,8 @@ typedef struct
// or malign-double will have no effect on the field offsets
//
NvU8 padding8bits;
NvU16 padding16bits[2];
NvU16 processorIndex; // index of the cpu/gpu that was throttled
NvU32 padding32bits;
NvU64 address; // address of the page whose servicing is being
// throttled
NvU64 timeStamp; // cpu end time stamp for the throttling operation
@ -1409,7 +1379,7 @@ typedef struct
NvU8 eventType;
UvmEventMigrationInfo_Lite migration_Lite;
UvmEventCpuFaultInfo_V1 cpuFault;
UvmEventCpuFaultInfo cpuFault;
UvmEventMigrationInfo_V1 migration;
UvmEventGpuFaultInfo_V1 gpuFault;
UvmEventGpuFaultReplayInfo_V1 gpuFaultReplay;
@ -1443,7 +1413,7 @@ typedef struct
NvU8 eventType;
UvmEventMigrationInfo_Lite migration_Lite;
UvmEventCpuFaultInfo_V2 cpuFault;
UvmEventCpuFaultInfo cpuFault;
UvmEventMigrationInfo_V2 migration;
UvmEventGpuFaultInfo_V2 gpuFault;
UvmEventGpuFaultReplayInfo_V2 gpuFaultReplay;
@ -1510,19 +1480,7 @@ typedef enum {
UvmToolsEventQueueVersion_V2 = 2,
} UvmToolsEventQueueVersion;
typedef struct UvmEventControlData_V1_tag {
// entries between get_ahead and get_behind are currently being read
volatile NvU32 get_ahead;
volatile NvU32 get_behind;
// entries between put_ahead and put_behind are currently being written
volatile NvU32 put_ahead;
volatile NvU32 put_behind;
// counter of dropped events
NvU64 dropped[UvmEventNumTypesAll];
} UvmToolsEventControlData_V1;
typedef struct UvmEventControlData_V2_tag {
typedef struct UvmEventControlData_tag {
// entries between get_ahead and get_behind are currently being read
volatile NvU32 get_ahead;
volatile NvU32 get_behind;
@ -1531,19 +1489,12 @@ typedef struct UvmEventControlData_V2_tag {
volatile NvU32 put_ahead;
volatile NvU32 put_behind;
// The version values are limited to UvmToolsEventQueueVersion and
// initialized by UvmToolsCreateEventQueue().
NvU32 version;
NvU32 padding32Bits;
// counter of dropped events
NvU64 dropped[UvmEventNumTypesAll];
} UvmToolsEventControlData_V2;
} UvmToolsEventControlData;
// For backward compatibility:
// TODO: Bug 4465348: remove these after replacing old references.
typedef UvmToolsEventControlData_V1 UvmToolsEventControlData;
typedef UvmEventEntry_V1 UvmEventEntry;
// TODO: Bug 4465348: remove this after replacing old references.
typedef UvmToolsEventControlData UvmToolsEventControlData_V1;
//------------------------------------------------------------------------------
// UVM Tools forward types (handles) definitions

View File

@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2015-2023 NVIDIA Corporation
Copyright (c) 2015-2024 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@ -1328,12 +1328,12 @@ error_block_free:
static void cpu_chunk_remove_sysmem_gpu_mapping(uvm_cpu_chunk_t *chunk, uvm_gpu_t *gpu)
{
NvU64 gpu_mapping_addr = uvm_cpu_chunk_get_parent_gpu_phys_addr(chunk, gpu->parent);
NvU64 gpu_mapping_addr = uvm_cpu_chunk_get_gpu_phys_addr(chunk, gpu);
if (gpu_mapping_addr == 0)
return;
uvm_pmm_sysmem_mappings_remove_gpu_mapping(&gpu->pmm_reverse_sysmem_mappings, gpu_mapping_addr);
uvm_cpu_chunk_unmap_parent_gpu_phys(chunk, gpu->parent);
uvm_cpu_chunk_unmap_gpu(chunk, gpu);
}
static NV_STATUS cpu_chunk_add_sysmem_gpu_mapping(uvm_cpu_chunk_t *chunk,
@ -1356,17 +1356,14 @@ static NV_STATUS cpu_chunk_add_sysmem_gpu_mapping(uvm_cpu_chunk_t *chunk,
chunk_size = uvm_cpu_chunk_get_size(chunk);
// TODO: Bug 3744779: Handle benign assertion in
// pmm_sysmem_mappings_remove_gpu_mapping() in case of a
// failure.
status = uvm_pmm_sysmem_mappings_add_gpu_mapping(&gpu->pmm_reverse_sysmem_mappings,
uvm_cpu_chunk_get_parent_gpu_phys_addr(chunk, gpu->parent),
uvm_cpu_chunk_get_gpu_phys_addr(chunk, gpu),
uvm_va_block_cpu_page_address(block, page_index),
chunk_size,
block,
UVM_ID_CPU);
if (status != NV_OK)
cpu_chunk_remove_sysmem_gpu_mapping(chunk, gpu);
uvm_cpu_chunk_unmap_gpu(chunk, gpu);
return status;
}
@ -1395,10 +1392,10 @@ static NV_STATUS block_gpu_map_phys_all_cpu_pages(uvm_va_block_t *block, uvm_gpu
for_each_possible_uvm_node(nid) {
for_each_cpu_chunk_in_block(chunk, page_index, block, nid) {
UVM_ASSERT_MSG(uvm_cpu_chunk_get_parent_gpu_phys_addr(chunk, gpu->parent) == 0,
UVM_ASSERT_MSG(uvm_cpu_chunk_get_gpu_phys_addr(chunk, gpu) == 0,
"GPU%u DMA address 0x%llx\n",
uvm_id_value(gpu->id),
uvm_cpu_chunk_get_parent_gpu_phys_addr(chunk, gpu->parent));
uvm_cpu_chunk_get_gpu_phys_addr(chunk, gpu));
status = cpu_chunk_add_sysmem_gpu_mapping(chunk, block, page_index, gpu);
if (status != NV_OK)
@ -1561,8 +1558,7 @@ NV_STATUS uvm_va_block_gpu_state_alloc(uvm_va_block_t *va_block)
}
void uvm_va_block_unmap_cpu_chunk_on_gpus(uvm_va_block_t *block,
uvm_cpu_chunk_t *chunk,
uvm_page_index_t page_index)
uvm_cpu_chunk_t *chunk)
{
uvm_gpu_id_t id;
@ -1601,7 +1597,7 @@ NV_STATUS uvm_va_block_map_cpu_chunk_on_gpus(uvm_va_block_t *block,
return NV_OK;
error:
uvm_va_block_unmap_cpu_chunk_on_gpus(block, chunk, page_index);
uvm_va_block_unmap_cpu_chunk_on_gpus(block, chunk);
return status;
}
@ -1620,7 +1616,7 @@ void uvm_va_block_remove_cpu_chunks(uvm_va_block_t *va_block, uvm_va_block_regio
uvm_page_mask_region_clear(&va_block->cpu.pte_bits[UVM_PTE_BITS_CPU_WRITE], chunk_region);
uvm_va_block_cpu_clear_resident_region(va_block, nid, chunk_region);
uvm_cpu_chunk_remove_from_block(va_block, nid, page_index);
uvm_va_block_unmap_cpu_chunk_on_gpus(va_block, chunk, page_index);
uvm_va_block_unmap_cpu_chunk_on_gpus(va_block, chunk);
uvm_cpu_chunk_free(chunk);
}
}
@ -2308,7 +2304,7 @@ static bool block_gpu_supports_2m(uvm_va_block_t *block, uvm_gpu_t *gpu)
return uvm_mmu_page_size_supported(&gpu_va_space->page_tables, UVM_PAGE_SIZE_2M);
}
NvU32 uvm_va_block_gpu_big_page_size(uvm_va_block_t *va_block, uvm_gpu_t *gpu)
NvU64 uvm_va_block_gpu_big_page_size(uvm_va_block_t *va_block, uvm_gpu_t *gpu)
{
uvm_gpu_va_space_t *gpu_va_space;
@ -2316,7 +2312,7 @@ NvU32 uvm_va_block_gpu_big_page_size(uvm_va_block_t *va_block, uvm_gpu_t *gpu)
return gpu_va_space->page_tables.big_page_size;
}
static uvm_va_block_region_t range_big_page_region_all(NvU64 start, NvU64 end, NvU32 big_page_size)
static uvm_va_block_region_t range_big_page_region_all(NvU64 start, NvU64 end, NvU64 big_page_size)
{
NvU64 first_addr = UVM_ALIGN_UP(start, big_page_size);
NvU64 outer_addr = UVM_ALIGN_DOWN(end + 1, big_page_size);
@ -2330,20 +2326,20 @@ static uvm_va_block_region_t range_big_page_region_all(NvU64 start, NvU64 end, N
return uvm_va_block_region((first_addr - start) / PAGE_SIZE, (outer_addr - start) / PAGE_SIZE);
}
static size_t range_num_big_pages(NvU64 start, NvU64 end, NvU32 big_page_size)
static size_t range_num_big_pages(NvU64 start, NvU64 end, NvU64 big_page_size)
{
uvm_va_block_region_t region = range_big_page_region_all(start, end, big_page_size);
return (size_t)uvm_div_pow2_64(uvm_va_block_region_size(region), big_page_size);
}
uvm_va_block_region_t uvm_va_block_big_page_region_all(uvm_va_block_t *va_block, NvU32 big_page_size)
uvm_va_block_region_t uvm_va_block_big_page_region_all(uvm_va_block_t *va_block, NvU64 big_page_size)
{
return range_big_page_region_all(va_block->start, va_block->end, big_page_size);
}
uvm_va_block_region_t uvm_va_block_big_page_region_subset(uvm_va_block_t *va_block,
uvm_va_block_region_t region,
NvU32 big_page_size)
NvU64 big_page_size)
{
NvU64 start = uvm_va_block_region_start(va_block, region);
NvU64 end = uvm_va_block_region_end(va_block, region);
@ -2361,12 +2357,12 @@ uvm_va_block_region_t uvm_va_block_big_page_region_subset(uvm_va_block_t *va_blo
return big_region;
}
size_t uvm_va_block_num_big_pages(uvm_va_block_t *va_block, NvU32 big_page_size)
size_t uvm_va_block_num_big_pages(uvm_va_block_t *va_block, NvU64 big_page_size)
{
return range_num_big_pages(va_block->start, va_block->end, big_page_size);
}
NvU64 uvm_va_block_big_page_addr(uvm_va_block_t *va_block, size_t big_page_index, NvU32 big_page_size)
NvU64 uvm_va_block_big_page_addr(uvm_va_block_t *va_block, size_t big_page_index, NvU64 big_page_size)
{
NvU64 addr = UVM_ALIGN_UP(va_block->start, big_page_size) + (big_page_index * big_page_size);
UVM_ASSERT(addr >= va_block->start);
@ -2374,7 +2370,7 @@ NvU64 uvm_va_block_big_page_addr(uvm_va_block_t *va_block, size_t big_page_index
return addr;
}
uvm_va_block_region_t uvm_va_block_big_page_region(uvm_va_block_t *va_block, size_t big_page_index, NvU32 big_page_size)
uvm_va_block_region_t uvm_va_block_big_page_region(uvm_va_block_t *va_block, size_t big_page_index, NvU64 big_page_size)
{
NvU64 page_addr = uvm_va_block_big_page_addr(va_block, big_page_index, big_page_size);
@ -2390,7 +2386,7 @@ uvm_va_block_region_t uvm_va_block_big_page_region(uvm_va_block_t *va_block, siz
// uvm_va_block_gpu_state_t::big_ptes) corresponding to page_index. If
// page_index cannot be covered by a big PTE due to alignment or block size,
// MAX_BIG_PAGES_PER_UVM_VA_BLOCK is returned.
size_t uvm_va_block_big_page_index(uvm_va_block_t *va_block, uvm_page_index_t page_index, NvU32 big_page_size)
size_t uvm_va_block_big_page_index(uvm_va_block_t *va_block, uvm_page_index_t page_index, NvU64 big_page_size)
{
uvm_va_block_region_t big_region_all = uvm_va_block_big_page_region_all(va_block, big_page_size);
size_t big_index;
@ -2415,7 +2411,7 @@ static void uvm_page_mask_init_from_big_ptes(uvm_va_block_t *block,
{
uvm_va_block_region_t big_region;
size_t big_page_index;
NvU32 big_page_size = uvm_va_block_gpu_big_page_size(block, gpu);
NvU64 big_page_size = uvm_va_block_gpu_big_page_size(block, gpu);
uvm_page_mask_zero(mask_out);
@ -2425,7 +2421,7 @@ static void uvm_page_mask_init_from_big_ptes(uvm_va_block_t *block,
}
}
NvU32 uvm_va_block_page_size_cpu(uvm_va_block_t *va_block, uvm_page_index_t page_index)
NvU64 uvm_va_block_page_size_cpu(uvm_va_block_t *va_block, uvm_page_index_t page_index)
{
if (!uvm_page_mask_test(&va_block->cpu.pte_bits[UVM_PTE_BITS_CPU_READ], page_index))
return 0;
@ -2439,7 +2435,7 @@ NvU32 uvm_va_block_page_size_cpu(uvm_va_block_t *va_block, uvm_page_index_t page
return PAGE_SIZE;
}
NvU32 uvm_va_block_page_size_gpu(uvm_va_block_t *va_block, uvm_gpu_id_t gpu_id, uvm_page_index_t page_index)
NvU64 uvm_va_block_page_size_gpu(uvm_va_block_t *va_block, uvm_gpu_id_t gpu_id, uvm_page_index_t page_index)
{
uvm_va_block_gpu_state_t *gpu_state = uvm_va_block_gpu_state_get(va_block, gpu_id);
size_t big_page_size, big_page_index;
@ -2467,7 +2463,7 @@ NvU32 uvm_va_block_page_size_gpu(uvm_va_block_t *va_block, uvm_gpu_id_t gpu_id,
// resident. Note that this is different from uvm_va_block_page_size_* because
// those return the size of the PTE which maps the page index, which may be
// smaller than the physical allocation.
static NvU32 block_phys_page_size(uvm_va_block_t *block, block_phys_page_t page)
static NvU64 block_phys_page_size(uvm_va_block_t *block, block_phys_page_t page)
{
uvm_va_block_gpu_state_t *gpu_state;
uvm_chunk_size_t chunk_size;
@ -2480,7 +2476,7 @@ static NvU32 block_phys_page_size(uvm_va_block_t *block, block_phys_page_t page)
return 0;
UVM_ASSERT(uvm_processor_mask_test(&block->resident, UVM_ID_CPU));
return (NvU32)uvm_cpu_chunk_get_size(chunk);
return uvm_cpu_chunk_get_size(chunk);
}
gpu_state = uvm_va_block_gpu_state_get(block, page.processor);
@ -2489,10 +2485,10 @@ static NvU32 block_phys_page_size(uvm_va_block_t *block, block_phys_page_t page)
UVM_ASSERT(uvm_processor_mask_test(&block->resident, page.processor));
block_gpu_chunk_index(block, block_get_gpu(block, page.processor), page.page_index, &chunk_size);
return (NvU32)chunk_size;
return chunk_size;
}
NvU32 uvm_va_block_get_physical_size(uvm_va_block_t *block,
NvU64 uvm_va_block_get_physical_size(uvm_va_block_t *block,
uvm_processor_id_t processor,
uvm_page_index_t page_index)
{
@ -3344,7 +3340,7 @@ static uvm_gpu_phys_address_t block_phys_page_address(uvm_va_block_t *block,
if (UVM_ID_IS_CPU(block_page.processor)) {
uvm_cpu_chunk_t *chunk = uvm_cpu_chunk_get_chunk_for_page(block, block_page.nid, block_page.page_index);
NvU64 dma_addr = uvm_cpu_chunk_get_parent_gpu_phys_addr(chunk, gpu->parent);
NvU64 dma_addr = uvm_cpu_chunk_get_gpu_phys_addr(chunk, gpu);
uvm_va_block_region_t chunk_region = uvm_va_block_chunk_region(block,
uvm_cpu_chunk_get_size(chunk),
block_page.page_index);
@ -5387,7 +5383,7 @@ static bool block_check_gpu_chunks(uvm_va_block_t *block, uvm_gpu_id_t id)
if (chunk) {
if (chunk_size != uvm_gpu_chunk_get_size(chunk)) {
UVM_ERR_PRINT("chunk size mismatch: calc %u, actual %u. VA block [0x%llx, 0x%llx) GPU: %u page_index: %u chunk index: %zu\n",
UVM_ERR_PRINT("chunk size mismatch: calc %u, actual %u. VA block [0x%llx, 0x%llx) GPU: %u page_index: %u chunk index: %lu\n",
chunk_size,
uvm_gpu_chunk_get_size(chunk),
block->start,
@ -5399,7 +5395,7 @@ static bool block_check_gpu_chunks(uvm_va_block_t *block, uvm_gpu_id_t id)
}
if (chunk->state != UVM_PMM_GPU_CHUNK_STATE_ALLOCATED) {
UVM_ERR_PRINT("Invalid chunk state %s. VA block [0x%llx, 0x%llx) GPU: %u page_index: %u chunk index: %zu chunk_size: %u\n",
UVM_ERR_PRINT("Invalid chunk state %s. VA block [0x%llx, 0x%llx) GPU: %u page_index: %u chunk index: %lu chunk_size: llu\n",
uvm_pmm_gpu_chunk_state_string(chunk->state),
block->start,
block->end + 1,
@ -5718,7 +5714,7 @@ static bool block_check_mappings_ptes(uvm_va_block_t *block, uvm_va_block_contex
uvm_pte_bits_gpu_t pte_bit;
uvm_processor_id_t resident_id;
uvm_prot_t prot;
NvU32 big_page_size;
NvU64 big_page_size;
size_t num_big_pages, big_page_index;
uvm_va_block_region_t big_region, chunk_region;
uvm_gpu_chunk_t *chunk;
@ -6170,7 +6166,7 @@ static void block_gpu_pte_big_split_write_4k(uvm_va_block_t *block,
size_t big_page_index;
uvm_processor_id_t curr_resident_id;
uvm_prot_t curr_prot;
NvU32 big_page_size = uvm_va_block_gpu_big_page_size(block, gpu);
NvU64 big_page_size = uvm_va_block_gpu_big_page_size(block, gpu);
if (UVM_ID_IS_INVALID(resident_id))
UVM_ASSERT(new_prot == UVM_PROT_NONE);
@ -6252,7 +6248,7 @@ static void block_gpu_pte_clear_big(uvm_va_block_t *block,
{
uvm_va_block_gpu_state_t *gpu_state = uvm_va_block_gpu_state_get(block, gpu->id);
uvm_gpu_va_space_t *gpu_va_space = uvm_va_block_get_gpu_va_space(block, gpu);
NvU32 big_page_size = gpu_va_space->page_tables.big_page_size;
NvU64 big_page_size = gpu_va_space->page_tables.big_page_size;
uvm_gpu_phys_address_t pte_addr;
NvU32 pte_size = uvm_mmu_pte_size(&gpu_va_space->page_tables, big_page_size);
size_t big_page_index;
@ -6298,7 +6294,7 @@ static void block_gpu_pte_write_big(uvm_va_block_t *block,
uvm_va_block_gpu_state_t *gpu_state = uvm_va_block_gpu_state_get(block, gpu->id);
uvm_gpu_va_space_t *gpu_va_space = uvm_va_block_get_gpu_va_space(block, gpu);
uvm_page_tree_t *tree = &gpu_va_space->page_tables;
NvU32 big_page_size = tree->big_page_size;
NvU64 big_page_size = tree->big_page_size;
NvU32 pte_size = uvm_mmu_pte_size(tree, big_page_size);
size_t big_page_index;
uvm_va_block_region_t contig_region = {0};
@ -6376,7 +6372,7 @@ static void block_gpu_pte_merge_big_and_end(uvm_va_block_t *block,
{
uvm_va_block_gpu_state_t *gpu_state = uvm_va_block_gpu_state_get(block, gpu->id);
uvm_page_tree_t *tree = &uvm_va_block_get_gpu_va_space(block, gpu)->page_tables;
NvU32 big_page_size = tree->big_page_size;
NvU64 big_page_size = tree->big_page_size;
NvU64 unmapped_pte_val = tree->hal->unmapped_pte(big_page_size);
size_t big_page_index;
DECLARE_BITMAP(dummy_big_ptes, MAX_BIG_PAGES_PER_UVM_VA_BLOCK);
@ -6937,7 +6933,7 @@ static void block_gpu_split_big(uvm_va_block_t *block,
uvm_page_tree_t *tree = &uvm_va_block_get_gpu_va_space(block, gpu)->page_tables;
uvm_pte_batch_t *pte_batch = &block_context->mapping.pte_batch;
uvm_tlb_batch_t *tlb_batch = &block_context->mapping.tlb_batch;
NvU32 big_page_size = tree->big_page_size;
NvU64 big_page_size = tree->big_page_size;
uvm_va_block_region_t big_region;
uvm_processor_id_t resident_id;
size_t big_page_index;
@ -7039,7 +7035,7 @@ static void block_gpu_map_big_and_4k(uvm_va_block_t *block,
DECLARE_BITMAP(big_ptes_mask, MAX_BIG_PAGES_PER_UVM_VA_BLOCK);
uvm_va_block_region_t big_region;
size_t big_page_index;
NvU32 big_page_size = tree->big_page_size;
NvU64 big_page_size = tree->big_page_size;
uvm_membar_t tlb_membar = block_pte_op_membar(pte_op, gpu, resident_id);
UVM_ASSERT(!gpu_state->pte_is_2m);
@ -7341,7 +7337,7 @@ static void block_gpu_unmap_big_and_4k(uvm_va_block_t *block,
DECLARE_BITMAP(big_ptes_split, MAX_BIG_PAGES_PER_UVM_VA_BLOCK);
DECLARE_BITMAP(big_ptes_before_or_after, MAX_BIG_PAGES_PER_UVM_VA_BLOCK);
DECLARE_BITMAP(big_ptes_mask, MAX_BIG_PAGES_PER_UVM_VA_BLOCK);
NvU32 big_page_size = tree->big_page_size;
NvU64 big_page_size = tree->big_page_size;
NvU64 unmapped_pte_val = tree->hal->unmapped_pte(big_page_size);
UVM_ASSERT(!gpu_state->pte_is_2m);
@ -7487,7 +7483,7 @@ static void block_gpu_compute_new_pte_state(uvm_va_block_t *block,
{
uvm_va_block_gpu_state_t *gpu_state = uvm_va_block_gpu_state_get(block, gpu->id);
uvm_va_block_region_t big_region_all, big_page_region, region;
NvU32 big_page_size;
NvU64 big_page_size;
uvm_page_index_t page_index;
size_t big_page_index;
DECLARE_BITMAP(big_ptes_not_covered, MAX_BIG_PAGES_PER_UVM_VA_BLOCK);
@ -7640,7 +7636,7 @@ static void block_gpu_compute_new_pte_state(uvm_va_block_t *block,
// happens, the pending tracker is added to the block's tracker.
static NV_STATUS block_alloc_pt_range_with_retry(uvm_va_block_t *va_block,
uvm_gpu_t *gpu,
NvU32 page_size,
NvU64 page_size,
uvm_page_table_range_t *page_table_range,
uvm_tracker_t *pending_tracker)
{
@ -7763,13 +7759,13 @@ allocated:
// sizes. See block_alloc_pt_range_with_retry.
static NV_STATUS block_alloc_ptes_with_retry(uvm_va_block_t *va_block,
uvm_gpu_t *gpu,
NvU32 page_sizes,
NvU64 page_sizes,
uvm_tracker_t *pending_tracker)
{
uvm_va_block_gpu_state_t *gpu_state = uvm_va_block_gpu_state_get(va_block, gpu->id);
uvm_gpu_va_space_t *gpu_va_space = uvm_va_block_get_gpu_va_space(va_block, gpu);
uvm_page_table_range_t *range;
NvU32 page_size;
NvU64 page_size;
NV_STATUS status, final_status = NV_OK;
UVM_ASSERT(gpu_state);
@ -7821,7 +7817,7 @@ static NV_STATUS block_alloc_ptes_new_state(uvm_va_block_t *va_block,
uvm_va_block_new_pte_state_t *new_pte_state,
uvm_tracker_t *pending_tracker)
{
NvU32 page_sizes = 0;
NvU64 page_sizes = 0;
if (new_pte_state->pte_is_2m) {
page_sizes |= UVM_PAGE_SIZE_2M;
@ -7853,8 +7849,8 @@ static NV_STATUS block_pre_populate_pde1_gpu(uvm_va_block_t *block,
uvm_gpu_va_space_t *gpu_va_space,
uvm_tracker_t *pending_tracker)
{
NvU32 page_sizes;
NvU32 big_page_size;
NvU64 page_sizes;
NvU64 big_page_size;
uvm_gpu_t *gpu;
uvm_va_block_gpu_state_t *gpu_state;
@ -9509,7 +9505,6 @@ static void block_kill(uvm_va_block_t *block)
// Free CPU pages
for_each_possible_uvm_node(nid) {
uvm_va_block_cpu_node_state_t *node_state = block_node_state_get(block, nid);
size_t index = node_to_index(nid);
for_each_cpu_chunk_in_block_safe(chunk, page_index, next_page_index, block, nid) {
// be conservative.
@ -9524,9 +9519,20 @@ static void block_kill(uvm_va_block_t *block)
UVM_ASSERT(uvm_page_mask_empty(&node_state->allocated));
UVM_ASSERT(node_state->chunks == 0);
kmem_cache_free(g_uvm_va_block_cpu_node_state_cache, block->cpu.node_state[index]);
}
// While a per-NUMA node_state array is in use, all of its elements are
// expected to be valid. Therefore the teardown of these elements must occur
// as a single "transaction". This teardown must take place after freeing
// the CPU pages (see the "Free CPU pages" loop above). This is because as
// part of removing chunks from VA blocks, the per-page allocated bitmap is
// recomputed using the per-NUMA node_state array elements.
for_each_possible_uvm_node(nid) {
uvm_va_block_cpu_node_state_t *node_state;
node_state = block_node_state_get(block, nid);
kmem_cache_free(g_uvm_va_block_cpu_node_state_cache, node_state);
}
uvm_kvfree((void *)block->cpu.node_state);
block->cpu.node_state = NULL;
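
A small user-space analogy of the ordering constraint described in the comment above (the names here are illustrative, not the driver's): the per-node bookkeeping must outlive the chunk-freeing pass, so the node structures are released in a second loop only after every chunk is gone.

#include <stdlib.h>

#define NUM_NODES 4

struct node_state { int allocated_pages; };

struct block {
    struct node_state *node_state[NUM_NODES];
    void *chunks[NUM_NODES];
};

static void free_chunk(struct block *b, int nid)
{
    free(b->chunks[nid]);
    b->chunks[nid] = NULL;
    /* Recomputing per-page bookkeeping may read *any* node's state,
     * so no node_state element may have been freed yet. */
    for (int i = 0; i < NUM_NODES; i++)
        b->node_state[i]->allocated_pages = 0;
}

static void block_kill(struct block *b)
{
    /* Pass 1: free the chunks while all node_state elements are still valid. */
    for (int nid = 0; nid < NUM_NODES; nid++)
        free_chunk(b, nid);

    /* Pass 2: tear down the node_state array as a single "transaction". */
    for (int nid = 0; nid < NUM_NODES; nid++)
        free(b->node_state[nid]);
}

int main(void)
{
    struct block b;
    for (int nid = 0; nid < NUM_NODES; nid++) {
        b.node_state[nid] = malloc(sizeof(struct node_state));
        b.chunks[nid] = malloc(64);
    }
    block_kill(&b);
    return 0;
}
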
@ -9642,8 +9648,8 @@ static NV_STATUS block_split_presplit_ptes_gpu(uvm_va_block_t *existing, uvm_va_
uvm_va_block_gpu_state_t *existing_gpu_state = uvm_va_block_gpu_state_get(existing, gpu->id);
uvm_va_space_t *va_space = uvm_va_block_get_va_space(existing);
uvm_va_block_context_t *block_context = uvm_va_space_block_context(va_space, NULL);
NvU32 big_page_size = uvm_va_block_gpu_big_page_size(existing, gpu);
NvU32 alloc_sizes;
NvU64 big_page_size = uvm_va_block_gpu_big_page_size(existing, gpu);
NvU64 alloc_sizes;
DECLARE_BITMAP(new_big_ptes, MAX_BIG_PAGES_PER_UVM_VA_BLOCK);
uvm_page_index_t new_start_page_index = uvm_va_block_cpu_page_index(existing, new->start);
size_t big_page_index;
@ -9986,7 +9992,7 @@ static NV_STATUS block_split_cpu_chunk_one(uvm_va_block_t *block, uvm_page_index
gpu = block_get_gpu(block, id);
// If the parent chunk has not been mapped, there is nothing to split.
gpu_mapping_addr = uvm_cpu_chunk_get_parent_gpu_phys_addr(chunk, gpu->parent);
gpu_mapping_addr = uvm_cpu_chunk_get_gpu_phys_addr(chunk, gpu);
if (gpu_mapping_addr == 0)
continue;
@ -10008,7 +10014,7 @@ static NV_STATUS block_split_cpu_chunk_one(uvm_va_block_t *block, uvm_page_index
merge:
for_each_gpu_id_in_mask(id, gpu_split_mask) {
gpu = block_get_gpu(block, id);
gpu_mapping_addr = uvm_cpu_chunk_get_parent_gpu_phys_addr(chunk, gpu->parent);
gpu_mapping_addr = uvm_cpu_chunk_get_gpu_phys_addr(chunk, gpu);
uvm_pmm_sysmem_mappings_merge_gpu_mappings(&gpu->pmm_reverse_sysmem_mappings,
gpu_mapping_addr,
chunk_size);
@ -10194,7 +10200,7 @@ static void block_merge_cpu_chunks_one(uvm_va_block_t *block, uvm_page_index_t p
continue;
gpu = block_get_gpu(block, id);
gpu_mapping_addr = uvm_cpu_chunk_get_parent_gpu_phys_addr(chunk, gpu->parent);
gpu_mapping_addr = uvm_cpu_chunk_get_gpu_phys_addr(chunk, gpu);
if (gpu_mapping_addr == 0)
continue;
@ -10646,8 +10652,7 @@ static void block_split_gpu(uvm_va_block_t *existing, uvm_va_block_t *new, uvm_g
for_each_possible_uvm_node(nid) {
for_each_cpu_chunk_in_block(cpu_chunk, page_index, new, nid) {
uvm_pmm_sysmem_mappings_reparent_gpu_mapping(&gpu->pmm_reverse_sysmem_mappings,
uvm_cpu_chunk_get_parent_gpu_phys_addr(cpu_chunk,
gpu->parent),
uvm_cpu_chunk_get_gpu_phys_addr(cpu_chunk, gpu),
new);
}
}
@ -10685,7 +10690,7 @@ static void block_split_gpu(uvm_va_block_t *existing, uvm_va_block_t *new, uvm_g
gpu_va_space = uvm_gpu_va_space_get(va_space, gpu);
if (gpu_va_space) {
if (existing_gpu_state->page_table_range_big.table) {
NvU32 big_page_size = uvm_va_block_gpu_big_page_size(existing, gpu);
NvU64 big_page_size = uvm_va_block_gpu_big_page_size(existing, gpu);
// existing's end has not been adjusted yet
existing_pages_big = range_num_big_pages(existing->start, new->start - 1, big_page_size);
@ -13614,7 +13619,7 @@ NV_STATUS uvm_test_va_residency_info(UVM_TEST_VA_RESIDENCY_INFO_PARAMS *params,
for_each_id_in_mask(id, &block->mapped) {
uvm_processor_id_t processor_to_map;
block_phys_page_t block_page;
NvU32 page_size = uvm_va_block_page_size_processor(block, id, page_index);
NvU64 page_size = uvm_va_block_page_size_processor(block, id, page_index);
int nid = NUMA_NO_NODE;
if (page_size == 0)
@ -13650,7 +13655,7 @@ NV_STATUS uvm_test_va_residency_info(UVM_TEST_VA_RESIDENCY_INFO_PARAMS *params,
if (uvm_processor_mask_test(resident_on_mask, UVM_ID_CPU)) {
if (uvm_pmm_sysmem_mappings_indirect_supported()) {
for_each_gpu_id(id) {
NvU32 page_size = uvm_va_block_page_size_processor(block, id, page_index);
NvU64 page_size = uvm_va_block_page_size_processor(block, id, page_index);
uvm_reverse_map_t sysmem_page;
uvm_cpu_chunk_t *chunk = uvm_cpu_chunk_get_chunk_for_page_resident(block, page_index);
size_t num_pages;
@ -13665,8 +13670,7 @@ NV_STATUS uvm_test_va_residency_info(UVM_TEST_VA_RESIDENCY_INFO_PARAMS *params,
continue;
num_pages = uvm_pmm_sysmem_mappings_dma_to_virt(&gpu->pmm_reverse_sysmem_mappings,
uvm_cpu_chunk_get_parent_gpu_phys_addr(chunk,
gpu->parent),
uvm_cpu_chunk_get_gpu_phys_addr(chunk, gpu),
uvm_cpu_chunk_get_size(chunk),
&sysmem_page,
1);

View File

@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2015-2023 NVIDIA Corporation
Copyright (c) 2015-2024 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@ -111,8 +111,6 @@ typedef struct
// Pages that have been evicted to sysmem
uvm_page_mask_t evicted;
NvU64 *cpu_chunks_dma_addrs;
// Array of naturally-aligned chunks. Each chunk has the largest possible
// size which can fit within the block, so they are not uniform size.
//
@ -2155,8 +2153,7 @@ NV_STATUS uvm_va_block_map_cpu_chunk_on_gpus(uvm_va_block_t *va_block,
// Physically unmap a CPU chunk from all registered GPUs.
// Locking: The va_block lock must be held.
void uvm_va_block_unmap_cpu_chunk_on_gpus(uvm_va_block_t *va_block,
uvm_cpu_chunk_t *chunk,
uvm_page_index_t page_index);
uvm_cpu_chunk_t *chunk);
// Remove any CPU chunks in the given region.
// Locking: The va_block lock must be held.
@ -2166,19 +2163,19 @@ void uvm_va_block_remove_cpu_chunks(uvm_va_block_t *va_block, uvm_va_block_regio
// specified processor in the block. Returns 0 if the address is not resident on
// the specified processor.
// Locking: The va_block lock must be held.
NvU32 uvm_va_block_get_physical_size(uvm_va_block_t *block,
NvU64 uvm_va_block_get_physical_size(uvm_va_block_t *block,
uvm_processor_id_t processor,
uvm_page_index_t page_index);
// Get CPU page size or 0 if it is not mapped
NvU32 uvm_va_block_page_size_cpu(uvm_va_block_t *va_block,
NvU64 uvm_va_block_page_size_cpu(uvm_va_block_t *va_block,
uvm_page_index_t page_index);
// Get GPU page size or 0 if it is not mapped on the given GPU
NvU32 uvm_va_block_page_size_gpu(uvm_va_block_t *va_block, uvm_gpu_id_t gpu_id, uvm_page_index_t page_index);
NvU64 uvm_va_block_page_size_gpu(uvm_va_block_t *va_block, uvm_gpu_id_t gpu_id, uvm_page_index_t page_index);
// Get page size or 0 if it is not mapped on the given processor
static NvU32 uvm_va_block_page_size_processor(uvm_va_block_t *va_block,
static NvU64 uvm_va_block_page_size_processor(uvm_va_block_t *va_block,
uvm_processor_id_t processor_id,
uvm_page_index_t page_index)
{
@ -2189,10 +2186,10 @@ static NvU32 uvm_va_block_page_size_processor(uvm_va_block_t *va_block,
}
// Returns the big page size for the GPU VA space of the block
NvU32 uvm_va_block_gpu_big_page_size(uvm_va_block_t *va_block, uvm_gpu_t *gpu);
NvU64 uvm_va_block_gpu_big_page_size(uvm_va_block_t *va_block, uvm_gpu_t *gpu);
// Returns the number of big pages in the VA block for the given size
size_t uvm_va_block_num_big_pages(uvm_va_block_t *va_block, NvU32 big_page_size);
size_t uvm_va_block_num_big_pages(uvm_va_block_t *va_block, NvU64 big_page_size);
// Returns the number of big pages in the VA block for the big page size on the
// given GPU
@ -2202,29 +2199,29 @@ static size_t uvm_va_block_gpu_num_big_pages(uvm_va_block_t *va_block, uvm_gpu_t
}
// Returns the start address of the given big page index and big page size
NvU64 uvm_va_block_big_page_addr(uvm_va_block_t *va_block, size_t big_page_index, NvU32 big_page_size);
NvU64 uvm_va_block_big_page_addr(uvm_va_block_t *va_block, size_t big_page_index, NvU64 big_page_size);
// Returns the region [start, end] of the given big page index and big page size
uvm_va_block_region_t uvm_va_block_big_page_region(uvm_va_block_t *va_block,
size_t big_page_index,
NvU32 big_page_size);
NvU64 big_page_size);
// Returns the largest sub-region region of [start, end] which can fit big
// pages. If the region cannot fit any big pages, an invalid region (0, 0) is
// returned.
uvm_va_block_region_t uvm_va_block_big_page_region_all(uvm_va_block_t *va_block, NvU32 big_page_size);
uvm_va_block_region_t uvm_va_block_big_page_region_all(uvm_va_block_t *va_block, NvU64 big_page_size);
// Returns the largest sub-region region of 'region' which can fit big pages.
// If the region cannot fit any big pages, an invalid region (0, 0) is returned.
uvm_va_block_region_t uvm_va_block_big_page_region_subset(uvm_va_block_t *va_block,
uvm_va_block_region_t region,
NvU32 big_page_size);
NvU64 big_page_size);
// Returns the big page index (the bit index within
// uvm_va_block_gpu_state_t::big_ptes) corresponding to page_index. If
// page_index cannot be covered by a big PTE due to alignment or block size,
// MAX_BIG_PAGES_PER_UVM_VA_BLOCK is returned.
size_t uvm_va_block_big_page_index(uvm_va_block_t *va_block, uvm_page_index_t page_index, NvU32 big_page_size);
size_t uvm_va_block_big_page_index(uvm_va_block_t *va_block, uvm_page_index_t page_index, NvU64 big_page_size);
// Returns the new residency for a page that faulted or triggered access counter
// notifications. The read_duplicate output parameter indicates if the page

View File

@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2015-2023 NVIDIA Corporation
Copyright (c) 2015-2024 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@ -1853,7 +1853,7 @@ NV_STATUS uvm_api_alloc_semaphore_pool(UVM_ALLOC_SEMAPHORE_POOL_PARAMS *params,
if (uvm_api_range_invalid(params->base, params->length))
return NV_ERR_INVALID_ADDRESS;
if (params->gpuAttributesCount > UVM_MAX_GPUS_V2)
if (params->gpuAttributesCount > UVM_MAX_GPUS)
return NV_ERR_INVALID_ARGUMENT;
if (g_uvm_global.conf_computing_enabled && params->gpuAttributesCount == 0)

View File

@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2015-2022 NVIDIA Corporation
Copyright (c) 2015-2024 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@ -188,8 +188,7 @@ typedef struct
// GPU which owns the allocation. For sysmem, this is the GPU that the
// sysmem was originally allocated under. For the allocation to remain valid
// we need to prevent the GPU from going away, similarly to P2P mapped
// memory.
// Similarly for EGM memory.
// memory and to EGM memory.
//
// This field is not used for sparse mappings as they don't have an
// allocation and, hence, owning GPU.
@ -212,6 +211,7 @@ typedef struct
// EGM memory. If true is_sysmem also has to be true and owning_gpu
// has to be valid.
bool is_egm;
// GPU page tables mapping the allocation
uvm_page_table_range_vec_t pt_range_vec;

View File

@ -199,7 +199,7 @@ void uvm_hal_volta_host_tlb_invalidate_va(uvm_push_t *push,
NvU32 depth,
NvU64 base,
NvU64 size,
NvU32 page_size,
NvU64 page_size,
uvm_membar_t membar)
{
NvU32 aperture_value;
@ -216,9 +216,9 @@ void uvm_hal_volta_host_tlb_invalidate_va(uvm_push_t *push,
NvU32 log2_invalidation_size;
uvm_gpu_t *gpu = uvm_push_get_gpu(push);
UVM_ASSERT_MSG(IS_ALIGNED(page_size, 1 << 12), "page_size 0x%x\n", page_size);
UVM_ASSERT_MSG(IS_ALIGNED(base, page_size), "base 0x%llx page_size 0x%x\n", base, page_size);
UVM_ASSERT_MSG(IS_ALIGNED(size, page_size), "size 0x%llx page_size 0x%x\n", size, page_size);
UVM_ASSERT_MSG(IS_ALIGNED(page_size, 1 << 12), "page_size 0x%llx\n", page_size);
UVM_ASSERT_MSG(IS_ALIGNED(base, page_size), "base 0x%llx page_size 0x%llx\n", base, page_size);
UVM_ASSERT_MSG(IS_ALIGNED(size, page_size), "size 0x%llx page_size 0x%llx\n", size, page_size);
UVM_ASSERT_MSG(size > 0, "size 0x%llx\n", size);
// The invalidation size must be a power-of-two number of pages containing

View File

@ -42,7 +42,7 @@ static NvU32 entries_per_index_volta(NvU32 depth)
return 1;
}
static NvLength entry_offset_volta(NvU32 depth, NvU32 page_size)
static NvLength entry_offset_volta(NvU32 depth, NvU64 page_size)
{
UVM_ASSERT(depth < 5);
if (page_size == UVM_PAGE_SIZE_4K && depth == 3)
@ -252,7 +252,7 @@ static NvU64 make_pte_volta(uvm_aperture_t aperture, NvU64 address, uvm_prot_t p
static uvm_mmu_mode_hal_t volta_mmu_mode_hal;
uvm_mmu_mode_hal_t *uvm_hal_mmu_mode_volta(NvU32 big_page_size)
uvm_mmu_mode_hal_t *uvm_hal_mmu_mode_volta(NvU64 big_page_size)
{
static bool initialized = false;

View File

@ -159,14 +159,7 @@ static int lkca_aead_internal(struct crypto_aead *aead,
}
if (rc != 0) {
if (enc) {
pr_info("aead.c: Encryption failed with error %i\n", rc);
} else {
pr_info("aead.c: Decryption failed with error %i\n", rc);
if (rc == -EBADMSG) {
pr_info("aead.c: Authentication tag mismatch!\n");
}
}
pr_info("Encryption FAILED\n");
}
*data_out_size = data_in_size;

View File

@ -39,7 +39,9 @@
#define RSA_PSS_PADDING_ZEROS_SIZE_BYTE (8)
#define RSA_PSS_TRAILER_FIELD (0xbc)
#define SHIFT_RIGHT_AND_GET_BYTE(val, x) ((val >> x) & 0xFF)
#ifndef BITS_TO_BYTES
#define BITS_TO_BYTES(b) (b >> 3)
#endif
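
The #ifndef guard presumably exists because some build environments already define a BITS_TO_BYTES macro; a trivial stand-alone check of the fallback definition used here:

#include <stdio.h>

#ifndef BITS_TO_BYTES
#define BITS_TO_BYTES(b) (b >> 3)   /* same fallback as above: whole bytes only */
#endif

int main(void)
{
    printf("%d\n", BITS_TO_BYTES(3072));   /* e.g. a 3072-bit modulus -> 384 bytes */
    return 0;
}
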
static const unsigned char zeroes[RSA_PSS_PADDING_ZEROS_SIZE_BYTE] = { 0 };

View File

@ -66,6 +66,9 @@ static NvBool battery_present = NV_FALSE;
#define ACPI_VIDEO_CLASS "video"
#endif
/* Maximum size of ACPI _DSM method's 4th argument */
#define NV_MAX_ACPI_DSM_PARAM_SIZE 1024
// Used for NVPCF event handling
static acpi_handle nvpcf_handle = NULL;
static acpi_handle nvpcf_device_handle = NULL;
@ -73,21 +76,6 @@ static nv_acpi_t *nvpcf_nv_acpi_object = NULL;
#define ACPI_NVPCF_EVENT_CHANGE 0xC0
static int nv_acpi_get_device_handle(nv_state_t *nv, acpi_handle *dev_handle)
{
nv_linux_state_t *nvl = NV_GET_NVL_FROM_NV_STATE(nv);
#if defined(DEVICE_ACPI_HANDLE)
*dev_handle = DEVICE_ACPI_HANDLE(nvl->dev);
return NV_TRUE;
#elif defined (ACPI_HANDLE)
*dev_handle = ACPI_HANDLE(nvl->dev);
return NV_TRUE;
#else
return NV_FALSE;
#endif
}
/*
* This callback will be invoked by the acpi_notifier_call_chain()
*/
@ -174,7 +162,7 @@ static void nv_acpi_nvpcf_event(acpi_handle handle, u32 event_type, void *data)
}
else
{
nv_printf(NV_DBG_INFO,"NVRM: %s: NVPCF event 0x%x is not supported\n", event_type, __FUNCTION__);
nv_printf(NV_DBG_INFO,"NVRM: %s: NVPCF event 0x%x is not supported\n", __FUNCTION__, event_type);
}
}
@ -267,11 +255,10 @@ static void nv_acpi_notify_event(acpi_handle handle, u32 event_type, void *data)
void nv_acpi_register_notifier(nv_linux_state_t *nvl)
{
acpi_handle dev_handle = NULL;
acpi_handle dev_handle = ACPI_HANDLE(nvl->dev);
/* Install the ACPI notifier corresponding to dGPU ACPI device. */
if ((nvl->nv_acpi_object == NULL) &&
nv_acpi_get_device_handle(NV_STATE_PTR(nvl), &dev_handle) &&
(dev_handle != NULL))
{
nvl->nv_acpi_object = nv_install_notifier(dev_handle, nv_acpi_notify_event, nvl);
@ -657,64 +644,36 @@ static NV_STATUS nv_acpi_nvif_method(
return NV_OK;
}
#define MAX_INPUT_PARAM_SIZE 1024
/*
* This function executes a _DSM ACPI method.
*/
NV_STATUS NV_API_CALL nv_acpi_dsm_method(
nv_state_t *nv,
NvU8 *pAcpiDsmGuid,
NvU32 acpiDsmRev,
NvBool acpiNvpcfDsmFunction,
NvU32 acpiDsmSubFunction,
void *pInParams,
NvU16 inParamSize,
NvU32 *outStatus,
void *pOutData,
NvU16 *pSize
static NV_STATUS nv_acpi_evaluate_dsm_method(
acpi_handle dev_handle,
NvU8 *pathname,
NvU8 *pAcpiDsmGuid,
NvU32 acpiDsmRev,
NvU32 acpiDsmSubFunction,
void *arg3,
NvU16 arg3Size,
NvBool bArg3Integer,
NvU32 *outStatus,
void *pOutData,
NvU16 *pSize
)
{
NV_STATUS status = NV_ERR_OPERATING_SYSTEM;
acpi_status acpi_status;
NV_STATUS rmStatus = NV_OK;
acpi_status status;
struct acpi_object_list input;
union acpi_object *dsm = NULL;
struct acpi_buffer output = { ACPI_ALLOCATE_BUFFER, NULL };
union acpi_object dsm_params[4];
NvU8 *argument3 = NULL;
NvU32 data_size;
acpi_handle dev_handle = NULL;
if (!nv_acpi_get_device_handle(nv, &dev_handle))
return NV_ERR_NOT_SUPPORTED;
if (!dev_handle)
return NV_ERR_INVALID_ARGUMENT;
if ((!pInParams) || (inParamSize > MAX_INPUT_PARAM_SIZE) || (!pOutData) || (!pSize))
{
nv_printf(NV_DBG_INFO,
"NVRM: %s: invalid argument(s)!\n", __FUNCTION__);
return NV_ERR_INVALID_ARGUMENT;
}
if (!NV_MAY_SLEEP())
{
#if defined(DEBUG)
nv_printf(NV_DBG_INFO,
"NVRM: %s: invalid argument(s)!\n", __FUNCTION__);
nv_printf(NV_DBG_ERRORS, "NVRM: %s: invalid context!\n", __FUNCTION__);
#endif
return NV_ERR_NOT_SUPPORTED;
}
status = os_alloc_mem((void **)&argument3, inParamSize);
if (status != NV_OK)
return status;
//
// dsm_params[0].buffer.pointer and dsm_params[1].integer.value set in
// switch below based on acpiDsmFunction
//
dsm_params[0].buffer.type = ACPI_TYPE_BUFFER;
dsm_params[0].buffer.length = 0x10;
dsm_params[0].buffer.pointer = pAcpiDsmGuid;
@ -725,35 +684,28 @@ NV_STATUS NV_API_CALL nv_acpi_dsm_method(
dsm_params[2].integer.type = ACPI_TYPE_INTEGER;
dsm_params[2].integer.value = acpiDsmSubFunction;
dsm_params[3].buffer.type = ACPI_TYPE_BUFFER;
dsm_params[3].buffer.length = inParamSize;
memcpy(argument3, pInParams, dsm_params[3].buffer.length);
dsm_params[3].buffer.pointer = argument3;
if (bArg3Integer)
{
dsm_params[3].integer.type = ACPI_TYPE_INTEGER;
dsm_params[3].integer.value = *((NvU32 *)arg3);
}
else
{
dsm_params[3].buffer.type = ACPI_TYPE_BUFFER;
dsm_params[3].buffer.length = arg3Size;
dsm_params[3].buffer.pointer = arg3;
}
// parameters for dsm calls (GUID, rev, subfunction, data)
input.count = 4;
input.pointer = dsm_params;
if (acpiNvpcfDsmFunction)
{
//
// acpi_evaluate_object() can operate with either valid object pathname or
// valid object handle. For NVPCF DSM function, use valid pathname as we do
// not have device handle for NVPCF device
//
dev_handle = NULL;
acpi_status = acpi_evaluate_object(dev_handle, "\\_SB.NPCF._DSM", &input, &output);
}
else
{
acpi_status = acpi_evaluate_object(dev_handle, "_DSM", &input, &output);
}
if (ACPI_FAILURE(acpi_status))
status = acpi_evaluate_object(dev_handle, pathname, &input, &output);
if (ACPI_FAILURE(status))
{
nv_printf(NV_DBG_INFO,
"NVRM: %s: failed to evaluate _DSM method!\n", __FUNCTION__);
goto exit;
return NV_ERR_OPERATING_SYSTEM;
}
dsm = output.pointer;
@ -767,20 +719,80 @@ NV_STATUS NV_API_CALL nv_acpi_dsm_method(
dsm->buffer.pointer[0];
}
status = nv_acpi_extract_object(dsm, pOutData, *pSize, &data_size);
rmStatus = nv_acpi_extract_object(dsm, pOutData, *pSize, &data_size);
*pSize = data_size;
kfree(output.pointer);
}
if (status != NV_OK)
else
{
*pSize = 0;
}
if (rmStatus != NV_OK)
{
nv_printf(NV_DBG_ERRORS,
"NVRM: %s: DSM data invalid!\n", __FUNCTION__);
}
exit:
return rmStatus;
}
/*
* This function executes a _DSM ACPI method.
*/
NV_STATUS NV_API_CALL nv_acpi_dsm_method(
nv_state_t *nv,
NvU8 *pAcpiDsmGuid,
NvU32 acpiDsmRev,
NvBool acpiNvpcfDsmFunction,
NvU32 acpiDsmSubFunction,
void *pInParams,
NvU16 inParamSize,
NvU32 *outStatus,
void *pOutData,
NvU16 *pSize
)
{
NV_STATUS rmStatus = NV_ERR_OPERATING_SYSTEM;
NvU8 *argument3 = NULL;
nv_linux_state_t *nvl = NV_GET_NVL_FROM_NV_STATE(nv);
acpi_handle dev_handle = ACPI_HANDLE(nvl->dev);
NvU8 *pathname = "_DSM";
if (!dev_handle)
return NV_ERR_INVALID_ARGUMENT;
if ((!pInParams) || (inParamSize > NV_MAX_ACPI_DSM_PARAM_SIZE) || (!pOutData) || (!pSize))
{
nv_printf(NV_DBG_INFO,
"NVRM: %s: invalid argument(s)!\n", __FUNCTION__);
return NV_ERR_INVALID_ARGUMENT;
}
rmStatus = os_alloc_mem((void **)&argument3, inParamSize);
if (rmStatus != NV_OK)
return rmStatus;
memcpy(argument3, pInParams, inParamSize);
if (acpiNvpcfDsmFunction)
{
//
// acpi_evaluate_object() accepts either a valid object pathname or a valid
// object handle. For the NVPCF DSM function, use the pathname since we do
// not have a device handle for the NVPCF device.
//
dev_handle = NULL;
pathname = "\\_SB.NPCF._DSM";
}
rmStatus = nv_acpi_evaluate_dsm_method(dev_handle, pathname, pAcpiDsmGuid, acpiDsmRev,
acpiDsmSubFunction, argument3, inParamSize,
NV_FALSE, NULL, pOutData, pSize);
os_free_mem(argument3);
return status;
return rmStatus;
}
/*
@ -796,13 +808,11 @@ NV_STATUS NV_API_CALL nv_acpi_ddc_method(
acpi_status status;
union acpi_object *ddc = NULL;
NvU32 i, largestEdidSize;
acpi_handle dev_handle = NULL;
nv_linux_state_t *nvl = NV_GET_NVL_FROM_NV_STATE(nv);
acpi_handle dev_handle = ACPI_HANDLE(nvl->dev);
acpi_handle lcd_dev_handle = NULL;
acpi_handle handle = NULL;
if (!nv_acpi_get_device_handle(nv, &dev_handle))
return NV_ERR_NOT_SUPPORTED;
if (!dev_handle)
return NV_ERR_INVALID_ARGUMENT;
@ -836,7 +846,7 @@ NV_STATUS NV_API_CALL nv_acpi_ddc_method(
case 0x0400:
case 0xA420:
lcd_dev_handle = handle;
nv_printf(NV_DBG_INFO, "NVRM: %s Found LCD: %x\n",
nv_printf(NV_DBG_INFO, "NVRM: %s Found LCD: %llx\n",
__FUNCTION__, device_id);
break;
default:
@ -915,12 +925,10 @@ NV_STATUS NV_API_CALL nv_acpi_rom_method(
union acpi_object *rom;
union acpi_object rom_arg[2];
struct acpi_object_list input = { 2, rom_arg };
acpi_handle dev_handle = NULL;
nv_linux_state_t *nvl = NV_GET_NVL_FROM_NV_STATE(nv);
acpi_handle dev_handle = ACPI_HANDLE(nvl->dev);
uint32_t offset, length;
if (!nv_acpi_get_device_handle(nv, &dev_handle))
return NV_ERR_NOT_SUPPORTED;
if (!dev_handle)
return NV_ERR_INVALID_ARGUMENT;
@ -982,12 +990,10 @@ NV_STATUS NV_API_CALL nv_acpi_dod_method(
acpi_status status;
struct acpi_buffer output = { ACPI_ALLOCATE_BUFFER, NULL };
union acpi_object *dod;
acpi_handle dev_handle = NULL;
nv_linux_state_t *nvl = NV_GET_NVL_FROM_NV_STATE(nv);
acpi_handle dev_handle = ACPI_HANDLE(nvl->dev);
NvU32 i, count = (*pSize / sizeof(NvU32));
if (!nv_acpi_get_device_handle(nv, &dev_handle))
return NV_ERR_NOT_SUPPORTED;
if (!dev_handle)
return NV_ERR_INVALID_ARGUMENT;
@ -1129,17 +1135,11 @@ NvBool nv_acpi_power_resource_method_present(
struct pci_dev *pdev
)
{
acpi_handle handle = NULL;
acpi_handle handle = ACPI_HANDLE(&pdev->dev);
struct acpi_buffer buf = { ACPI_ALLOCATE_BUFFER, NULL };
union acpi_object *object_package, *object_reference;
acpi_status status;
#if defined(DEVICE_ACPI_HANDLE)
handle = DEVICE_ACPI_HANDLE(&pdev->dev);
#elif defined (ACPI_HANDLE)
handle = ACPI_HANDLE(&pdev->dev);
#endif
if (!handle)
return NV_FALSE;
@ -1198,7 +1198,8 @@ NV_STATUS NV_API_CALL nv_acpi_mux_method(
union acpi_object *mux = NULL;
union acpi_object mux_arg = { ACPI_TYPE_INTEGER };
struct acpi_object_list input = { 1, &mux_arg };
acpi_handle dev_handle = NULL;
nv_linux_state_t *nvl = NV_GET_NVL_FROM_NV_STATE(nv);
acpi_handle dev_handle = ACPI_HANDLE(nvl->dev);
acpi_handle mux_dev_handle = NULL;
acpi_handle handle = NULL;
unsigned long long device_id = 0;
@ -1216,9 +1217,6 @@ NV_STATUS NV_API_CALL nv_acpi_mux_method(
__FUNCTION__, pMethodName);
}
if (!nv_acpi_get_device_handle(nv, &dev_handle))
return NV_ERR_NOT_SUPPORTED;
if (!dev_handle)
return NV_ERR_INVALID_ARGUMENT;
@ -1384,6 +1382,34 @@ NvBool NV_API_CALL nv_acpi_is_battery_present(void)
return NV_FALSE;
}
NV_STATUS NV_API_CALL nv_acpi_d3cold_dsm_for_upstream_port(
nv_state_t *nv,
NvU8 *pAcpiDsmGuid,
NvU32 acpiDsmRev,
NvU32 acpiDsmSubFunction,
NvU32 *data
)
{
NV_STATUS rmStatus = NV_ERR_OPERATING_SYSTEM;
nv_linux_state_t *nvl = NV_GET_NVL_FROM_NV_STATE(nv);
acpi_handle dev_handle = ACPI_HANDLE(nvl->dev->parent);
NvU32 outData = 0;
NvU16 outDatasize = sizeof(NvU32);
NvU16 inParamSize = sizeof(NvU32);
if (!dev_handle)
return NV_ERR_INVALID_ARGUMENT;
rmStatus = nv_acpi_evaluate_dsm_method(dev_handle, "_DSM", pAcpiDsmGuid, acpiDsmRev,
acpiDsmSubFunction, data, inParamSize, NV_TRUE,
NULL, &outData, &outDatasize);
if (rmStatus == NV_OK)
*data = outData;
return rmStatus;
}
#else // NV_LINUX_ACPI_EVENTS_SUPPORTED
void NV_API_CALL nv_acpi_methods_init(NvU32 *handlePresent)
@ -1426,6 +1452,17 @@ NV_STATUS NV_API_CALL nv_acpi_dsm_method(
return NV_ERR_NOT_SUPPORTED;
}
NV_STATUS NV_API_CALL nv_acpi_d3cold_dsm_for_upstream_port(
nv_state_t *nv,
NvU8 *pAcpiDsmGuid,
NvU32 acpiDsmRev,
NvU32 acpiDsmSubFunction,
NvU32 *data
)
{
return NV_ERR_NOT_SUPPORTED;
}
NV_STATUS NV_API_CALL nv_acpi_ddc_method(
nv_state_t *nv,
void *pEdidBuffer,

View File

@ -1,5 +1,5 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-FileCopyrightText: Copyright (c) 2023-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person obtaining a
@ -24,6 +24,7 @@
#include "nv-linux.h"
extern int NVreg_ImexChannelCount;
extern int NVreg_CreateImexChannel0;
static int nv_caps_imex_open(struct inode *inode, struct file *file)
{
@ -104,6 +105,10 @@ int NV_API_CALL nv_caps_imex_init(void)
if (NVreg_ImexChannelCount == 0)
{
nv_printf(NV_DBG_INFO, "nv-caps-imex is disabled.\n");
// Disable channel creation as well
NVreg_CreateImexChannel0 = 0;
return 0;
}

View File

@ -1,5 +1,5 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 2019-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-FileCopyrightText: Copyright (c) 2019-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person obtaining a
@ -26,6 +26,8 @@
#include "nv-procfs.h"
#include "nv-hash.h"
#include "nvmisc.h"
extern int NVreg_ModifyDeviceFiles;
/* sys_close() or __close_fd() */
@ -49,7 +51,7 @@ typedef struct nv_cap_table_entry
struct hlist_node hlist;
} nv_cap_table_entry_t;
#define NV_CAP_NUM_ENTRIES(_table) (sizeof(_table) / sizeof(_table[0]))
#define NV_CAP_NUM_ENTRIES(_table) (NV_ARRAY_ELEMENTS(_table))
static nv_cap_table_entry_t g_nv_cap_nvlink_table[] =
{
@ -361,18 +363,28 @@ static ssize_t nv_cap_procfs_write(struct file *file,
nv_cap_file_private_t *private = NULL;
unsigned long bytes_left;
char *proc_buffer;
int status;
status = nv_down_read_interruptible(&nv_system_pm_lock);
if (status < 0)
{
nv_printf(NV_DBG_ERRORS, "nv-caps: failed to lock the nv_system_pm_lock!\n");
return status;
}
private = ((struct seq_file *)file->private_data)->private;
bytes_left = (sizeof(private->buffer) - private->offset - 1);
if (count == 0)
{
return -EINVAL;
count = -EINVAL;
goto done;
}
if ((bytes_left == 0) || (count > bytes_left))
{
return -ENOSPC;
count = -ENOSPC;
goto done;
}
proc_buffer = &private->buffer[private->offset];
@ -380,7 +392,8 @@ static ssize_t nv_cap_procfs_write(struct file *file,
if (copy_from_user(proc_buffer, buffer, count))
{
nv_printf(NV_DBG_ERRORS, "nv-caps: failed to copy in proc data!\n");
return -EFAULT;
count = -EFAULT;
goto done;
}
private->offset += count;
@ -388,17 +401,28 @@ static ssize_t nv_cap_procfs_write(struct file *file,
*pos = private->offset;
done:
up_read(&nv_system_pm_lock);
return count;
}
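
The rewritten error paths above all funnel through the done: label so that nv_system_pm_lock is released on every exit. A generic user-space sketch of that single-exit pattern, using a POSIX rwlock purely as a stand-in for the PM lock:

#include <errno.h>
#include <pthread.h>
#include <string.h>

static pthread_rwlock_t pm_lock = PTHREAD_RWLOCK_INITIALIZER;

/* Returns bytes consumed, or a negative errno-style value. */
static long guarded_write(char *dst, size_t dst_left, const char *src, size_t count)
{
    long ret;

    if (pthread_rwlock_rdlock(&pm_lock) != 0)
        return -EINTR;                 /* could not take the lock: bail out early */

    if (count == 0) {
        ret = -EINVAL;                 /* error: fall through to the unlock */
        goto done;
    }
    if (count > dst_left) {
        ret = -ENOSPC;
        goto done;
    }

    memcpy(dst, src, count);
    ret = (long)count;

done:
    pthread_rwlock_unlock(&pm_lock);   /* every path releases the lock exactly once */
    return ret;
}

int main(void)
{
    char buf[8];
    return guarded_write(buf, sizeof(buf), "hi", 2) == 2 ? 0 : 1;
}
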
static int nv_cap_procfs_read(struct seq_file *s, void *v)
{
int status;
nv_cap_file_private_t *private = s->private;
status = nv_down_read_interruptible(&nv_system_pm_lock);
if (status < 0)
{
return status;
}
seq_printf(s, "%s: %d\n", "DeviceFileMinor", private->minor);
seq_printf(s, "%s: %d\n", "DeviceFileMode", private->permissions);
seq_printf(s, "%s: %d\n", "DeviceFileModify", private->modify);
up_read(&nv_system_pm_lock);
return 0;
}
@ -423,14 +447,6 @@ static int nv_cap_procfs_open(struct inode *inode, struct file *file)
if (rc < 0)
{
NV_KFREE(private, sizeof(nv_cap_file_private_t));
return rc;
}
rc = nv_down_read_interruptible(&nv_system_pm_lock);
if (rc < 0)
{
single_release(inode, file);
NV_KFREE(private, sizeof(nv_cap_file_private_t));
}
return rc;
@ -449,8 +465,6 @@ static int nv_cap_procfs_release(struct inode *inode, struct file *file)
private = s->private;
}
up_read(&nv_system_pm_lock);
single_release(inode, file);
if (private != NULL)

View File

@ -28,12 +28,21 @@
* teardown.
*/
#define NV_MEM_LOGGER_STACK_TRACE 0
#if defined(NV_STACK_TRACE_PRESENT) && defined(NV_MEM_LOGGER) && defined(DEBUG)
#define NV_MEM_LOGGER_STACK_TRACE 1
#endif
typedef struct {
struct rb_node rb_node;
void *addr;
NvU64 size;
NvU32 line;
const char *file;
#if NV_MEM_LOGGER_STACK_TRACE == 1
unsigned long stack_trace[32];
#endif
} nv_memdbg_node_t;
struct
@ -117,6 +126,12 @@ void nv_memdbg_add(void *addr, NvU64 size, const char *file, int line)
node->size = size;
node->file = file;
node->line = line;
#if NV_MEM_LOGGER_STACK_TRACE == 1
memset(node->stack_trace, '\0', sizeof(node->stack_trace));
stack_trace_save(node->stack_trace, NV_ARRAY_ELEMENTS(node->stack_trace), 0);
#endif
}
NV_SPIN_LOCK_IRQSAVE(&g_nv_memdbg.lock, flags);
@ -209,6 +224,10 @@ void nv_memdbg_exit(void)
node->size, node->addr);
}
#if NV_MEM_LOGGER_STACK_TRACE == 1
stack_trace_print(node->stack_trace, NV_ARRAY_ELEMENTS(node->stack_trace), 1);
#endif
rb_erase(&node->rb_node, &g_nv_memdbg.rb_root);
kfree(node);
}

View File

@ -1,5 +1,5 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 2020-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-FileCopyrightText: Copyright (c) 2020-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person obtaining a
@ -62,7 +62,7 @@ nvidia_nano_timer_callback(
nv_linux_state_t *nvl = nv_nstimer->nv_linux_state;
nvidia_stack_t *sp = NULL;
if (nv_kmem_cache_alloc_stack(&sp) != 0)
if (nv_kmem_cache_alloc_stack_atomic(&sp) != 0)
{
nv_printf(NV_DBG_ERRORS, "NVRM: no cache memory \n");
return;
@ -189,12 +189,6 @@ void NV_API_CALL nv_start_nano_timer(
NvU32 time_us;
time_us = (NvU32)(time_ns / 1000);
if (time_us == 0)
{
nv_printf(NV_DBG_WARNINGS, "NVRM: Timer value cannot be less than 1 usec.\n");
}
time_jiffies = usecs_to_jiffies(time_us);
mod_timer(&nv_nstimer->jiffy_timer, jiffies + time_jiffies);
#endif

View File

@ -1,5 +1,5 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 2011-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-FileCopyrightText: Copyright (c) 2011-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person obtaining a
@ -31,6 +31,8 @@
#include "nv-p2p.h"
#include "rmp2pdefines.h"
#include "nvmisc.h"
typedef enum nv_p2p_page_table_type {
NV_P2P_PAGE_TABLE_TYPE_NON_PERSISTENT = 0,
NV_P2P_PAGE_TABLE_TYPE_PERSISTENT,
@ -50,6 +52,7 @@ typedef struct nv_p2p_mem_info {
struct semaphore lock;
} dma_mapping_list;
void *private;
void *mig_info;
} nv_p2p_mem_info_t;
// declared and created in nv.c
@ -73,7 +76,7 @@ static struct nvidia_status_mapping {
};
#define NVIDIA_STATUS_MAPPINGS \
(sizeof(nvidia_status_mappings) / sizeof(struct nvidia_status_mapping))
NV_ARRAY_ELEMENTS(nvidia_status_mappings)
static int nvidia_p2p_map_status(NV_STATUS status)
{
@ -314,7 +317,7 @@ static NV_STATUS nv_p2p_put_pages(
* callback which can free it unlike non-persistent page_table.
*/
mem_info = container_of(*page_table, nv_p2p_mem_info_t, page_table);
status = rm_p2p_put_pages_persistent(sp, mem_info->private, *page_table);
status = rm_p2p_put_pages_persistent(sp, mem_info->private, *page_table, mem_info->mig_info);
}
else
{
@ -412,6 +415,17 @@ static int nv_p2p_get_pages(
NvU8 uuid[NVIDIA_P2P_GPU_UUID_LEN] = {0};
int rc;
if (!NV_IS_ALIGNED64(virtual_address, NVRM_P2P_PAGESIZE_BIG_64K) ||
!NV_IS_ALIGNED64(length, NVRM_P2P_PAGESIZE_BIG_64K))
{
nv_printf(NV_DBG_ERRORS,
"NVRM: Invalid argument in nv_p2p_get_pages,"
"address or length are not aligned "
"address=0x%llx, length=0x%llx\n",
virtual_address, length);
return -EINVAL;
}
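
A small stand-alone illustration of the new alignment requirement; the IS_ALIGNED64 macro below is an assumed power-of-two re-creation of NV_IS_ALIGNED64, and the 64 KiB constant stands in for NVRM_P2P_PAGESIZE_BIG_64K used above:

#include <stdio.h>
#include <stdint.h>

/* Assumed semantics: value is a multiple of the (power-of-two) alignment. */
#define IS_ALIGNED64(v, a) ((((uint64_t)(v)) & ((uint64_t)(a) - 1)) == 0)

int main(void)
{
    uint64_t big_64k = 64 * 1024;
    uint64_t addr    = 0x7f1234560000ULL;   /* 64 KiB aligned: accepted */
    uint64_t length  = 0x21000ULL;          /* 132 KiB: not 64 KiB aligned, rejected */

    printf("address ok: %d, length ok: %d\n",
           IS_ALIGNED64(addr, big_64k), IS_ALIGNED64(length, big_64k));
    return 0;
}
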
rc = nv_kmem_cache_alloc_stack(&sp);
if (rc != 0)
{
@ -495,7 +509,7 @@ static int nv_p2p_get_pages(
status = rm_p2p_get_pages_persistent(sp, virtual_address, length,
&mem_info->private,
physical_addresses, &entries,
*page_table, gpu_info);
*page_table, gpu_info, &mem_info->mig_info);
if (status != NV_OK)
{
goto failed;

View File

@ -37,6 +37,10 @@
#include <linux/kernfs.h>
#endif
#if !defined(NV_BUS_TYPE_HAS_IOMMU_OPS)
#include <linux/iommu.h>
#endif
static void
nv_check_and_exclude_gpu(
nvidia_stack_t *sp,
@ -324,7 +328,7 @@ static NvU32 find_gpu_numa_nodes_in_srat(nv_linux_state_t *nvl)
gi = (struct acpi_srat_generic_affinity *) subtable_header;
gi_dbdf = *((NvU16 *)(&gi->device_handle[0])) << 16 |
*((NvU16 *)(&gi->device_handle[2]));
if (gi_dbdf == dev_dbdf) {
numa_node = pxm_to_node(gi->proximity_domain);
if (numa_node < MAX_NUMNODES) {
@ -349,7 +353,6 @@ exit:
acpi_put_table(table_header);
return pxm_count;
}
#endif
static void
@ -375,6 +378,7 @@ nv_init_coherent_link_info
return;
gi_found = find_gpu_numa_nodes_in_srat(nvl);
if (!gi_found &&
(device_property_read_u64(nvl->dev, "nvidia,gpu-mem-pxm-start", &pxm_start) != 0 ||
device_property_read_u64(nvl->dev, "nvidia,gpu-mem-pxm-count", &pxm_count) != 0))
@ -530,35 +534,20 @@ nv_pci_probe
if (pci_dev->is_virtfn)
{
#if defined(NV_VGPU_KVM_BUILD)
nvl = pci_get_drvdata(pci_dev->physfn);
if (!nvl)
#if defined(NV_BUS_TYPE_HAS_IOMMU_OPS)
if (pci_dev->dev.bus->iommu_ops == NULL)
#else
if ((pci_dev->dev.iommu != NULL) && (pci_dev->dev.iommu->iommu_dev != NULL) &&
(pci_dev->dev.iommu->iommu_dev->ops == NULL))
#endif
{
nv_printf(NV_DBG_ERRORS, "NVRM: Aborting probe for VF %04x:%02x:%02x.%x "
"since PF is not bound to nvidia driver.\n",
"since IOMMU is not present on the system.\n",
NV_PCI_DOMAIN_NUMBER(pci_dev), NV_PCI_BUS_NUMBER(pci_dev),
NV_PCI_SLOT_NUMBER(pci_dev), PCI_FUNC(pci_dev->devfn));
goto failed;
}
if (pci_dev->dev.bus->iommu_ops == NULL)
{
nv = NV_STATE_PTR(nvl);
if (rm_is_iommu_needed_for_sriov(sp, nv))
{
nv_printf(NV_DBG_ERRORS, "NVRM: Aborting probe for VF %04x:%02x:%02x.%x "
"since IOMMU is not present on the system.\n",
NV_PCI_DOMAIN_NUMBER(pci_dev), NV_PCI_BUS_NUMBER(pci_dev),
NV_PCI_SLOT_NUMBER(pci_dev), PCI_FUNC(pci_dev->devfn));
goto failed;
}
}
if (nvidia_vgpu_vfio_probe(pci_dev) != NV_OK)
{
nv_printf(NV_DBG_ERRORS, "NVRM: Failed to register device to vGPU VFIO module");
goto failed;
}
nv_kmem_cache_free_stack(sp);
return 0;
#else
@ -687,8 +676,8 @@ next_bar:
// Invalid 32 or 64-bit BAR.
nv_printf(NV_DBG_ERRORS,
"NVRM: This PCI I/O region assigned to your NVIDIA device is invalid:\n"
"NVRM: BAR%d is %dM @ 0x%llx (PCI:%04x:%02x:%02x.%x)\n", i,
(NV_PCI_RESOURCE_SIZE(pci_dev, i) >> 20),
"NVRM: BAR%d is %" NvU64_fmtu "M @ 0x%" NvU64_fmtx " (PCI:%04x:%02x:%02x.%x)\n", i,
(NvU64)(NV_PCI_RESOURCE_SIZE(pci_dev, i) >> 20),
(NvU64)NV_PCI_RESOURCE_START(pci_dev, i),
NV_PCI_DOMAIN_NUMBER(pci_dev), NV_PCI_BUS_NUMBER(pci_dev),
NV_PCI_SLOT_NUMBER(pci_dev), PCI_FUNC(pci_dev->devfn));
@ -708,10 +697,10 @@ next_bar:
nv_device_name))
{
nv_printf(NV_DBG_ERRORS,
"NVRM: request_mem_region failed for %dM @ 0x%llx. This can\n"
"NVRM: request_mem_region failed for %" NvU64_fmtu "M @ 0x%" NvU64_fmtx ". This can\n"
"NVRM: occur when a driver such as rivatv is loaded and claims\n"
"NVRM: ownership of the device's registers.\n",
(NV_PCI_RESOURCE_SIZE(pci_dev, regs_bar_index) >> 20),
(NvU64)(NV_PCI_RESOURCE_SIZE(pci_dev, regs_bar_index) >> 20),
(NvU64)NV_PCI_RESOURCE_START(pci_dev, regs_bar_index));
goto failed;
}

View File

@ -197,28 +197,25 @@ nv_procfs_read_power(
{
nv_state_t *nv = s->private;
nvidia_stack_t *sp = NULL;
const char *vidmem_power_status;
const char *dynamic_power_status;
const char *gc6_support;
const char *gcoff_support;
nv_power_info_t power_info;
if (nv_kmem_cache_alloc_stack(&sp) != 0)
{
return 0;
}
dynamic_power_status = rm_get_dynamic_power_management_status(sp, nv);
seq_printf(s, "Runtime D3 status: %s\n", dynamic_power_status);
vidmem_power_status = rm_get_vidmem_power_status(sp, nv);
seq_printf(s, "Video Memory: %s\n\n", vidmem_power_status);
rm_get_power_info(sp, nv, &power_info);
seq_printf(s, "Runtime D3 status: %s\n", power_info.dynamic_power_status);
seq_printf(s, "Video Memory: %s\n\n", power_info.vidmem_power_status);
seq_printf(s, "GPU Hardware Support:\n");
gc6_support = rm_get_gpu_gcx_support(sp, nv, NV_TRUE);
seq_printf(s, " Video Memory Self Refresh: %s\n", gc6_support);
seq_printf(s, " Video Memory Self Refresh: %s\n", power_info.gc6_support);
seq_printf(s, " Video Memory Off: %s\n\n", power_info.gcoff_support);
gcoff_support = rm_get_gpu_gcx_support(sp, nv, NV_FALSE);
seq_printf(s, " Video Memory Off: %s\n", gcoff_support);
seq_printf(s, "S0ix Power Management:\n");
seq_printf(s, " Platform Support: %s\n",
nv_platform_supports_s0ix() ? "Supported" : "Not Supported");
seq_printf(s, " Status: %s\n", power_info.s0ix_status);
nv_kmem_cache_free_stack(sp);
return 0;

View File

@ -1,5 +1,5 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 2006-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-FileCopyrightText: Copyright (c) 2006-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person obtaining a
@ -869,6 +869,8 @@
* NVreg_ModifyDeviceFiles, NVreg_DeviceFileGID, NVreg_DeviceFileUID
* and NVreg_DeviceFileMode will be honored by nvidia-modprobe.
*
* Also, refer to the NVreg_CreateImexChannel0 option.
*
* Possible values:
* 0 - Disable IMEX using CUDA driver's fabric handles.
* N - N IMEX channels will be enabled in the driver to facilitate N
@ -878,6 +880,29 @@
#define __NV_IMEX_CHANNEL_COUNT ImexChannelCount
#define NV_REG_IMEX_CHANNEL_COUNT NV_REG_STRING(__NV_IMEX_CHANNEL_COUNT)
/*
* Option: NVreg_CreateImexChannel0
*
* Description:
*
* This option allows users to specify whether the NVIDIA driver should create
* IMEX channel 0 by default. When enabled, the channel is created automatically
* when an application (e.g. nvidia-smi, nvidia-persistenced) is run.
*
* Note that users are advised to enable this option only in trusted
* environments where it is acceptable for applications to share the same
* IMEX channel.
*
* For more details on IMEX channels, refer to the NVreg_ImexChannelCount
* option.
*
* Possible values:
* 0 - Do not create IMEX channel 0 (default).
* 1 - Create IMEX channel 0.
*/
#define __NV_CREATE_IMEX_CHANNEL_0 CreateImexChannel0
#define NV_REG_CREATE_IMEX_CHANNEL_0 NV_REG_STRING(__NV_CREATE_IMEX_CHANNEL_0)
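
As with the other NVreg_* options, this one can be set through the usual module-parameter mechanism; for example, a line such as

    options nvidia NVreg_CreateImexChannel0=1

in a file under /etc/modprobe.d would request creation of channel 0 (shown only as an illustration of the option name; the default remains 0).
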
#if defined(NV_DEFINE_REGISTRY_KEY_TABLE)
/*
@ -927,6 +952,7 @@ NV_DEFINE_REG_STRING_ENTRY(__NV_EXCLUDED_GPUS, NULL);
NV_DEFINE_REG_ENTRY(__NV_DMA_REMAP_PEER_MMIO, NV_DMA_REMAP_PEER_MMIO_ENABLE);
NV_DEFINE_REG_STRING_ENTRY(__NV_RM_NVLINK_BW, NULL);
NV_DEFINE_REG_ENTRY_GLOBAL(__NV_IMEX_CHANNEL_COUNT, 2048);
NV_DEFINE_REG_ENTRY_GLOBAL(__NV_CREATE_IMEX_CHANNEL_0, 0);
/*
*----------------registry database definition----------------------
@ -974,6 +1000,7 @@ nv_parm_t nv_parms[] = {
NV_DEFINE_PARAMS_TABLE_ENTRY(__NV_OPENRM_ENABLE_UNSUPPORTED_GPUS),
NV_DEFINE_PARAMS_TABLE_ENTRY(__NV_DMA_REMAP_PEER_MMIO),
NV_DEFINE_PARAMS_TABLE_ENTRY(__NV_IMEX_CHANNEL_COUNT),
NV_DEFINE_PARAMS_TABLE_ENTRY(__NV_CREATE_IMEX_CHANNEL_0),
{NULL, NULL}
};

View File

@ -514,7 +514,6 @@ NV_STATUS nv_alloc_system_pages(
struct device *dev = at->dev;
dma_addr_t bus_addr;
// Order should be zero except for EGM allocations.
unsigned int alloc_page_size = PAGE_SIZE << at->order;
unsigned int alloc_num_pages = NV_CEIL(at->num_pages * PAGE_SIZE, alloc_page_size);
@ -523,7 +522,7 @@ NV_STATUS nv_alloc_system_pages(
unsigned int os_pages_in_page = alloc_page_size / PAGE_SIZE;
nv_printf(NV_DBG_MEMINFO,
"NVRM: VM: %u: %u order0 pages, %u order\n", __FUNCTION__, at->num_pages, at->order);
"NVRM: VM: %s: %u order0 pages, %u order\n", __FUNCTION__, at->num_pages, at->order);
gfp_mask = nv_compute_gfp_mask(nv, at);
@ -641,7 +640,6 @@ void nv_free_system_pages(
unsigned int i;
struct device *dev = at->dev;
// Order should be zero except for EGM allocations.
unsigned int alloc_page_size = PAGE_SIZE << at->order;
unsigned int os_pages_in_page = alloc_page_size / PAGE_SIZE;

View File

@ -29,7 +29,7 @@
NvU64 NV_API_CALL nv_get_kern_phys_address(NvU64 address)
{
/* direct-mapped kernel address */
if (virt_addr_valid(address))
if (virt_addr_valid((void *)address))
return __pa(address);
nv_printf(NV_DBG_ERRORS,

View File

@ -3131,6 +3131,7 @@ NV_STATUS NV_API_CALL
nv_alias_pages(
nv_state_t *nv,
NvU32 page_cnt,
NvU64 page_size,
NvU32 contiguous,
NvU32 cache_type,
NvU64 guest_id,
@ -3152,7 +3153,14 @@ nv_alias_pages(
at->cache_type = cache_type;
if (contiguous)
{
at->flags.contig = NV_TRUE;
at->order = get_order(at->num_pages * PAGE_SIZE);
}
else
{
at->order = get_order(page_size);
}
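
The order chosen above is just the power-of-two page count backing the allocation. A quick user-space re-creation of that calculation, assuming 4 KiB pages; this is not the kernel's get_order(), only a stand-in with the same rounding-up behaviour:

#include <stdio.h>

#define PAGE_SIZE 4096UL

/* Smallest 'order' such that (PAGE_SIZE << order) >= size. */
static unsigned int order_for_size(unsigned long size)
{
    unsigned int order = 0;

    while ((PAGE_SIZE << order) < size)
        order++;
    return order;
}

int main(void)
{
    printf("64 KiB page_size    -> order %u\n", order_for_size(64 * 1024));   /* 4 */
    printf("2 MiB page_size     -> order %u\n", order_for_size(2 << 20));     /* 9 */
    printf("contiguous 33 pages -> order %u\n",
           order_for_size(33 * PAGE_SIZE));                                   /* 6, rounds up */
    return 0;
}
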
#if defined(NVCPU_AARCH64)
if (at->cache_type != NV_MEMORY_CACHED)
at->flags.aliased = NV_TRUE;
@ -3160,8 +3168,6 @@ nv_alias_pages(
at->flags.guest = NV_TRUE;
at->order = get_order(at->num_pages * PAGE_SIZE);
for (i=0; i < at->num_pages; ++i)
{
page_ptr = at->page_table[i];
@ -3271,7 +3277,7 @@ NV_STATUS NV_API_CALL nv_register_user_pages(
nv_linux_state_t *nvl;
nvidia_pte_t *page_ptr;
nv_printf(NV_DBG_MEMINFO, "NVRM: VM: nv_register_user_pages: 0x%x\n", page_count);
nv_printf(NV_DBG_MEMINFO, "NVRM: VM: nv_register_user_pages: 0x%" NvU64_fmtx"\n", page_count);
user_pages = *priv_data;
nvl = NV_GET_NVL_FROM_NV_STATE(nv);
@ -3332,7 +3338,7 @@ void NV_API_CALL nv_unregister_user_pages(
{
nv_alloc_t *at = *priv_data;
nv_printf(NV_DBG_MEMINFO, "NVRM: VM: nv_unregister_user_pages: 0x%x\n", page_count);
nv_printf(NV_DBG_MEMINFO, "NVRM: VM: nv_unregister_user_pages: 0x%" NvU64_fmtx "\n", page_count);
NV_PRINT_AT(NV_DBG_MEMINFO, at);
@ -6133,7 +6139,10 @@ void NV_API_CALL nv_get_screen_info(
{
NvU64 physAddr = screen_info.lfb_base;
#if defined(VIDEO_CAPABILITY_64BIT_BASE)
physAddr |= (NvU64)screen_info.ext_lfb_base << 32;
if (screen_info.capabilities & VIDEO_CAPABILITY_64BIT_BASE)
{
physAddr |= (NvU64)screen_info.ext_lfb_base << 32;
}
#endif
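
A stand-alone sketch of the address composition guarded above; the field and flag names mirror the kernel's struct screen_info, the flag value is assumed to match it, and the sample numbers are made up:

#include <stdio.h>
#include <stdint.h>

#define VIDEO_CAPABILITY_64BIT_BASE (1 << 1)   /* assumed to match the kernel flag */

int main(void)
{
    /* Hypothetical firmware-provided framebuffer description. */
    uint32_t lfb_base     = 0xe0000000;
    uint32_t ext_lfb_base = 0x4;               /* upper 32 bits of the base */
    uint16_t capabilities = VIDEO_CAPABILITY_64BIT_BASE;

    uint64_t phys_addr = lfb_base;

    if (capabilities & VIDEO_CAPABILITY_64BIT_BASE)
        phys_addr |= (uint64_t)ext_lfb_base << 32;

    printf("framebuffer base: 0x%llx\n", (unsigned long long)phys_addr);   /* 0x4e0000000 */
    return 0;
}
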
/* Make sure base address is mapped to GPU BAR */

View File

@ -285,12 +285,15 @@ NV_STATUS nvGpuOpsFlushReplayableFaultBuffer(gpuFaultInfo *pFaultInfo,
NV_STATUS nvGpuOpsTogglePrefetchFaults(gpuFaultInfo *pFaultInfo,
NvBool bEnable);
// Interface used for CCSL
NV_STATUS nvGpuOpsKeyRotationChannelDisable(struct gpuChannel *channelList[],
NvU32 channelListCount);
// Interface used for CCSL
NV_STATUS nvGpuOpsCcslContextInit(struct ccslContext_t **ctx,
gpuChannelHandle channel);
NV_STATUS nvGpuOpsCcslContextClear(struct ccslContext_t *ctx);
NV_STATUS nvGpuOpsCcslContextUpdate(struct ccslContext_t *ctx);
NV_STATUS nvGpuOpsCcslContextUpdate(UvmCslContext *contextList[],
NvU32 contextListCount);
NV_STATUS nvGpuOpsCcslRotateIv(struct ccslContext_t *ctx,
NvU8 direction);
NV_STATUS nvGpuOpsCcslEncrypt(struct ccslContext_t *ctx,

View File

@ -1,5 +1,5 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 2013-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-FileCopyrightText: Copyright (c) 2013-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person obtaining a
@ -1478,6 +1478,15 @@ NV_STATUS nvUvmInterfacePagingChannelPushStream(UvmGpuPagingChannelHandle channe
}
EXPORT_SYMBOL(nvUvmInterfacePagingChannelPushStream);
NV_STATUS nvUvmInterfaceKeyRotationChannelDisable(uvmGpuChannelHandle channelList[],
NvU32 channelListCount)
{
nvidia_stack_t *sp = nvUvmGetSafeStack();
return rm_gpu_ops_key_rotation_channel_disable(sp, ((gpuChannelHandle *)channelList), channelListCount);
}
EXPORT_SYMBOL(nvUvmInterfaceKeyRotationChannelDisable);
NV_STATUS nvUvmInterfaceCslInitContext(UvmCslContext *uvmCslContext,
uvmGpuChannelHandle channel)
{
@ -1516,12 +1525,13 @@ void nvUvmInterfaceDeinitCslContext(UvmCslContext *uvmCslContext)
}
EXPORT_SYMBOL(nvUvmInterfaceDeinitCslContext);
NV_STATUS nvUvmInterfaceCslUpdateContext(UvmCslContext *uvmCslContext)
NV_STATUS nvUvmInterfaceCslUpdateContext(UvmCslContext *contextList[],
NvU32 contextListCount)
{
NV_STATUS status;
nvidia_stack_t *sp = uvmCslContext->nvidia_stack;
nvidia_stack_t *sp = contextList[0]->nvidia_stack;
status = rm_gpu_ops_ccsl_context_update(sp, uvmCslContext->ctx);
status = rm_gpu_ops_ccsl_context_update(sp, contextList, contextListCount);
return status;
}

View File

@ -195,6 +195,7 @@ NV_CONFTEST_FUNCTION_COMPILE_TESTS += devm_clk_bulk_get_all
NV_CONFTEST_FUNCTION_COMPILE_TESTS += get_task_ioprio
NV_CONFTEST_FUNCTION_COMPILE_TESTS += mdev_set_iommu_device
NV_CONFTEST_FUNCTION_COMPILE_TESTS += offline_and_remove_memory
NV_CONFTEST_FUNCTION_COMPILE_TESTS += stack_trace
NV_CONFTEST_FUNCTION_COMPILE_TESTS += crypto_tfm_ctx_aligned
NV_CONFTEST_SYMBOL_COMPILE_TESTS += is_export_symbol_gpl_of_node_to_nid
@ -227,6 +228,8 @@ NV_CONFTEST_SYMBOL_COMPILE_TESTS += is_export_symbol_present_tsec_comms_clear_in
NV_CONFTEST_SYMBOL_COMPILE_TESTS += is_export_symbol_present_tsec_comms_alloc_mem_from_gscco
NV_CONFTEST_SYMBOL_COMPILE_TESTS += is_export_symbol_present_tsec_comms_free_gscco_mem
NV_CONFTEST_SYMBOL_COMPILE_TESTS += is_export_symbol_present_memory_block_size_bytes
NV_CONFTEST_SYMBOL_COMPILE_TESTS += is_export_symbol_present_tegra_platform_is_fpga
NV_CONFTEST_SYMBOL_COMPILE_TESTS += is_export_symbol_present_tegra_platform_is_sim
NV_CONFTEST_SYMBOL_COMPILE_TESTS += crypto
NV_CONFTEST_TYPE_COMPILE_TESTS += dma_ops
@ -251,6 +254,7 @@ NV_CONFTEST_TYPE_COMPILE_TESTS += pci_driver_has_driver_managed_dma
NV_CONFTEST_TYPE_COMPILE_TESTS += vm_area_struct_has_const_vm_flags
NV_CONFTEST_TYPE_COMPILE_TESTS += memory_failure_has_trapno_arg
NV_CONFTEST_TYPE_COMPILE_TESTS += foll_longterm_present
NV_CONFTEST_TYPE_COMPILE_TESTS += bus_type_has_iommu_ops
NV_CONFTEST_GENERIC_COMPILE_TESTS += dom0_kernel_present
NV_CONFTEST_GENERIC_COMPILE_TESTS += nvidia_vgpu_kvm_build

View File

@ -1,5 +1,5 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 1993-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person obtaining a
@ -464,6 +464,9 @@ namespace DisplayPort
virtual bool getStreamStatusChanged() = 0;
virtual void clearStreamStatusChanged() =0;
virtual bool getDpTunnelingIrq() = 0;
virtual void clearDpTunnelingIrq() = 0;
virtual void setDirtyLinkStatus(bool dirty) = 0;
virtual void refreshLinkStatus() = 0;
virtual bool isLinkStatusValid(unsigned lanes) = 0;
@ -529,6 +532,15 @@ namespace DisplayPort
virtual bool readPsrEvtIndicator(vesaPsrEventIndicator *psrErr) = 0;
virtual bool readPrSinkDebugInfo(panelReplaySinkDebugInfo *prDbgInfo) = 0;
virtual bool getDpTunnelBwAllocationSupported() = 0;
virtual bool getDpTunnelEstimatedBw(NvU8 &estimatedBw) = 0;
virtual bool getDpTunnelGranularityMultiplier(NvU8 &granularityMultiplier) = 0;
virtual TriState getDpTunnelBwRequestStatus() = 0;
virtual bool setDpTunnelBwAllocation(bool bEnable) = 0;
virtual bool hasDpTunnelEstimatedBwChanged() = 0;
virtual bool hasDpTunnelBwAllocationCapabilityChanged() = 0;
virtual bool writeDpTunnelRequestedBw(NvU8 requestedBw) = 0;
virtual ~DPCDHAL() {}
};
@ -536,7 +548,876 @@ namespace DisplayPort
//
// Implement interface
//
DPCDHAL * MakeDPCDHAL(AuxBus * bus, Timer * timer);
DPCDHAL * MakeDPCDHAL(AuxBus * bus, Timer * timer, MainLink * main);
struct DPCDHALImpl : DPCDHAL
{
AuxRetry bus;
Timer * timer;
bool dpcdOffline;
bool bGrantsPostLtRequest;
bool pc2Disabled;
bool uprequestEnable;
bool upstreamIsSource;
bool bMultistream;
bool bGpuFECSupported;
bool bLttprSupported;
bool bBypassILREdpRevCheck;
NvU32 overrideDpcdMaxLinkRate;
NvU32 overrideDpcdRev;
NvU32 overrideDpcdMaxLaneCount;
NvU32 gpuDPSupportedVersions;
struct _LegacyPort: public LegacyPort
{
DwnStreamPortType type;
DwnStreamPortAttribute nonEDID;
NvU64 maxTmdsClkRate;
DwnStreamPortType getDownstreamPortType()
{
return type;
}
DwnStreamPortAttribute getDownstreamNonEDIDPortAttribute()
{
return nonEDID;
}
NvU64 getMaxTmdsClkRate()
{
return maxTmdsClkRate;
}
} legacyPort[16];
struct
{
unsigned revisionMajor, revisionMinor; // DPCD offset 0
bool supportsESI;
LinkRate maxLinkRate; // DPCD offset 1
unsigned maxLaneCount; // DPCD offset 2
unsigned maxLanesAtHBR;
unsigned maxLanesAtRBR;
bool enhancedFraming;
bool bPostLtAdjustmentSupport;
bool supportsNoHandshakeTraining;
bool bSupportsTPS4;
unsigned NORP; // DPCD offset 4
bool detailedCapInfo; // DPCD offset 5
bool downStreamPortPresent;
NvU8 downStreamPortType;
unsigned downStreamPortCount; // DPCD offset 7
bool ouiSupported;
bool msaTimingParIgnored;
NvU16 linkRateTable[NV_DPCD_SUPPORTED_LINK_RATES__SIZE]; // DPCD offset 10 ~ 1F
bool supportsMultistream; // DPCD offset 21
unsigned numberAudioEndpoints; // DPCD offset 22
bool overrideToSST; // force to SST even if MST capable
bool noLinkTraining; // DPCD offset 330h
bool extendedRxCapsPresent; // DPCD offset 000Eh [7] - Extended Receiver Capability present
// DPCD Offset 2211h;
unsigned extendedSleepWakeTimeoutRequestMs;
// DPCD Offset 0119h [0] - If we grant the extendedSleepWakeTimeoutRequest
bool bExtendedSleepWakeTimeoutGranted;
bool bFECSupported;
// DPCD Offset F0002h - Number of Physical Repeaters present (after mapping) between Source and Sink
unsigned phyRepeaterCount;
// DPCD offset 700 - EDP_DPCD_REV
unsigned eDpRevision;
struct
{
unsigned revisionMajor, revisionMinor; // DPCD offset F0000h
LinkRate maxLinkRate; // DPCD offset F0001h
unsigned maxLaneCount; // DPCD offset F0004h
unsigned phyRepeaterExtendedWakeTimeoutMs; // DPCD offset F0005h
// The array to keep track of FEC capability of each LTTPR
bool bFECSupportedRepeater[NV_DPCD14_PHY_REPEATER_CNT_MAX];
// If all the LTTPRs supports FEC
bool bFECSupported;
} repeaterCaps;
struct
{
bool bIsSupported;
bool bUsb4DriverSupport;
bool bIsPanelReplayOptimizationSupported;
bool bIsBwAllocationSupported;
NvU8 maxLaneCount;
LinkRate maxLinkRate;
} dpInTunnelingCaps;
PCONCaps pconCaps;
vesaPsrSinkCaps psrCaps;
NvU32 videoFallbackFormats; // DPCD offset 0200h
} caps;
bool bIsDpTunnelBwAllocationEnabled;
struct
{
unsigned sinkCount; // DPCD offset 200
bool automatedTestRequest;
bool cpIRQ;
bool mccsIRQ;
bool downRepMsgRdy;
bool upReqMsgRdy;
bool prErrorStatus; // DPCD offset 2004h[3]
bool rxCapChanged; // DPCD offset 2005
bool linkStatusChanged; // DPCD offset 2005
bool streamStatusChanged; // DPCD offset 2005
bool hdmiLinkStatusChanged; // DPCD offset 2005
bool dpTunnelingIrq; // DPCD offset 2005
NvU8 eightyBitCustomPat[10]; // DPCD offset 250 - 259
struct
{
struct
{
bool clockRecoveryDone;
bool channelEqualizationDone;
bool symbolLocked;
} laneStatus[4]; // DPCD offset 202, 203
bool interlaneAlignDone; // DPCD offset 204
bool downstmPortChng;
bool linkStatusUpdated;
//
// (ESI specific) signifies that link training has completed and that we
// should update the link status in the next query to isLinkLost. Keep in
// mind that linkStatusChanged might still be zero.
//
bool linkStatusDirtied;
} laneStatusIntr;
struct
{
bool testRequestTraining; // DPCD offset 218
LinkRate testRequestLinkRate; // DPCD offset 219
unsigned testRequestLaneCount; // DPCD offset 220
} testTraining;
struct
{
bool testRequestEdidRead; // DPCD offset 218
} testEdid;
struct
{
bool testRequestPattern; // DPCD offset 218
TestPatternType testPatRequested; // DPCD offset 221
NvU16 testHorTotalPixels; // DPCD offset 222, 223
NvU16 testVerTotalLines; // DPCD offset 224, 225
NvU16 testHorStartPixels; // DPCD offset 226, 227
NvU16 testVerStartLines; // DPCD offset 228, 229
NvU16 testHsyncWidthPixels; // DPCD offset 22A, 22B
bool testHsyncPolarity;
NvU16 testVsyncWidthLines; // DPCD offset 22C, 22D
bool testVsyncPolarity;
NvU16 testActiveWidthPixels; // DPCD offset 22E, 22F
NvU16 testActiveHeightLines; // DPCD offset 230, 231
} testPattern;
struct
{
bool testRequestPhyCompliance; // DPCD offset 218
LinkQualityPatternType phyTestPattern; // DPCD offset 248
} testPhyCompliance;
} interrupts;
bool bIndexedLinkrateCapable, bIndexedLinkrateEnabled;
public:
DPCDHALImpl(AuxBus * bus, Timer * timer)
: bus(bus), timer(timer), bGrantsPostLtRequest(false), uprequestEnable(false),
upstreamIsSource(false), bMultistream(false), bGpuFECSupported(false),
bBypassILREdpRevCheck(false), overrideDpcdMaxLinkRate(0),
overrideDpcdRev(0), gpuDPSupportedVersions(0), bIsDpTunnelBwAllocationEnabled(false)
{
// start with default caps.
dpcdOffline = true;
//
// fill out the bare minimum caps required ...
// this should be extended for more DPCD offsets in the future.
//
caps.revisionMajor = 0x1;
caps.revisionMinor = 0x1;
caps.supportsESI = false;
caps.maxLinkRate = HBR3;
caps.maxLaneCount = 4;
caps.enhancedFraming = true;
caps.downStreamPortPresent = true;
caps.downStreamPortCount = 1;
// populate the sinkcount interrupt
interrupts.sinkCount = 1;
}
~DPCDHALImpl()
{
}
virtual void setAuxBus(AuxBus * bus)
{
this->bus = bus;
}
bool isDpcdOffline()
{
return dpcdOffline;
}
void setDPCDOffline(bool bOffline)
{
dpcdOffline = bOffline;
}
void updateDPCDOffline();
void setPC2Disabled(bool disabled)
{
pc2Disabled = disabled;
}
void setLttprSupported(bool isLttprSupported)
{
bLttprSupported = isLttprSupported;
}
bool isPC2Disabled()
{
return pc2Disabled;
}
virtual void parseAndReadCaps();
virtual PCONCaps * getPCONCaps()
{
return &(caps.pconCaps);
}
// DPCD offset 0
virtual unsigned getRevisionMajor()
{
return caps.revisionMajor;
}
virtual unsigned getRevisionMinor()
{
return caps.revisionMinor;
}
// DPCD offset F0000h
virtual unsigned lttprGetRevisionMajor()
{
return caps.repeaterCaps.revisionMajor;
}
virtual unsigned lttprGetRevisionMinor()
{
return caps.repeaterCaps.revisionMinor;
}
virtual LinkRate getMaxLinkRate();
// DPCD offset 2
virtual unsigned getMaxLaneCount();
virtual bool getNoLinkTraining()
{
return caps.noLinkTraining;
}
virtual unsigned getPhyRepeaterCount()
{
return caps.phyRepeaterCount;
}
// Max lanes supported at the desired link rate.
virtual unsigned getMaxLaneCountSupportedAtLinkRate(LinkRate linkRate);
virtual bool getEnhancedFraming()
{
return caps.enhancedFraming;
}
// DPCD offset 5
virtual bool getDownstreamPort(NvU8 *portType)
{
*portType = caps.downStreamPortType;
return caps.downStreamPortPresent;
}
virtual bool getSupportsNoHandshakeTraining()
{
return caps.supportsNoHandshakeTraining;
}
// DPCD offset 7
virtual unsigned getLegacyPortCount()
{
return caps.downStreamPortCount;
}
virtual LegacyPort * getLegacyPort(unsigned index)
{
return &legacyPort[index];
}
virtual bool getMsaTimingparIgnored()
{
return caps.msaTimingParIgnored;
}
virtual bool getOuiSupported()
{
return caps.ouiSupported;
}
virtual bool getSDPExtnForColorimetry();
virtual bool getRootAsyncSDPSupported();
virtual AuxRetry::status setOuiSource(unsigned ouiId, const char * model,
size_t modelNameLength, NvU8 chipRevision);
virtual bool getOuiSource(unsigned &ouiId, char * modelName,
size_t modelNameBufferSize, NvU8 & chipRevision);
virtual bool getOuiSink(unsigned &ouiId, char * modelName,
size_t modelNameBufferSize, NvU8 & chipRevision);
// DPCD offset 21h
virtual bool getSupportsMultistream()
{
return caps.supportsMultistream && (!caps.overrideToSST);
}
virtual void setSupportsESI(bool bIsESISupported)
{
caps.supportsESI = bIsESISupported;
}
//
// Single stream specific caps
// DPCD offset 22h
//
virtual unsigned getNumberOfAudioEndpoints();
// DPCD offset 30h
virtual bool getGUID(GUID & guid);
virtual AuxRetry::status setGUID(GUID & guid);
void parsePortDescriptors();
//
// Notifications of external events
//
virtual void notifyIRQ()
{
parseAndReadInterrupts();
}
virtual void populateFakeDpcd();
// DPCD override routine: Max link rate override.
void overrideMaxLinkRate(NvU32 overrideMaxLinkRate);
// DPCD override routine: Max lane count override.
void overrideMaxLaneCount(NvU32 maxLaneCount)
{
caps.maxLaneCount = maxLaneCount;
overrideDpcdMaxLaneCount = maxLaneCount;
}
// DPCD override routine: Max lane count override at a given link rate.
void skipCableBWCheck(NvU32 maxLaneAtHighRate, NvU32 maxLaneAtLowRate)
{
caps.maxLanesAtHBR = maxLaneAtHighRate;
caps.maxLanesAtRBR = maxLaneAtLowRate;
}
// DPCD override routine: Optimal link config (link rate and lane count) override.
void overrideOptimalLinkCfg(LinkRate optimalLinkRate,
NvU32 optimalLaneCount)
{
caps.maxLinkRate = optimalLinkRate;
caps.maxLaneCount = optimalLaneCount;
}
// DPCD override routine: Optimal link rate
void overrideOptimalLinkRate(LinkRate optimalLinkRate)
{
caps.maxLinkRate = optimalLinkRate;
}
virtual void notifyHPD(bool status, bool bSkipDPCDRead);
virtual bool isPostLtAdjustRequestSupported()
{
//
// If the upstream DPTX and downstream DPRX both support TPS4,
// TPS4 shall be used instead of POST_LT_ADJ_REQ.
//
NvBool bTps4Supported = FLD_TEST_DRF(0073_CTRL_CMD_DP, _GET_CAPS_DP_VERSIONS_SUPPORTED,
_DP1_4, _YES, gpuDPSupportedVersions) &&
caps.bSupportsTPS4;
return bGrantsPostLtRequest && !bTps4Supported;
}
virtual void setPostLtAdjustRequestGranted(bool bGrantPostLtRequest);
virtual bool getIsPostLtAdjRequestInProgress();
virtual TrainingPatternSelectType getTrainingPatternSelect();
virtual bool setTrainingMultiLaneSet(NvU8 numLanes,
NvU8 *voltSwingSet,
NvU8 *preEmphasisSet);
virtual AuxRetry::status setIgnoreMSATimingParamters(bool msaTimingParamIgnoreEn);
virtual AuxRetry::status setLinkQualPatternSet(LinkQualityPatternType linkQualPattern, unsigned laneCount);
virtual AuxRetry::status setLinkQualLaneSet(unsigned lane, LinkQualityPatternType linkQualPattern);
virtual AuxRetry::status setMessagingEnable(bool _uprequestEnable, bool _upstreamIsSource);
virtual AuxRetry::status setMultistreamLink(bool enable);
virtual AuxRetry::status setMultistreamHotplugMode(MultistreamHotplugMode notifyType);
bool parseTestRequestTraining(NvU8 * buffer /* 0x18-0x28 valid */);
void parseAutomatedTestRequest(bool testRequestPending);
virtual bool parseTestRequestPhy();
virtual bool interruptCapabilitiesChanged()
{
return interrupts.rxCapChanged;
}
virtual void clearInterruptCapabilitiesChanged()
{
NvU8 irqVector = 0;
irqVector = FLD_SET_DRF(_DPCD, _LINK_SERVICE_IRQ_VECTOR_ESI0, _RX_CAP_CHANGED, _YES, irqVector);
bus.write(NV_DPCD_LINK_SERVICE_IRQ_VECTOR_ESI0, &irqVector, sizeof irqVector);
}
virtual bool isPanelReplayErrorSet()
{
return interrupts.prErrorStatus;
}
virtual void readPanelReplayError();
virtual void clearPanelReplayError()
{
NvU8 irqVector = 0U;
irqVector = FLD_SET_DRF(_DPCD, _DEVICE_SERVICE_IRQ_VECTOR_ESI1,
_PANEL_REPLAY_ERROR_STATUS, _YES, irqVector);
bus.write(NV_DPCD_DEVICE_SERVICE_IRQ_VECTOR_ESI1, &irqVector,
sizeof irqVector);
}
virtual bool getLinkStatusChanged()
{
return interrupts.linkStatusChanged;
}
virtual void clearLinkStatusChanged()
{
NvU8 irqVector = 0;
irqVector = FLD_SET_DRF(_DPCD, _LINK_SERVICE_IRQ_VECTOR_ESI0, _LINK_STATUS_CHANGED, _YES, irqVector);
bus.write(NV_DPCD_LINK_SERVICE_IRQ_VECTOR_ESI0, &irqVector, sizeof irqVector);
}
virtual bool getHdmiLinkStatusChanged()
{
return interrupts.hdmiLinkStatusChanged;
}
virtual void clearHdmiLinkStatusChanged()
{
NvU8 irqVector = 0;
irqVector = FLD_SET_DRF(_DPCD, _LINK_SERVICE_IRQ_VECTOR_ESI0, _HDMI_LINK_STATUS_CHANGED, _YES, irqVector);
bus.write(NV_DPCD_LINK_SERVICE_IRQ_VECTOR_ESI0, &irqVector, sizeof irqVector);
}
virtual bool getStreamStatusChanged()
{
return interrupts.streamStatusChanged;
}
virtual void clearStreamStatusChanged()
{
NvU8 irqVector = 0;
irqVector = FLD_SET_DRF(_DPCD, _LINK_SERVICE_IRQ_VECTOR_ESI0, _STREAM_STATUS_CHANGED, _YES, irqVector);
bus.write(NV_DPCD_LINK_SERVICE_IRQ_VECTOR_ESI0, &irqVector, sizeof irqVector);
}
virtual bool getDpTunnelingIrq()
{
return interrupts.dpTunnelingIrq;
}
virtual void clearDpTunnelingIrq()
{
NvU8 irqVector = 0;
irqVector = FLD_SET_DRF(_DPCD20, _LINK_SERVICE_IRQ_VECTOR_ESI0, _DP_TUNNELING_IRQ, _YES, irqVector);
bus.write(NV_DPCD20_LINK_SERVICE_IRQ_VECTOR_ESI0, &irqVector, sizeof irqVector);
}
virtual bool isLinkStatusValid(unsigned lanes);
virtual void refreshLinkStatus();
virtual void setDirtyLinkStatus(bool dirty)
{
interrupts.laneStatusIntr.linkStatusDirtied = dirty;
}
void parseAndReadInterruptsESI();
void readLTTPRLinkStatus(NvS32 rxIndex, NvU8 *buffer);
void resetIntrLaneStatus();
void fetchLinkStatusESI();
void fetchLinkStatusLegacy();
virtual bool readTraining(NvU8* voltageSwingLane, NvU8* preemphasisLane,
NvU8* trainingScoreLane, NvU8* postCursor,
NvU8 activeLaneCount);
virtual bool isLaneSettingsChanged(NvU8* oldVoltageSwingLane,
NvU8* newVoltageSwingLane,
NvU8* oldPreemphasisLane,
NvU8* newPreemphasisLane,
NvU8 activeLaneCount);
void parseAndReadInterruptsLegacy();
void parseAndReadInterrupts()
{
if (caps.supportsESI)
parseAndReadInterruptsESI(); // DP 1.2 should use the new ESI region
else
parseAndReadInterruptsLegacy();
}
virtual int getSinkCount() // DPCD offset 200
{
return interrupts.sinkCount;
}
//
// This was introduced as part of a WAR for the HP SDC panel, whose
// TCON sets DPCD 0x200 SINK_COUNT=0. It should never be called to
// set the SinkCount in other cases, since SinkCount comes from DPCD.
//
virtual void setSinkCount(int sinkCount)
{
interrupts.sinkCount = sinkCount;
}
virtual bool interruptContentProtection()
{
return interrupts.cpIRQ;
}
virtual void clearInterruptContentProtection();
virtual bool intteruptMCCS()
{
return interrupts.mccsIRQ;
}
virtual void clearInterruptMCCS();
virtual bool interruptDownReplyReady()
{
return interrupts.downRepMsgRdy;
}
virtual bool interruptUpRequestReady()
{
return interrupts.upReqMsgRdy;
}
virtual void clearInterruptDownReplyReady();
virtual void clearInterruptUpRequestReady();
virtual bool getLaneStatusSymbolLock(int lane)
{
return interrupts.laneStatusIntr.laneStatus[lane].symbolLocked;
}
virtual bool getLaneStatusClockRecoveryDone(int lane)
{
return interrupts.laneStatusIntr.laneStatus[lane].clockRecoveryDone;
}
virtual bool getInterlaneAlignDone() // DPCD offset 204
{
return interrupts.laneStatusIntr.interlaneAlignDone;
}
virtual bool getDownStreamPortStatusChange()
{
return interrupts.laneStatusIntr.downstmPortChng;
}
virtual bool getPendingTestRequestTraining() // DPCD offset 218
{
return interrupts.testTraining.testRequestTraining;
}
virtual bool getPendingAutomatedTestRequest()
{
return interrupts.automatedTestRequest;
}
virtual bool getPendingTestRequestEdidRead()
{
return interrupts.testEdid.testRequestEdidRead;
}
virtual bool getPendingTestRequestPhyCompliance()
{
return interrupts.testPhyCompliance.testRequestPhyCompliance;
}
virtual void getTestRequestTraining(LinkRate & rate, unsigned & lanes) // DPCD offset 219, 220
{
rate = interrupts.testTraining.testRequestLinkRate;
lanes = interrupts.testTraining.testRequestLaneCount;
}
virtual LinkQualityPatternType getPhyTestPattern() // DPCD offset 248
{
return interrupts.testPhyCompliance.phyTestPattern;
}
virtual void getCustomTestPattern(NvU8 *testPattern) // DPCD offset 250 - 259
{
int i;
for (i = 0; i < 10; i++)
{
testPattern[i] = interrupts.eightyBitCustomPat[i];
}
}
virtual bool getBKSV(NvU8 *bKSV);
virtual bool getBCaps(BCaps &bCaps, NvU8 * rawByte);
virtual bool getHdcp22BCaps(BCaps &bCaps, NvU8 *rawByte);
virtual bool getBinfo(BInfo &bInfo);
virtual bool getRxStatus(const HDCPState &hdcpState, NvU8 *data);
virtual AuxRetry::status setTestResponseChecksum(NvU8 checksum)
{
if (caps.revisionMajor <= 0)
DP_ASSERT(0 && "Something is wrong, revision major should be > 0");
return bus.write(NV_DPCD_TEST_EDID_CHKSUM, &checksum, sizeof checksum);
}
virtual AuxRetry::status setTestResponse(bool ack, bool edidChecksumWrite);
// Message box encoding
virtual AuxRetry::status writeDownRequestMessageBox(NvU8 * data, size_t length)
{
//
// We can assume no message was sent if this fails.
// Reasoning:
// Sinks are not allowed to DEFER except on the first 16 byte write.
// If there isn't enough room for the 48 byte packet, that write
// will defer.
//
return bus.write(NV_DPCD_MBOX_DOWN_REQ, data, (unsigned)length);
}
virtual size_t getDownRequestMessageBoxSize()
{
return DP_MESSAGEBOX_SIZE;
}
virtual AuxRetry::status writeUpReplyMessageBox(NvU8 * data, size_t length)
{
if (caps.revisionMajor <= 0)
DP_ASSERT(0 && "Something is wrong, revision major should be > 0");
//
// We can assume no message was sent if this fails.
// Reasoning:
// Sinks are not allowed to DEFER except on the first 16 byte write.
// If there isn't enough room for the 48 byte packet, that write
// will defer.
//
return bus.write(NV_DPCD_MBOX_UP_REP, data, (unsigned)length);
}
virtual size_t getUpReplyMessageBoxSize()
{
return DP_MESSAGEBOX_SIZE;
}
virtual AuxRetry::status readDownReplyMessageBox(NvU32 offset, NvU8 * data, size_t length)
{
// if (caps.revisionMajor <= 0)
// DP_ASSERT(0 && "Something is wrong, revision major should be > 0");
DP_ASSERT(offset + length <= DP_MESSAGEBOX_SIZE);
return bus.read(NV_DPCD_MBOX_DOWN_REP + offset, data, (unsigned)length);
}
virtual size_t getDownReplyMessageBoxSize()
{
return DP_MESSAGEBOX_SIZE;
}
virtual AuxRetry::status readUpRequestMessageBox(NvU32 offset, NvU8 * data, size_t length)
{
if (caps.revisionMajor <= 0)
DP_ASSERT(0 && "Something is wrong, revision major should be > 0");
DP_ASSERT(offset + length <= DP_MESSAGEBOX_SIZE);
return bus.read(NV_DPCD_MBOX_UP_REQ + offset, data, (unsigned)length);
}
virtual size_t getUpRequestMessageBoxSize()
{
return DP_MESSAGEBOX_SIZE;
}
virtual size_t getTransactionSize()
{
return bus.getDirect()->transactionSize();
}
virtual PowerState getPowerState();
virtual bool setPowerState(PowerState newState);
virtual void payloadTableClearACT();
virtual bool payloadWaitForACTReceived();
virtual bool payloadAllocate(unsigned streamId, unsigned begin, unsigned count);
void overrideMultiStreamCap(bool mstCapable)
{
caps.overrideToSST = !mstCapable;
}
bool getMultiStreamCapOverride()
{
return caps.overrideToSST;
}
bool getDpcdMultiStreamCap(void)
{
return caps.supportsMultistream;
}
virtual void setGpuDPSupportedVersions(NvU32 _gpuDPSupportedVersions);
void setGpuFECSupported(bool bSupportFEC)
{
bGpuFECSupported = bSupportFEC;
}
void applyRegkeyOverrides(const DP_REGKEY_DATABASE& dpRegkeyDatabase);
// Clear any pending message {DOWN_REP/UP_REQ} and return true if one existed.
virtual bool clearPendingMsg();
virtual bool isMessagingEnabled();
virtual void setIndexedLinkrateEnabled(bool val)
{
bIndexedLinkrateEnabled = val;
}
virtual bool isIndexedLinkrateEnabled()
{
return bIndexedLinkrateEnabled;
}
virtual bool isIndexedLinkrateCapable()
{
return bIndexedLinkrateCapable;
}
virtual NvU16 *getLinkRateTable();
virtual NvU32 getVideoFallbackSupported()
{
return caps.videoFallbackFormats;
}
virtual bool getRawLinkRateTable(NvU8 *buffer);
virtual void resetProtocolConverter()
{
NvU8 data = 0;
bus.write(NV_DPCD14_PCON_FRL_LINK_CONFIG_1, &data, sizeof(data));
bus.write(NV_DPCD14_PCON_FRL_LINK_CONFIG_2, &data, sizeof(data));
}
virtual bool setSourceControlMode(bool bEnableSourceControlMode, bool bEnableFRLMode);
virtual bool checkPCONFrlReady(bool *bFrlReady);
virtual bool setupPCONFrlLinkAssessment(NvU32 linkBwMask,
bool bEnableExtendLTMode = false,
bool bEnableConcurrentMode = false);
virtual bool checkPCONFrlLinkStatus(NvU32 *frlRateMask);
virtual bool queryHdmiLinkStatus(bool *bLinkActive, bool *bLinkReady);
virtual NvU32 restorePCONFrlLink(NvU32 linkBwMask,
bool bEnableExtendLTMode = false,
bool bEnableConcurrentMode = false);
virtual void readPsrCapabilities(vesaPsrSinkCaps *caps)
{
dpMemCopy(caps, &this->caps.psrCaps, sizeof(vesaPsrSinkCaps));
}
virtual bool updatePsrConfiguration(vesaPsrConfig psrcfg);
virtual bool readPsrConfiguration(vesaPsrConfig *psrcfg);
virtual bool readPsrState(vesaPsrState *psrState);
virtual bool readPsrDebugInfo(vesaPsrDebugStatus *psrDbgState);
virtual bool writePsrErrorStatus(vesaPsrErrorStatus psrErr);
virtual bool readPsrErrorStatus(vesaPsrErrorStatus *psrErr);
virtual bool writePsrEvtIndicator(vesaPsrEventIndicator psrEvt);
virtual bool readPsrEvtIndicator(vesaPsrEventIndicator *psrEvt);
virtual bool readPrSinkDebugInfo(panelReplaySinkDebugInfo *prDbgInfo);
bool getDpTunnelBwAllocationSupported()
{
return false;
}
virtual bool getDpTunnelGranularityMultiplier(NvU8 &granularityMultiplier);
virtual TriState getDpTunnelBwRequestStatus();
virtual bool setDpTunnelBwAllocation(bool bEnable);
bool getDpTunnelEstimatedBw(NvU8 &estimatedBw);
bool hasDpTunnelEstimatedBwChanged();
bool hasDpTunnelBwAllocationCapabilityChanged();
bool writeDpTunnelRequestedBw(NvU8 requestedBw);
};
}
#endif //INCLUDED_DP_CONFIGCAPS_H

View File

@ -1,5 +1,5 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person obtaining a
@ -65,6 +65,7 @@ namespace DisplayPort
DP_IMP_ERROR_INSUFFICIENT_BANDWIDTH,
DP_IMP_ERROR_INSUFFICIENT_BANDWIDTH_DSC,
DP_IMP_ERROR_INSUFFICIENT_BANDWIDTH_NO_DSC,
DP_IMP_ERROR_INSUFFICIENT_DP_TUNNELING_BANDWIDTH,
DP_IMP_ERROR_WATERMARK_BLANKING,
DP_IMP_ERROR_PPS_COLOR_FORMAT_NOT_SUPPORTED,
DP_IMP_ERROR_PPS_INVALID_HBLANK,
@ -274,6 +275,10 @@ namespace DisplayPort
virtual DscCaps getDscCaps() = 0;
virtual NvBool isDynamicPPSSupported() = 0;
virtual NvBool isDynamicDscToggleSupported() = 0;
//
// This function returns the device itself or its parent device that is doing
// DSC decompression for it.
@ -321,8 +326,14 @@ namespace DisplayPort
virtual bool isMSAOverMSTCapable() = 0;
virtual bool isFakedMuxDevice() = 0;
virtual bool setPanelReplayConfig(panelReplayConfig prcfg) = 0;
virtual bool getPanelReplayConfig(panelReplayConfig *pPrcfg) = 0;
virtual bool isPanelReplaySupported() = 0;
virtual bool getPanelReplayStatus(PanelReplayStatus *pPrStatus) = 0;
virtual bool getDeviceSpecificData(NvU8 *oui, NvU8 *deviceIdString,
NvU8 *hwRevision, NvU8 *swMajorRevision,
NvU8 *swMinorRevision) = 0;
virtual bool setModeList(DisplayPort::DpModesetParams *pModeList, unsigned numModes) = 0;
protected:
virtual ~Device() {}
@ -594,6 +605,8 @@ namespace DisplayPort
virtual void notifyGPUCapabilityChange() = 0;
virtual void notifyHBR2WAREngage() = 0;
virtual bool dpUpdateDscStream(Group *target, NvU32 dscBpp) = 0;
// Create a new Group. Note that if you wish to do a modeset but send the
// stream nowhere, you may do a modeset with an EMPTY group. This is expected
// to be the mechanism by which monitor faking is implemented.
@ -710,6 +723,7 @@ namespace DisplayPort
virtual bool setTestPattern(NV0073_CTRL_DP_TESTPATTERN testPattern,
NvU8 laneMask, NV0073_CTRL_DP_CSTM cstm,
NvBool bIsHBR2, NvBool bSkipLaneDataOverride) = 0;
// "data" is an array of NV0073_CTRL_MAX_LANES unsigned ints
virtual bool getLaneConfig(NvU32 *numLanes, NvU32 *data) = 0;
// "data" is an array of NV0073_CTRL_MAX_LANES unsigned ints
@ -735,6 +749,7 @@ namespace DisplayPort
virtual bool updatePsrLinkState(bool bTurnOnLink) = 0;
virtual bool readPrSinkDebugInfo(panelReplaySinkDebugInfo *prDbgInfo) = 0;
virtual void enableDpTunnelingBwAllocationSupport() = 0;
protected:
virtual ~Connector() {}

View File

@ -49,6 +49,9 @@
#define HDCP_FLAGS_ABORT_DEVICE_INVALID 0x00080000 // Abort due to an invalid device in DP1.2 topology
#define HDCP_FLAGS_ABORT_HOP_LIMIT_EXCEEDED 0x80000000 // Abort, number of devices in DP1.2 topology exceeds supported limit
#define DP_TUNNEL_REQUEST_BW_MAX_TIME_MS (1000U)
#define DP_TUNNEL_REQUEST_BW_POLLING_INTERVAL_MS (10U)
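
These two constants appear to bound the DP tunnel bandwidth-request handshake: poll the request status roughly every 10 ms and give up after about one second. A hedged sketch of a poll loop shaped by them follows; pollBwRequestStatus() and sleepMs() are placeholders, not functions from this header:

    // Sketch only: wait for the sink's reply to a tunnel bandwidth request,
    // giving up after DP_TUNNEL_REQUEST_BW_MAX_TIME_MS.
    static bool waitForDpTunnelBwReply()
    {
        for (NvU32 elapsedMs = 0;
             elapsedMs < DP_TUNNEL_REQUEST_BW_MAX_TIME_MS;
             elapsedMs += DP_TUNNEL_REQUEST_BW_POLLING_INTERVAL_MS)
        {
            if (pollBwRequestStatus())                         // placeholder status query
                return true;
            sleepMs(DP_TUNNEL_REQUEST_BW_POLLING_INTERVAL_MS); // placeholder delay
        }
        return false;   // no reply within ~1000 ms
    }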
static inline unsigned getDataClockMultiplier(NvU64 linkRate, NvU64 laneCount)
{
//
@ -192,6 +195,7 @@ namespace DisplayPort
bool compoundQueryResult;
unsigned compoundQueryCount;
unsigned compoundQueryLocalLinkPBN;
NvU64 compoundQueryUsedTunnelingBw;
bool compoundQueryForceEnableFEC;
unsigned freeSlots;
@ -309,7 +313,6 @@ namespace DisplayPort
bool bNoFallbackInPostLQA;
bool bReportDeviceLostBeforeNew;
bool bEnableAudioBeyond48K;
bool bDisableSSC;
bool bEnableFastLT;
NvU32 maxLinkRateFromRegkey;
@ -348,9 +351,6 @@ namespace DisplayPort
//
bool bPowerDownPhyBeforeD3;
// Force DSC on sink irrespective of LT status
bool bForceDscOnSink;
//
// Reset the MSTM_CTRL registers on branch device irrespective of
// IRQ VECTOR register having stale message. Certain branch devices
@ -362,6 +362,11 @@ namespace DisplayPort
bool bForceClearPendingMsg;
bool bSkipFakeDeviceDpcdAccess;
NvU64 allocatedDpTunnelBw;
NvU64 allocatedDpTunnelBwShadow;
bool bForceDisableTunnelBwAllocation;
bool bClientRequestedDpTunnelBwAllocation;
bool bIsDpTunnelBwAllocationEnabled;
Group *perHeadAttachedGroup[NV_MAX_HEADS];
NvU32 inTransitionHeadMask;
@ -444,6 +449,9 @@ namespace DisplayPort
const DpModesetParams &modesetParams, // Modeset info
DscParams *pDscParams = NULL, // DSC parameters
DP_IMP_ERROR *pErrorCode = NULL); // Error Status code
virtual bool compoundQueryAttachTunneling(const DpModesetParams &modesetParams,
DscParams *pDscParams = NULL,
DP_IMP_ERROR *pErrorCode = NULL);
virtual bool endCompoundQuery();
@ -495,6 +503,7 @@ namespace DisplayPort
char tagHDCPReauthentication;
char tagDelayedHdcpCapRead;
char tagDelayedHDCPCPIrqHandling;
char tagDpBwAllocationChanged;
//
// Enable disable TMDS mode
@ -563,6 +572,18 @@ namespace DisplayPort
bool willLinkSupportModeSST(const LinkConfiguration & linkConfig, const ModesetInfo & modesetInfo);
void forceLinkTraining();
bool updateDpTunnelBwAllocation();
void configureDpTunnelBwAllocation();
TriState requestDpTunnelBw(NvU8 requestedBw);
bool allocateDpTunnelBw(NvU64 bandwidth);
bool allocateMaxDpTunnelBw();
NvU64 getMaxTunnelBw();
void enableDpTunnelingBwAllocationSupport()
{
bClientRequestedDpTunnelBwAllocation = true;
}
void assessLink(LinkTrainingType trainType = NORMAL_LINK_TRAINING);
bool isLinkInD3();
@ -594,8 +615,8 @@ namespace DisplayPort
void populateDscBranchCaps(DSC_INFO* dscInfo, DeviceImpl * dev);
void populateDscModesetInfo(MODESET_INFO * pModesetInfo, const DpModesetParams * pModesetParams);
bool train(const LinkConfiguration & lConfig, bool force, LinkTrainingType trainType = NORMAL_LINK_TRAINING);
bool validateLinkConfiguration(const LinkConfiguration & lConfig);
virtual bool train(const LinkConfiguration & lConfig, bool force, LinkTrainingType trainType = NORMAL_LINK_TRAINING);
virtual bool validateLinkConfiguration(const LinkConfiguration & lConfig);
virtual bool assessPCONLinkCapability(PCONLinkControl *params);
bool trainPCONFrlLink(PCONLinkControl *pConControl);
@ -606,12 +627,12 @@ namespace DisplayPort
// the lowest level function(nearest to the hal) for the connector.
bool rawTrain(const LinkConfiguration & lConfig, bool force, LinkTrainingType linkTrainingType);
bool enableFlush();
bool beforeAddStream(GroupImpl * group, bool force=false, bool forFlushMode = false);
void afterAddStream(GroupImpl * group);
void beforeDeleteStream(GroupImpl * group, bool forFlushMode = false);
void afterDeleteStream(GroupImpl * group);
void disableFlush(bool test=false);
virtual bool enableFlush();
virtual bool beforeAddStream(GroupImpl * group, bool force=false, bool forFlushMode = false);
virtual void afterAddStream(GroupImpl * group);
virtual void beforeDeleteStream(GroupImpl * group, bool forFlushMode = false);
virtual void afterDeleteStream(GroupImpl * group);
virtual void disableFlush(bool test=false);
bool beforeAddStreamMST(GroupImpl * group, bool force = false, bool forFlushMode = false);
@ -619,7 +640,7 @@ namespace DisplayPort
bool deleteAllVirtualChannels();
void clearTimeslices();
bool allocateTimeslice(GroupImpl * targetGroup);
virtual bool allocateTimeslice(GroupImpl * targetGroup);
void freeTimeslice(GroupImpl * targetGroup);
void flushTimeslotsToHardware();
bool getHDCPAbortCodesDP12(NvU32 &hdcpAbortCodesDP12);
@ -629,6 +650,7 @@ namespace DisplayPort
bool handleCPIRQ();
void handleSSC();
void handleMCCSIRQ();
void handleDpTunnelingIrq();
void handleHdmiLinkStatusChanged();
void sortActiveGroups(bool ascending);
void configInit();
@ -639,7 +661,7 @@ namespace DisplayPort
void notifyLongPulseInternal(bool statusConnected);
virtual void notifyLongPulse(bool status);
virtual void notifyShortPulse();
virtual Group * newGroup() ;
virtual Group * newGroup();
virtual void destroy();
virtual void createFakeMuxDevice(const NvU8 *buffer, NvU32 bufferSize);
virtual void deleteFakeMuxDevice();
@ -664,6 +686,7 @@ namespace DisplayPort
Group * createFirmwareGroup();
virtual void notifyGPUCapabilityChange();
virtual void notifyHBR2WAREngage();
bool dpUpdateDscStream(Group *target, NvU32 dscBpp);
bool getTestPattern(NV0073_CTRL_DP_TESTPATTERN *testPattern);
bool setTestPattern(NV0073_CTRL_DP_TESTPATTERN testPattern, NvU8 laneMask, NV0073_CTRL_DP_CSTM cstm, NvBool bIsHBR2, NvBool bSkipLaneDataOverride = false);
@ -707,16 +730,16 @@ namespace DisplayPort
//
struct DevicePendingEDIDRead : protected EdidReadMultistream::EdidReadMultistreamEventSink, public ListElement
{
EdidReadMultistream reader;
DiscoveryManager::Device device;
ConnectorImpl * parent;
DiscoveryManager::Device device;
EdidReadMultistream reader;
void mstEdidCompleted(EdidReadMultistream * from);
void mstEdidReadFailed(EdidReadMultistream * from);
public:
DevicePendingEDIDRead(ConnectorImpl * _parent, MessageManager * manager, DiscoveryManager::Device dev)
: reader(_parent->timer, manager, this, dev.address), device(dev), parent(_parent)
: parent(_parent), device(dev), reader(_parent->timer, manager, this, dev.address)
{
}
};

View File

@ -44,6 +44,7 @@ namespace DisplayPort
#define HDCP_BCAPS_DDC_EN_BIT 0x80
#define HDCP_BCAPS_DP_EN_BIT 0x01
#define HDCP_I2C_CLIENT_ADDR 0x74
#define DEVICE_OUI_SIZE 3
struct GroupImpl;
struct ConnectorImpl;
@ -170,7 +171,6 @@ namespace DisplayPort
// Panel replay Caps
PanelReplayCaps prCaps;
bool bIsFakedMuxDevice;
bool bIsPreviouslyFakedMuxDevice;
bool bisMarkedForDeletion;
@ -202,6 +202,8 @@ namespace DisplayPort
bool bSkipFakeDeviceDpcdAccess;
DeviceImpl(DPCDHAL * hal, ConnectorImpl * connector, DeviceImpl * parent, bool bSkipFakeDeviceDpcdAccess);
NvU64 maxModeBwRequired;
~DeviceImpl();
virtual bool isCableOk();
@ -380,6 +382,11 @@ namespace DisplayPort
return dpcdRevisionMinor >= minor;
}
NvU64 getMaxModeBwRequired()
{
return maxModeBwRequired;
}
virtual void queryGUID2();
virtual bool getSDPExtnForColorimetrySupported();
@ -445,6 +452,7 @@ namespace DisplayPort
bool isPanelReplaySupported(void);
void getPanelReplayCaps(void);
bool setPanelReplayConfig(panelReplayConfig prcfg);
bool getPanelReplayConfig(panelReplayConfig *pPrcfg);
bool getPanelReplayStatus(PanelReplayStatus *pPrStatus);
NvBool getDSCSupport();
@ -481,6 +489,11 @@ namespace DisplayPort
unsigned getDscMaxSliceWidth();
unsigned getDscDecoderColorDepthSupportMask();
void setDscDecompressionDevice(bool bDscCapBasedOnParent);
virtual bool getDeviceSpecificData(NvU8 *oui, NvU8 *deviceIdString,
NvU8 *hwRevision, NvU8 *swMajorRevision,
NvU8 *swMinorRevision);
virtual bool setModeList(DisplayPort::DpModesetParams *pModeList, unsigned numModes);
};
class DeviceHDCPDetection : public Object, MessageManager::Message::MessageEventSink, Timer::TimerCallback
{

View File

@ -1,5 +1,5 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 2010-2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-FileCopyrightText: Copyright (c) 2010-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person obtaining a
@ -124,26 +124,7 @@ namespace DisplayPort
return this->patchedChecksum;
}
bool isValidHeader() const
{
NvU8 validHeaderData[8] = {
0x00, 0xFF, 0xFF, 0xFF, 0xFF,
0xFF, 0xFF, 0x00};
if (buffer.getLength() < 0x8)
return false;
for (unsigned i = 0; i < 8; i++)
{
if (buffer.data[i] != validHeaderData[i])
{
DP_LOG(("DP-EDID> Invalid EDID Header"));
return false;
}
}
return true;
}
bool isValidHeader() const;
unsigned getManufId() const
{

View File

@ -1,5 +1,5 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person obtaining a
@ -147,6 +147,7 @@ namespace DisplayPort
// Defines the same as NV0073_CTRL_CMD_DP_GET_CAPS_PARAMS.dpVersionsSupported
//
NvU32 _gpuSupportedDpVersions;
bool _isStreamCloningEnabled;
bool _needForceRmEdid;
bool _skipPowerdownEDPPanelWhenHeadDetach;
@ -156,10 +157,11 @@ namespace DisplayPort
bool _useDfpMaxLinkRateCaps;
bool _applyLinkBwOverrideWarRegVal;
bool _isDynamicMuxCapable;
bool _isMDMEnabled;
bool _enableMSAOverrideOverMST;
bool _isLTPhyRepeaterSupported;
bool _isMSTPCONCapsReadDisabled;
bool _isDownspreadSupported;
//
// LTTPR count reported by RM, it might not be the same with DPLib probe
// For example, some Intel LTTPR might not be ready to response 0xF0000 probe
@ -258,6 +260,16 @@ namespace DisplayPort
return (_isDynamicMuxCapable && _isEDP);
}
virtual bool isMDMEnabled()
{
return (_isMDMEnabled && _isEDP);
}
virtual bool isDownspreadSupported()
{
return _isDownspreadSupported;
}
// Get GPU DSC capabilities
virtual void getDscCaps(bool *pbDscSupported,
unsigned *pEncoderColorFormatMask,
@ -313,6 +325,11 @@ namespace DisplayPort
return this->_isLTPhyRepeaterSupported;
}
EvoInterface * getProvider()
{
return this->provider;
}
// Return the current mux state. Returns false if device is not mux capable
bool getDynamicMuxState(NvU32 *muxState);
@ -334,8 +351,8 @@ namespace DisplayPort
virtual bool getMaxLinkConfigFromUefi(NvU8 &linkRate, NvU8 &laneCount);
virtual bool setDpMSAParameters(bool bStereoEnable, const NV0073_CTRL_CMD_DP_SET_MSA_PROPERTIES_PARAMS &msaparams);
virtual bool setDpStereoMSAParameters(bool bStereoEnable, const NV0073_CTRL_CMD_DP_SET_MSA_PROPERTIES_PARAMS &msaparams);
virtual bool setFlushMode();
virtual void clearFlushMode(unsigned headMask, bool testMode=false);
bool setFlushMode();
void clearFlushMode(unsigned headMask, bool testMode=false);
virtual bool dscCrcTransaction(NvBool bEnable, gpuDscCrc *data, NvU16 *headIndex);

View File

@ -1,5 +1,5 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 2015-2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-FileCopyrightText: Copyright (c) 2015-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person obtaining a
@ -32,12 +32,14 @@
#include "nvtypes.h"
#include "dp_tracing.h"
#include "dp_printf.h"
extern "C" void * dpMalloc(NvLength size);
extern "C" void dpFree(void * ptr);
extern "C" void dpDebugBreakpoint();
// Note: dpPrint() implementations are expected to append a newline themselves.
extern "C" void dpPrint(const char * formatter, ...);
extern "C" void dpPrintf(DP_LOG_LEVEL severity, const char * formatter, ...);
extern "C" void dpTraceEvent(NV_DP_TRACING_EVENT event,
NV_DP_TRACING_PRIORITY priority, NvU32 numArgs, ...);

View File

@ -1,5 +1,5 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 1993-2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person obtaining a
@ -108,13 +108,6 @@ template <class T> void dp_used(const T & /*x*/) {}
//
#if NV_DP_ASSERT_ENABLED
#define DP_LOG(x) \
do \
{ \
dpPrint x; \
addDpLogRecord x; \
}while (false)
#define DP_ASSERT(x) \
if (!(x)) \
{ \
@ -123,9 +116,6 @@ template <class T> void dp_used(const T & /*x*/) {}
dpDebugBreakpoint(); \
}
#else
#define DP_LOG(x)
#define DP_ASSERT(x) \
{ \
DP_USED(x); \

Some files were not shown because too many files have changed in this diff.