This commit is contained in:
Andy Ritger 2022-11-10 08:39:33 -08:00
parent 7c345b838b
commit 758b4ee818
No known key found for this signature in database
GPG Key ID: 6D466BB75E006CFC
1323 changed files with 262135 additions and 60754 deletions

View File

@ -1,5 +1,22 @@
# Changelog
## Release 525 Entries
### [525.53] 2022-11-10
#### Changed
- GSP firmware is now distributed as multiple firmware files: this release has `gsp_tu10x.bin` and `gsp_ad10x.bin` replacing `gsp.bin` from previous releases.
- Each file is named after a GPU architecture and supports GPUs from one or more architectures. This allows GSP firmware to better leverage each architecture's capabilities.
- The .run installer will continue to install firmware to `/lib/firmware/nvidia/<version>` and the `nvidia.ko` kernel module will load the appropriate firmware for each GPU at runtime.
#### Fixed
- Add support for IBT (indirect branch tracking) on supported platforms, [#256](https://github.com/NVIDIA/open-gpu-kernel-modules/issues/256) by @rnd-ash
- Return EINVAL when failing to allocate memory, [#280](https://github.com/NVIDIA/open-gpu-kernel-modules/pull/280) by @YusufKhan-gamedev
- Fix various typos in nvidia/src/kernel, [#16](https://github.com/NVIDIA/open-gpu-kernel-modules/pull/16) by @alexisgeoffrey
- Added support for rotation in X11, Quadro Sync, Stereo, and YUV 4:2:0 on Turing.
## Release 520 Entries
### [520.56.06] 2022-10-12
@ -29,6 +46,8 @@
- Improved compatibility with new Linux kernel releases
- Fixed possible excessive GPU power draw on an idle X11 or Wayland desktop when driving high resolutions or refresh rates
### [515.65.07] 2022-10-19
### [515.65.01] 2022-08-02
#### Fixed

View File

@ -1,7 +1,7 @@
# NVIDIA Linux Open GPU Kernel Module Source
This is the source release of the NVIDIA Linux open GPU kernel modules,
version 520.56.06.
version 525.53.
## How to Build
@ -15,9 +15,9 @@ as root:
make modules_install -j$(nproc)
Note that the kernel modules built here must be used with gsp.bin
Note that the kernel modules built here must be used with GSP
firmware and user-space NVIDIA GPU driver components from a corresponding
520.56.06 driver release. This can be achieved by installing
525.53 driver release. This can be achieved by installing
the NVIDIA GPU driver from the .run file using the `--no-kernel-modules`
option. E.g.,
@ -167,7 +167,7 @@ for the target kernel.
## Compatible GPUs
The open-gpu-kernel-modules can be used on any Turing or later GPU
(see the table below). However, in the 520.56.06 release,
(see the table below). However, in the 525.53 release,
GeForce and Workstation support is still considered alpha-quality.
To enable use of the open kernel modules on GeForce and Workstation GPUs,
@ -175,7 +175,7 @@ set the "NVreg_OpenRmEnableUnsupportedGpus" nvidia.ko kernel module
parameter to 1. For more details, see the NVIDIA GPU driver end user
README here:
https://us.download.nvidia.com/XFree86/Linux-x86_64/520.56.06/README/kernel_open.html
https://us.download.nvidia.com/XFree86/Linux-x86_64/525.53/README/kernel_open.html
In the below table, if three IDs are listed, the first is the PCI Device
ID, the second is the PCI Subsystem Vendor ID, and the third is the PCI
@ -652,6 +652,17 @@ Subsystem Device ID.
| NVIDIA PG506-232 | 20B6 10DE 1492 |
| NVIDIA A30 | 20B7 10DE 1532 |
| NVIDIA A100-PCIE-40GB | 20F1 10DE 145F |
| NVIDIA A800-SXM4-80GB | 20F3 10DE 179B |
| NVIDIA A800-SXM4-80GB | 20F3 10DE 179C |
| NVIDIA A800-SXM4-80GB | 20F3 10DE 179D |
| NVIDIA A800-SXM4-80GB | 20F3 10DE 179E |
| NVIDIA A800-SXM4-80GB | 20F3 10DE 179F |
| NVIDIA A800-SXM4-80GB | 20F3 10DE 17A0 |
| NVIDIA A800-SXM4-80GB | 20F3 10DE 17A1 |
| NVIDIA A800-SXM4-80GB | 20F3 10DE 17A2 |
| NVIDIA A800 80GB PCIe | 20F5 10DE 1799 |
| NVIDIA A800 80GB PCIe LC | 20F5 10DE 179A |
| NVIDIA A800 40GB PCIe | 20F6 10DE 17A3 |
| NVIDIA GeForce GTX 1660 Ti | 2182 |
| NVIDIA GeForce GTX 1660 | 2184 |
| NVIDIA GeForce GTX 1650 SUPER | 2187 |

View File

@ -72,7 +72,7 @@ EXTRA_CFLAGS += -I$(src)/common/inc
EXTRA_CFLAGS += -I$(src)
EXTRA_CFLAGS += -Wall -MD $(DEFINES) $(INCLUDES) -Wno-cast-qual -Wno-error -Wno-format-extra-args
EXTRA_CFLAGS += -D__KERNEL__ -DMODULE -DNVRM
EXTRA_CFLAGS += -DNV_VERSION_STRING=\"520.56.06\"
EXTRA_CFLAGS += -DNV_VERSION_STRING=\"525.53\"
EXTRA_CFLAGS += -Wno-unused-function
@ -229,6 +229,7 @@ NV_HEADER_PRESENCE_TESTS = \
drm/drm_ioctl.h \
drm/drm_device.h \
drm/drm_mode_config.h \
drm/drm_modeset_lock.h \
dt-bindings/interconnect/tegra_icc_id.h \
generated/autoconf.h \
generated/compile.h \
@ -243,6 +244,8 @@ NV_HEADER_PRESENCE_TESTS = \
linux/log2.h \
linux/of.h \
linux/bug.h \
linux/sched.h \
linux/sched/mm.h \
linux/sched/signal.h \
linux/sched/task.h \
linux/sched/task_stack.h \
@ -286,7 +289,10 @@ NV_HEADER_PRESENCE_TESTS = \
linux/ioasid.h \
linux/stdarg.h \
linux/iosys-map.h \
asm/coco.h
asm/coco.h \
linux/vfio_pci_core.h \
soc/tegra/bpmp-abi.h \
soc/tegra/bpmp.h
# Filename to store the define for the header in $(1); this is only consumed by
# the rule below that concatenates all of these together.

View File

@ -242,7 +242,7 @@
#endif
/* For verification-only features not intended to be included in normal drivers */
#if (defined(NV_MODS) || defined(NV_GSP_MODS)) && defined(DEBUG) && !defined(DISABLE_VERIF_FEATURES)
#if defined(ENABLE_VERIF_FEATURES)
#define NV_VERIF_FEATURES
#endif
@ -276,12 +276,6 @@
#define NV_IS_MODS 0
#endif
#if defined(NV_GSP_MODS)
#define NV_IS_GSP_MODS 1
#else
#define NV_IS_GSP_MODS 0
#endif
#if defined(NV_WINDOWS)
#define NVOS_IS_WINDOWS 1
#else

View File

@ -0,0 +1,132 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#ifndef NV_FIRMWARE_H
#define NV_FIRMWARE_H
#include <nvtypes.h>
#include <nvmisc.h>
typedef enum
{
NV_FIRMWARE_TYPE_GSP,
NV_FIRMWARE_TYPE_GSP_LOG
} nv_firmware_type_t;
typedef enum
{
NV_FIRMWARE_CHIP_FAMILY_NULL = 0,
NV_FIRMWARE_CHIP_FAMILY_TU10X = 1,
NV_FIRMWARE_CHIP_FAMILY_TU11X = 2,
NV_FIRMWARE_CHIP_FAMILY_GA100 = 3,
NV_FIRMWARE_CHIP_FAMILY_GA10X = 4,
NV_FIRMWARE_CHIP_FAMILY_AD10X = 5,
NV_FIRMWARE_CHIP_FAMILY_GH100 = 6,
NV_FIRMWARE_CHIP_FAMILY_END,
} nv_firmware_chip_family_t;
static inline const char *nv_firmware_chip_family_to_string(
nv_firmware_chip_family_t fw_chip_family
)
{
switch (fw_chip_family) {
case NV_FIRMWARE_CHIP_FAMILY_GH100: return "gh100";
case NV_FIRMWARE_CHIP_FAMILY_AD10X: return "ad10x";
case NV_FIRMWARE_CHIP_FAMILY_GA10X: return "ga10x";
case NV_FIRMWARE_CHIP_FAMILY_GA100: return "ga100";
case NV_FIRMWARE_CHIP_FAMILY_TU11X: return "tu11x";
case NV_FIRMWARE_CHIP_FAMILY_TU10X: return "tu10x";
case NV_FIRMWARE_CHIP_FAMILY_END: // fall through
case NV_FIRMWARE_CHIP_FAMILY_NULL:
return NULL;
}
return NULL;
}
// The includer (presumably nv.c) may optionally define
// NV_FIRMWARE_PATH_FOR_FILENAME(filename)
// to return a string "path" given a gsp_*.bin or gsp_log_*.bin filename.
//
// The function nv_firmware_path will then be available.
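// Illustrative sketch (the macro body below is an assumption, not defined by
// this header): an includer might map each filename onto the installed
// firmware directory before including this file, e.g.
//
//     #define NV_FIRMWARE_PATH_FOR_FILENAME(filename) ("nvidia/525.53/" filename)
//
// after which nv_firmware_path(NV_FIRMWARE_TYPE_GSP, NV_FIRMWARE_CHIP_FAMILY_TU10X)
// would evaluate to "nvidia/525.53/gsp_tu10x.bin".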
#if defined(NV_FIRMWARE_PATH_FOR_FILENAME)
static inline const char *nv_firmware_path(
nv_firmware_type_t fw_type,
nv_firmware_chip_family_t fw_chip_family
)
{
if (fw_type == NV_FIRMWARE_TYPE_GSP)
{
switch (fw_chip_family)
{
case NV_FIRMWARE_CHIP_FAMILY_AD10X:
return NV_FIRMWARE_PATH_FOR_FILENAME("gsp_ad10x.bin");
case NV_FIRMWARE_CHIP_FAMILY_GH100: // fall through
case NV_FIRMWARE_CHIP_FAMILY_GA100: // fall through
case NV_FIRMWARE_CHIP_FAMILY_GA10X: // fall through
case NV_FIRMWARE_CHIP_FAMILY_TU11X: // fall through
case NV_FIRMWARE_CHIP_FAMILY_TU10X:
return NV_FIRMWARE_PATH_FOR_FILENAME("gsp_tu10x.bin");
case NV_FIRMWARE_CHIP_FAMILY_END: // fall through
case NV_FIRMWARE_CHIP_FAMILY_NULL:
return "";
}
}
else if (fw_type == NV_FIRMWARE_TYPE_GSP_LOG)
{
switch (fw_chip_family)
{
case NV_FIRMWARE_CHIP_FAMILY_AD10X:
return NV_FIRMWARE_PATH_FOR_FILENAME("gsp_log_ad10x.bin");
case NV_FIRMWARE_CHIP_FAMILY_GH100: // fall through
case NV_FIRMWARE_CHIP_FAMILY_GA100: // fall through
case NV_FIRMWARE_CHIP_FAMILY_GA10X: // fall through
case NV_FIRMWARE_CHIP_FAMILY_TU11X: // fall through
case NV_FIRMWARE_CHIP_FAMILY_TU10X:
return NV_FIRMWARE_PATH_FOR_FILENAME("gsp_log_tu10x.bin");
case NV_FIRMWARE_CHIP_FAMILY_END: // fall through
case NV_FIRMWARE_CHIP_FAMILY_NULL:
return "";
}
}
return "";
}
#endif // defined(NV_FIRMWARE_PATH_FOR_FILENAME)
// The includer (presumably nv.c) may optionally define
// NV_FIRMWARE_DECLARE_GSP_FILENAME(filename)
// which will then be invoked (at the top-level) for each
// gsp_*.bin (but not gsp_log_*.bin)
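// Illustrative sketch (assumed usage, not part of this header): an includer
// could expand each GSP firmware name into a MODULE_FIRMWARE() declaration,
// e.g.
//
//     #define NV_FIRMWARE_DECLARE_GSP_FILENAME(filename) \
//         MODULE_FIRMWARE(NV_FIRMWARE_PATH_FOR_FILENAME(filename));
//
// so that the firmware files the module may request are visible to userspace
// tooling such as initramfs generators.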
#if defined(NV_FIRMWARE_DECLARE_GSP_FILENAME)
NV_FIRMWARE_DECLARE_GSP_FILENAME("gsp_ad10x.bin")
NV_FIRMWARE_DECLARE_GSP_FILENAME("gsp_tu10x.bin")
#endif // defined(NV_FIRMWARE_DECLARE_GSP_FILENAME)
#endif // NV_FIRMWARE_H

View File

@ -1,5 +1,5 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 2020 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-FileCopyrightText: Copyright (c) 2020-22 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person obtaining a
@ -91,6 +91,6 @@ static inline void _nv_hash_init(struct hlist_head *ht, unsigned int sz)
* @key: the key of the objects to iterate over
*/
#define nv_hash_for_each_possible(name, obj, member, key) \
nv_hlist_for_each_entry(obj, &name[NV_HASH_MIN(key, NV_HASH_BITS(name))], member)
hlist_for_each_entry(obj, &name[NV_HASH_MIN(key, NV_HASH_BITS(name))], member)
#endif // __NV_HASH_H__

View File

@ -27,15 +27,13 @@
#include <nv-kernel-interface-api.h>
// Enums for supported hypervisor types.
// New hypervisor type should be added before OS_HYPERVISOR_CUSTOM_FORCED
// New hypervisor type should be added before OS_HYPERVISOR_UNKNOWN
typedef enum _HYPERVISOR_TYPE
{
OS_HYPERVISOR_XEN = 0,
OS_HYPERVISOR_VMWARE,
OS_HYPERVISOR_HYPERV,
OS_HYPERVISOR_KVM,
OS_HYPERVISOR_PARALLELS,
OS_HYPERVISOR_CUSTOM_FORCED,
OS_HYPERVISOR_UNKNOWN
} HYPERVISOR_TYPE;

View File

@ -115,11 +115,6 @@ struct nv_kthread_q_item
void *function_args;
};
#if defined(NV_KTHREAD_CREATE_ON_NODE_PRESENT)
#define NV_KTHREAD_Q_SUPPORTS_AFFINITY() 1
#else
#define NV_KTHREAD_Q_SUPPORTS_AFFINITY() 0
#endif
#ifndef NUMA_NO_NODE
#define NUMA_NO_NODE (-1)
@ -142,18 +137,12 @@ struct nv_kthread_q_item
//
// A short prefix of the qname arg will show up in []'s, via the ps(1) utility.
//
// The kernel thread stack is preferably allocated on the specified NUMA node if
// NUMA-affinity (NV_KTHREAD_Q_SUPPORTS_AFFINITY() == 1) is supported, but
// fallback to another node is possible because kernel allocators do not
// The kernel thread stack is preferably allocated on the specified NUMA node,
// but fallback to another node is possible because kernel allocators do not
// guarantee affinity. Note that NUMA-affinity applies only to
// the kthread stack. This API does not do anything about limiting the CPU
// affinity of the kthread. That is left to the caller.
//
// On kernels, which do not support NUMA-aware kthread stack allocations
// (NV_KTHTREAD_Q_SUPPORTS_AFFINITY() == 0), the API will return -ENOTSUPP
// if the value supplied for 'preferred_node' is anything other than
// NV_KTHREAD_NO_NODE.
//
// Reusing a queue: once a queue is initialized, it must be safely shut down
// (see "Stopping the queue(s)", below), before it can be reused. So, for
// a simple queue use case, the following will work:
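// Illustrative sketch (a minimal init/stop cycle; the queue function names
// nv_kthread_q_init, nv_kthread_q_item_init, nv_kthread_q_schedule_q_item and
// nv_kthread_q_stop are assumed from this API, and callback/args are
// placeholders):
//
//     nv_kthread_q_t q;
//     nv_kthread_q_item_t item;
//
//     nv_kthread_q_init(&q, "nvq");
//     nv_kthread_q_item_init(&item, callback, args);
//     nv_kthread_q_schedule_q_item(&q, &item);
//     nv_kthread_q_stop(&q);
//
//     nv_kthread_q_init(&q, "nvq");   // safe to reuse only after the stop above
//     nv_kthread_q_stop(&q);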

View File

@ -1,5 +1,5 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 2001-2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-FileCopyrightText: Copyright (c) 2001-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person obtaining a
@ -191,13 +191,6 @@
*/
#define NV_CURRENT_EUID() (__kuid_val(current->cred->euid))
#if !defined(NV_KUID_T_PRESENT)
static inline uid_t __kuid_val(uid_t uid)
{
return uid;
}
#endif
#if defined(CONFIG_VGA_ARB)
#include <linux/vgaarb.h>
#endif
@ -234,18 +227,6 @@ static inline uid_t __kuid_val(uid_t uid)
#include <asm-generic/pci-dma-compat.h>
#endif
#if defined(NV_EFI_ENABLED_PRESENT) && defined(NV_EFI_ENABLED_ARGUMENT_COUNT)
#if (NV_EFI_ENABLED_ARGUMENT_COUNT == 1)
#define NV_EFI_ENABLED() efi_enabled(EFI_BOOT)
#else
#error "NV_EFI_ENABLED_ARGUMENT_COUNT value unrecognized!"
#endif
#elif (defined(NV_EFI_ENABLED_PRESENT) || defined(efi_enabled))
#define NV_EFI_ENABLED() efi_enabled
#else
#define NV_EFI_ENABLED() 0
#endif
#if defined(CONFIG_CRAY_XT)
#include <cray/cray_nvidia.h>
NV_STATUS nvos_forward_error_to_cray(struct pci_dev *, NvU32,
@ -521,7 +502,7 @@ static inline void *nv_vmalloc(unsigned long size)
return ptr;
}
static inline void nv_vfree(void *ptr, NvU32 size)
static inline void nv_vfree(void *ptr, NvU64 size)
{
NV_MEMDBG_REMOVE(ptr, size);
vfree(ptr);
@ -592,11 +573,7 @@ static NvBool nv_numa_node_has_memory(int node_id)
{
if (node_id < 0 || node_id >= MAX_NUMNODES)
return NV_FALSE;
#if defined(NV_NODE_STATES_N_MEMORY_PRESENT)
return node_state(node_id, N_MEMORY) ? NV_TRUE : NV_FALSE;
#else
return node_state(node_id, N_HIGH_MEMORY) ? NV_TRUE : NV_FALSE;
#endif
}
#define NV_KMALLOC(ptr, size) \
@ -606,6 +583,13 @@ static NvBool nv_numa_node_has_memory(int node_id)
NV_MEMDBG_ADD(ptr, size); \
}
#define NV_KZALLOC(ptr, size) \
{ \
(ptr) = kzalloc(size, NV_GFP_KERNEL); \
if (ptr) \
NV_MEMDBG_ADD(ptr, size); \
}
#define NV_KMALLOC_ATOMIC(ptr, size) \
{ \
(ptr) = kmalloc(size, NV_GFP_ATOMIC); \
@ -838,10 +822,8 @@ static inline dma_addr_t nv_phys_to_dma(struct device *dev, NvU64 pa)
})
#endif
#if defined(NV_PCI_STOP_AND_REMOVE_BUS_DEVICE_PRESENT) // introduced in 3.4.9
#if defined(NV_PCI_STOP_AND_REMOVE_BUS_DEVICE_PRESENT) // introduced in 3.18-rc1 for aarch64
#define NV_PCI_STOP_AND_REMOVE_BUS_DEVICE(pci_dev) pci_stop_and_remove_bus_device(pci_dev)
#elif defined(NV_PCI_REMOVE_BUS_DEVICE_PRESENT) // introduced in 2.6
#define NV_PCI_STOP_AND_REMOVE_BUS_DEVICE(pci_dev) pci_remove_bus_device(pci_dev)
#endif
#define NV_PRINT_AT(nv_debug_level,at) \
@ -1139,11 +1121,14 @@ static inline int nv_kmem_cache_alloc_stack(nvidia_stack_t **stack)
{
nvidia_stack_t *sp = NULL;
#if defined(NVCPU_X86_64)
sp = NV_KMEM_CACHE_ALLOC(nvidia_stack_t_cache);
if (sp == NULL)
return -ENOMEM;
sp->size = sizeof(sp->stack);
sp->top = sp->stack + sp->size;
if (rm_is_altstack_in_use())
{
sp = NV_KMEM_CACHE_ALLOC(nvidia_stack_t_cache);
if (sp == NULL)
return -ENOMEM;
sp->size = sizeof(sp->stack);
sp->top = sp->stack + sp->size;
}
#endif
*stack = sp;
return 0;
@ -1152,7 +1137,7 @@ static inline int nv_kmem_cache_alloc_stack(nvidia_stack_t **stack)
static inline void nv_kmem_cache_free_stack(nvidia_stack_t *stack)
{
#if defined(NVCPU_X86_64)
if (stack != NULL)
if (stack != NULL && rm_is_altstack_in_use())
{
NV_KMEM_CACHE_FREE(stack, nvidia_stack_t_cache);
}
@ -1386,8 +1371,7 @@ typedef struct nv_dma_map_s {
* xen_swiotlb_map_sg_attrs may try to route to the SWIOTLB. We must only use
* single-page sg elements on Xen Server.
*/
#if defined(NV_SG_ALLOC_TABLE_FROM_PAGES_PRESENT) && \
!defined(NV_DOM0_KERNEL_PRESENT)
#if !defined(NV_DOM0_KERNEL_PRESENT)
#define NV_ALLOC_DMA_SUBMAP_SCATTERLIST(dm, sm, i) \
((sg_alloc_table_from_pages(&sm->sgt, \
&dm->pages[NV_DMA_SUBMAP_IDX_TO_PAGE_IDX(i)], \
@ -1667,6 +1651,27 @@ static inline nv_linux_file_private_t *nv_get_nvlfp_from_nvfp(nv_file_private_t
#define NV_STATE_PTR(nvl) &(((nv_linux_state_t *)(nvl))->nv_state)
static inline nvidia_stack_t *nv_nvlfp_get_sp(nv_linux_file_private_t *nvlfp, nvidia_entry_point_index_t which)
{
#if defined(NVCPU_X86_64)
if (rm_is_altstack_in_use())
{
down(&nvlfp->fops_sp_lock[which]);
return nvlfp->fops_sp[which];
}
#endif
return NULL;
}
static inline void nv_nvlfp_put_sp(nv_linux_file_private_t *nvlfp, nvidia_entry_point_index_t which)
{
#if defined(NVCPU_X86_64)
if (rm_is_altstack_in_use())
{
up(&nvlfp->fops_sp_lock[which]);
}
#endif
}
#define NV_ATOMIC_READ(data) atomic_read(&(data))
#define NV_ATOMIC_SET(data,val) atomic_set(&(data), (val))
@ -1895,20 +1900,12 @@ static inline NvU32 nv_default_irq_flags(nv_state_t *nv)
#define NV_GET_UNUSED_FD_FLAGS(flags) (-1)
#endif
#if defined(NV_SET_CLOSE_ON_EXEC_PRESENT)
#define NV_SET_CLOSE_ON_EXEC(fd, fdt) __set_close_on_exec(fd, fdt)
#elif defined(NV_LINUX_TIME_H_PRESENT) && defined(FD_SET)
#define NV_SET_CLOSE_ON_EXEC(fd, fdt) FD_SET(fd, fdt->close_on_exec)
#else
#define NV_SET_CLOSE_ON_EXEC(fd, fdt) __set_bit(fd, fdt->close_on_exec)
#endif
#define MODULE_BASE_NAME "nvidia"
#define MODULE_INSTANCE_NUMBER 0
#define MODULE_INSTANCE_STRING ""
#define MODULE_NAME MODULE_BASE_NAME MODULE_INSTANCE_STRING
NvS32 nv_request_soc_irq(nv_linux_state_t *, NvU32, nv_soc_irq_type_t, NvU32, NvU32);
NvS32 nv_request_soc_irq(nv_linux_state_t *, NvU32, nv_soc_irq_type_t, NvU32, NvU32, const char*);
static inline void nv_mutex_destroy(struct mutex *lock)
{

View File

@ -1,5 +1,5 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 2013-2020 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-FileCopyrightText: Copyright (c) 2013-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person obtaining a
@ -73,21 +73,4 @@
}
#endif
#if defined(NV_HLIST_FOR_EACH_ENTRY_ARGUMENT_COUNT)
#if NV_HLIST_FOR_EACH_ENTRY_ARGUMENT_COUNT == 3
#define nv_hlist_for_each_entry(pos, head, member) \
hlist_for_each_entry(pos, head, member)
#else
#if !defined(hlist_entry_safe)
#define hlist_entry_safe(ptr, type, member) \
(ptr) ? hlist_entry(ptr, type, member) : NULL
#endif
#define nv_hlist_for_each_entry(pos, head, member) \
for (pos = hlist_entry_safe((head)->first, typeof(*(pos)), member); \
pos; \
pos = hlist_entry_safe((pos)->member.next, typeof(*(pos)), member))
#endif
#endif // NV_HLIST_FOR_EACH_ENTRY_ARGUMENT_COUNT
#endif // __NV_LIST_HELPERS_H__

View File

@ -29,6 +29,25 @@
typedef int vm_fault_t;
#endif
/* pin_user_pages
* Presence of pin_user_pages() also implies the presence of unpin_user_page().
* Both were added in v5.6-rc1.
*
* pin_user_pages() was added by commit eddb1c228f7951d399240
* ("mm/gup: introduce pin_user_pages*() and FOLL_PIN") in v5.6-rc1 (2020-01-30)
*
*/
#include <linux/mm.h>
#include <linux/sched.h>
#if defined(NV_PIN_USER_PAGES_PRESENT)
#define NV_PIN_USER_PAGES pin_user_pages
#define NV_UNPIN_USER_PAGE unpin_user_page
#else
#define NV_PIN_USER_PAGES NV_GET_USER_PAGES
#define NV_UNPIN_USER_PAGE put_page
#endif // NV_PIN_USER_PAGES_PRESENT
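/* Illustrative sketch (assumes the caller already holds the mm's mmap lock;
 * user_va is a placeholder): pin one writable user page through the wrappers
 * above, then release it.
 *
 *     struct page *page;
 *     long pinned = NV_PIN_USER_PAGES(user_va, 1, FOLL_WRITE, &page, NULL);
 *     if (pinned == 1)
 *     {
 *         ...
 *         NV_UNPIN_USER_PAGE(page);
 *     }
 */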
/* get_user_pages
*
* The 8-argument version of get_user_pages was deprecated by commit
@ -47,51 +66,57 @@ typedef int vm_fault_t;
*
*/
#if defined(NV_GET_USER_PAGES_HAS_ARGS_WRITE_FORCE)
#if defined(NV_GET_USER_PAGES_HAS_ARGS_FLAGS)
#define NV_GET_USER_PAGES get_user_pages
#elif defined(NV_GET_USER_PAGES_HAS_ARGS_TSK_WRITE_FORCE)
#define NV_GET_USER_PAGES(start, nr_pages, write, force, pages, vmas) \
get_user_pages(current, current->mm, start, nr_pages, write, force, pages, vmas)
#elif defined(NV_GET_USER_PAGES_HAS_ARGS_TSK_FLAGS)
#define NV_GET_USER_PAGES(start, nr_pages, flags, pages, vmas) \
get_user_pages(current, current->mm, start, nr_pages, flags, pages, vmas)
#else
#include <linux/mm.h>
#include <linux/sched.h>
static inline long NV_GET_USER_PAGES(unsigned long start,
unsigned long nr_pages,
int write,
int force,
unsigned int flags,
struct page **pages,
struct vm_area_struct **vmas)
{
unsigned int flags = 0;
int write = flags & FOLL_WRITE;
int force = flags & FOLL_FORCE;
if (write)
flags |= FOLL_WRITE;
if (force)
flags |= FOLL_FORCE;
#if defined(NV_GET_USER_PAGES_HAS_ARGS_TSK_FLAGS)
return get_user_pages(current, current->mm, start, nr_pages, flags,
pages, vmas);
#if defined(NV_GET_USER_PAGES_HAS_ARGS_WRITE_FORCE)
return get_user_pages(start, nr_pages, write, force, pages, vmas);
#else
// remaining definition (NV_GET_USER_PAGES_HAS_ARGS_FLAGS)
return get_user_pages(start, nr_pages, flags, pages, vmas);
#endif
// NV_GET_USER_PAGES_HAS_ARGS_TSK_WRITE_FORCE
return get_user_pages(current, current->mm, start, nr_pages, write,
force, pages, vmas);
#endif // NV_GET_USER_PAGES_HAS_ARGS_WRITE_FORCE
}
#endif
#endif // NV_GET_USER_PAGES_HAS_ARGS_FLAGS
/* pin_user_pages_remote
*
* pin_user_pages_remote() was added by commit eddb1c228f7951d399240
* ("mm/gup: introduce pin_user_pages*() and FOLL_PIN") in v5.6 (2020-01-30)
*
* pin_user_pages_remote() removed 'tsk' parameter by commit
* 64019a2e467a ("mm/gup: remove task_struct pointer for all gup code")
* in v5.9-rc1 (2020-08-11).
*
*/
#if defined(NV_PIN_USER_PAGES_REMOTE_PRESENT)
#if defined (NV_PIN_USER_PAGES_REMOTE_HAS_ARGS_TSK)
#define NV_PIN_USER_PAGES_REMOTE(mm, start, nr_pages, flags, pages, vmas, locked) \
pin_user_pages_remote(NULL, mm, start, nr_pages, flags, pages, vmas, locked)
#else
#define NV_PIN_USER_PAGES_REMOTE pin_user_pages_remote
#endif // NV_PIN_USER_PAGES_REMOTE_HAS_ARGS_TSK
#else
#define NV_PIN_USER_PAGES_REMOTE NV_GET_USER_PAGES_REMOTE
#endif // NV_PIN_USER_PAGES_REMOTE_PRESENT
/*
* get_user_pages_remote() was added by commit 1e9877902dc7
* ("mm/gup: Introduce get_user_pages_remote()") in v4.6 (2016-02-12).
*
* The very next commit cde70140fed8 ("mm/gup: Overload get_user_pages()
* functions") deprecated the 8-argument version of get_user_pages for the
* non-remote case (calling get_user_pages with current and current->mm).
*
* The guidelines are: call NV_GET_USER_PAGES_REMOTE if you need the 8-argument
* version that uses something other than current and current->mm. Use
* NV_GET_USER_PAGES if you are referring to current and current->mm.
*
* Note that get_user_pages_remote() requires the caller to hold a reference on
* the task_struct (if non-NULL and if this API has tsk argument) and the mm_struct.
* This will always be true when using current and current->mm. If the kernel passes
@ -113,66 +138,55 @@ typedef int vm_fault_t;
*/
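/* Illustrative sketch (variable names are placeholders): the remote wrapper
 * takes an explicit mm (e.g. one obtained via get_task_mm() and still
 * referenced), while the non-remote wrapper implicitly uses current->mm.
 *
 *     NV_GET_USER_PAGES_REMOTE(mm, start, nr_pages, FOLL_WRITE, pages, NULL, NULL);
 *     NV_GET_USER_PAGES(start, nr_pages, FOLL_WRITE, pages, NULL);
 */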
#if defined(NV_GET_USER_PAGES_REMOTE_PRESENT)
#if defined(NV_GET_USER_PAGES_REMOTE_HAS_ARGS_TSK_WRITE_FORCE)
#define NV_GET_USER_PAGES_REMOTE get_user_pages_remote
#if defined(NV_GET_USER_PAGES_REMOTE_HAS_ARGS_FLAGS_LOCKED)
#define NV_GET_USER_PAGES_REMOTE get_user_pages_remote
#elif defined(NV_GET_USER_PAGES_REMOTE_HAS_ARGS_TSK_FLAGS_LOCKED)
#define NV_GET_USER_PAGES_REMOTE(mm, start, nr_pages, flags, pages, vmas, locked) \
get_user_pages_remote(NULL, mm, start, nr_pages, flags, pages, vmas, locked)
#elif defined(NV_GET_USER_PAGES_REMOTE_HAS_ARGS_TSK_FLAGS)
#define NV_GET_USER_PAGES_REMOTE(mm, start, nr_pages, flags, pages, vmas, locked) \
get_user_pages_remote(NULL, mm, start, nr_pages, flags, pages, vmas)
#else
static inline long NV_GET_USER_PAGES_REMOTE(struct task_struct *tsk,
struct mm_struct *mm,
// NV_GET_USER_PAGES_REMOTE_HAS_ARGS_TSK_WRITE_FORCE
static inline long NV_GET_USER_PAGES_REMOTE(struct mm_struct *mm,
unsigned long start,
unsigned long nr_pages,
int write,
int force,
unsigned int flags,
struct page **pages,
struct vm_area_struct **vmas)
struct vm_area_struct **vmas,
int *locked)
{
unsigned int flags = 0;
int write = flags & FOLL_WRITE;
int force = flags & FOLL_FORCE;
if (write)
flags |= FOLL_WRITE;
if (force)
flags |= FOLL_FORCE;
#if defined(NV_GET_USER_PAGES_REMOTE_HAS_ARGS_TSK_FLAGS)
return get_user_pages_remote(tsk, mm, start, nr_pages, flags,
return get_user_pages_remote(NULL, mm, start, nr_pages, write, force,
pages, vmas);
#elif defined(NV_GET_USER_PAGES_REMOTE_HAS_ARGS_TSK_FLAGS_LOCKED)
return get_user_pages_remote(tsk, mm, start, nr_pages, flags,
pages, vmas, NULL);
#else
// remaining defined(NV_GET_USER_PAGES_REMOTE_HAS_ARGS_FLAGS_LOCKED)
return get_user_pages_remote(mm, start, nr_pages, flags,
pages, vmas, NULL);
#endif
}
#endif
#endif // NV_GET_USER_PAGES_REMOTE_HAS_ARGS_FLAGS_LOCKED
#else
#if defined(NV_GET_USER_PAGES_HAS_ARGS_TSK_WRITE_FORCE)
#define NV_GET_USER_PAGES_REMOTE get_user_pages
#else
#include <linux/mm.h>
#include <linux/sched.h>
static inline long NV_GET_USER_PAGES_REMOTE(struct task_struct *tsk,
struct mm_struct *mm,
static inline long NV_GET_USER_PAGES_REMOTE(struct mm_struct *mm,
unsigned long start,
unsigned long nr_pages,
int write,
int force,
unsigned int flags,
struct page **pages,
struct vm_area_struct **vmas)
struct vm_area_struct **vmas,
int *locked)
{
unsigned int flags = 0;
int write = flags & FOLL_WRITE;
int force = flags & FOLL_FORCE;
if (write)
flags |= FOLL_WRITE;
if (force)
flags |= FOLL_FORCE;
return get_user_pages(tsk, mm, start, nr_pages, flags, pages, vmas);
return get_user_pages(NULL, mm, start, nr_pages, write, force, pages, vmas);
}
#endif
#endif
#else
#define NV_GET_USER_PAGES_REMOTE(mm, start, nr_pages, flags, pages, vmas, locked) \
get_user_pages(NULL, mm, start, nr_pages, flags, pages, vmas)
#endif // NV_GET_USER_PAGES_HAS_ARGS_TSK_WRITE_FORCE
#endif // NV_GET_USER_PAGES_REMOTE_PRESENT
/*
* The .virtual_address field was effectively renamed to .address, by these

View File

@ -27,17 +27,6 @@
#include <linux/pci.h>
#include "nv-linux.h"
#if defined(NV_DEV_IS_PCI_PRESENT)
#define nv_dev_is_pci(dev) dev_is_pci(dev)
#else
/*
* Non-PCI devices are only supported on kernels which expose the
* dev_is_pci() function. For older kernels, we only support PCI
* devices, hence returning true to take all the PCI code paths.
*/
#define nv_dev_is_pci(dev) (true)
#endif
int nv_pci_register_driver(void);
void nv_pci_unregister_driver(void);
int nv_pci_count_devices(void);

View File

@ -78,13 +78,8 @@ static inline pgprot_t pgprot_modify_writecombine(pgprot_t old_prot)
#define NV_PGPROT_UNCACHED_DEVICE(old_prot) pgprot_noncached(old_prot)
#if defined(NVCPU_AARCH64)
#if defined(NV_MT_DEVICE_GRE_PRESENT)
#define NV_PROT_WRITE_COMBINED_DEVICE (PROT_DEFAULT | PTE_PXN | PTE_UXN | \
PTE_ATTRINDX(MT_DEVICE_GRE))
#else
#define NV_PROT_WRITE_COMBINED_DEVICE (PROT_DEFAULT | PTE_PXN | PTE_UXN | \
PTE_ATTRINDX(MT_DEVICE_nGnRE))
#endif
#define NV_PGPROT_WRITE_COMBINED_DEVICE(old_prot) \
__pgprot_modify(old_prot, PTE_ATTRINDX_MASK, NV_PROT_WRITE_COMBINED_DEVICE)
#define NV_PGPROT_WRITE_COMBINED(old_prot) NV_PGPROT_UNCACHED(old_prot)

View File

@ -74,21 +74,8 @@ typedef struct file_operations nv_proc_ops_t;
__entry; \
})
/*
* proc_mkdir_mode exists in Linux 2.6.9, but isn't exported until Linux 3.0.
* Use the older interface instead unless the newer interface is necessary.
*/
#if defined(NV_PROC_REMOVE_PRESENT)
# define NV_PROC_MKDIR_MODE(name, mode, parent) \
proc_mkdir_mode(name, mode, parent)
#else
# define NV_PROC_MKDIR_MODE(name, mode, parent) \
({ \
struct proc_dir_entry *__entry; \
__entry = create_proc_entry(name, mode, parent); \
__entry; \
})
#endif
#define NV_CREATE_PROC_DIR(name,parent) \
({ \
@ -104,16 +91,6 @@ typedef struct file_operations nv_proc_ops_t;
#define NV_PDE_DATA(inode) PDE_DATA(inode)
#endif
#if defined(NV_PROC_REMOVE_PRESENT)
# define NV_REMOVE_PROC_ENTRY(entry) \
proc_remove(entry);
#else
# define NV_REMOVE_PROC_ENTRY(entry) \
remove_proc_entry(entry->name, entry->parent);
#endif
void nv_procfs_unregister_all(struct proc_dir_entry *entry,
struct proc_dir_entry *delimiter);
#define NV_DEFINE_SINGLE_PROCFS_FILE_HELPER(name, lock) \
static int nv_procfs_open_##name( \
struct inode *inode, \

View File

@ -54,8 +54,6 @@ void nv_free_contig_pages (nv_alloc_t *);
NV_STATUS nv_alloc_system_pages (nv_state_t *, nv_alloc_t *);
void nv_free_system_pages (nv_alloc_t *);
void nv_address_space_init_once (struct address_space *mapping);
int nv_uvm_init (void);
void nv_uvm_exit (void);
NV_STATUS nv_uvm_suspend (void);

View File

@ -40,6 +40,7 @@
#include <nvstatus.h>
#include "nv_stdarg.h"
#include <nv-caps.h>
#include <nv-firmware.h>
#include <nv-ioctl.h>
#include <nvmisc.h>
@ -160,8 +161,14 @@ typedef enum _TEGRASOC_WHICH_CLK
TEGRASOC_WHICH_CLK_MAUD,
TEGRASOC_WHICH_CLK_AZA_2XBIT,
TEGRASOC_WHICH_CLK_AZA_BIT,
TEGRA234_CLK_MIPI_CAL,
TEGRA234_CLK_UART_FST_MIPI_CAL,
TEGRASOC_WHICH_CLK_MIPI_CAL,
TEGRASOC_WHICH_CLK_UART_FST_MIPI_CAL,
TEGRASOC_WHICH_CLK_SOR0_DIV,
TEGRASOC_WHICH_CLK_DISP_ROOT,
TEGRASOC_WHICH_CLK_HUB_ROOT,
TEGRASOC_WHICH_CLK_PLLA_DISP,
TEGRASOC_WHICH_CLK_PLLA_DISPHUB,
TEGRASOC_WHICH_CLK_PLLA,
TEGRASOC_WHICH_CLK_MAX, // TEGRASOC_WHICH_CLK_MAX is defined for boundary checks only.
} TEGRASOC_WHICH_CLK;
@ -304,7 +311,7 @@ typedef struct nv_alloc_mapping_context_s {
typedef enum
{
NV_SOC_IRQ_DISPLAY_TYPE,
NV_SOC_IRQ_DISPLAY_TYPE = 0x1,
NV_SOC_IRQ_DPAUX_TYPE,
NV_SOC_IRQ_GPIO_TYPE,
NV_SOC_IRQ_HDACODEC_TYPE,
@ -368,6 +375,7 @@ typedef struct nv_state_t
nv_aperture_t *mipical_regs;
nv_aperture_t *fb, ud;
nv_aperture_t *simregs;
nv_aperture_t *emc_regs;
NvU32 num_dpaux_instance;
NvU32 interrupt_line;
@ -430,9 +438,6 @@ typedef struct nv_state_t
/* Variable to force allocation of 32-bit addressable memory */
NvBool force_dma32_alloc;
/* Variable to track if device has entered dynamic power state */
NvBool dynamic_power_entered;
/* PCI power state should be D0 during system suspend */
NvBool d0_state_in_suspend;
@ -465,6 +470,9 @@ typedef struct nv_state_t
/* Check if NVPCF DSM function is implemented under NVPCF or GPU device scope */
NvBool nvpcf_dsm_in_gpu_scope;
/* Bool to check if the device received a shutdown notification */
NvBool is_shutdown;
} nv_state_t;
// These define need to be in sync with defines in system.h
@ -473,6 +481,10 @@ typedef struct nv_state_t
#define OS_TYPE_SUNOS 0x3
#define OS_TYPE_VMWARE 0x4
#define NVFP_TYPE_NONE 0x0
#define NVFP_TYPE_REFCOUNTED 0x1
#define NVFP_TYPE_REGISTERED 0x2
struct nv_file_private_t
{
NvHandle *handles;
@ -482,6 +494,7 @@ struct nv_file_private_t
nv_file_private_t *ctl_nvfp;
void *ctl_nvfp_priv;
NvU32 register_or_refcount;
};
// Forward define the gpu ops structures
@ -513,8 +526,9 @@ typedef struct UvmGpuChannelResourceBindParams_tag *nvgpuChannelResourceBindPar
typedef struct UvmGpuPagingChannelAllocParams_tag nvgpuPagingChannelAllocParams_t;
typedef struct UvmGpuPagingChannel_tag *nvgpuPagingChannelHandle_t;
typedef struct UvmGpuPagingChannelInfo_tag *nvgpuPagingChannelInfo_t;
typedef NV_STATUS (*nvPmaEvictPagesCallback)(void *, NvU32, NvU64 *, NvU32, NvU64, NvU64);
typedef NV_STATUS (*nvPmaEvictRangeCallback)(void *, NvU64, NvU64);
typedef enum UvmPmaGpuMemoryType_tag nvgpuGpuMemoryType_t;
typedef NV_STATUS (*nvPmaEvictPagesCallback)(void *, NvU32, NvU64 *, NvU32, NvU64, NvU64, nvgpuGpuMemoryType_t);
typedef NV_STATUS (*nvPmaEvictRangeCallback)(void *, NvU64, NvU64, nvgpuGpuMemoryType_t);
/*
* flags
@ -566,12 +580,6 @@ typedef enum
NV_POWER_STATE_RUNNING
} nv_power_state_t;
typedef enum
{
NV_FIRMWARE_GSP,
NV_FIRMWARE_GSP_LOG
} nv_firmware_t;
#define NV_PRIMARY_VGA(nv) ((nv)->primary_vga)
#define NV_IS_CTL_DEVICE(nv) ((nv)->flags & NV_FLAG_CONTROL)
@ -587,12 +595,6 @@ typedef enum
#define NV_SOC_IS_ISO_IOMMU_PRESENT(nv) \
((nv)->iso_iommu_present)
/*
* NVIDIA ACPI event ID to be passed into the core NVIDIA driver for
* AC/DC event.
*/
#define NV_SYSTEM_ACPI_BATTERY_POWER_EVENT 0x8002
/*
* GPU add/remove events
*/
@ -604,8 +606,6 @@ typedef enum
* to core NVIDIA driver for ACPI events.
*/
#define NV_SYSTEM_ACPI_EVENT_VALUE_DISPLAY_SWITCH_DEFAULT 0
#define NV_SYSTEM_ACPI_EVENT_VALUE_POWER_EVENT_AC 0
#define NV_SYSTEM_ACPI_EVENT_VALUE_POWER_EVENT_BATTERY 1
#define NV_SYSTEM_ACPI_EVENT_VALUE_DOCK_EVENT_UNDOCKED 0
#define NV_SYSTEM_ACPI_EVENT_VALUE_DOCK_EVENT_DOCKED 1
@ -616,14 +616,18 @@ typedef enum
#define NV_EVAL_ACPI_METHOD_NVIF 0x01
#define NV_EVAL_ACPI_METHOD_WMMX 0x02
#define NV_I2C_CMD_READ 1
#define NV_I2C_CMD_WRITE 2
#define NV_I2C_CMD_SMBUS_READ 3
#define NV_I2C_CMD_SMBUS_WRITE 4
#define NV_I2C_CMD_SMBUS_QUICK_WRITE 5
#define NV_I2C_CMD_SMBUS_QUICK_READ 6
#define NV_I2C_CMD_SMBUS_BLOCK_READ 7
#define NV_I2C_CMD_SMBUS_BLOCK_WRITE 8
typedef enum {
NV_I2C_CMD_READ = 1,
NV_I2C_CMD_WRITE,
NV_I2C_CMD_SMBUS_READ,
NV_I2C_CMD_SMBUS_WRITE,
NV_I2C_CMD_SMBUS_QUICK_WRITE,
NV_I2C_CMD_SMBUS_QUICK_READ,
NV_I2C_CMD_SMBUS_BLOCK_READ,
NV_I2C_CMD_SMBUS_BLOCK_WRITE,
NV_I2C_CMD_BLOCK_READ,
NV_I2C_CMD_BLOCK_WRITE
} nv_i2c_cmd_t;
// Flags needed by OSAllocPagesNode
#define NV_ALLOC_PAGES_NODE_NONE 0x0
@ -636,27 +640,33 @@ typedef enum
#define NV_GET_NV_STATE(pGpu) \
(nv_state_t *)((pGpu) ? (pGpu)->pOsGpuInfo : NULL)
#define IS_REG_OFFSET(nv, offset, length) \
(((offset) >= (nv)->regs->cpu_address) && \
(((offset) + ((length)-1)) <= \
(nv)->regs->cpu_address + ((nv)->regs->size-1)))
static inline NvBool IS_REG_OFFSET(nv_state_t *nv, NvU64 offset, NvU64 length)
{
return ((offset >= nv->regs->cpu_address) &&
((offset + (length - 1)) <= (nv->regs->cpu_address + (nv->regs->size - 1))));
}
#define IS_FB_OFFSET(nv, offset, length) \
(((nv)->fb) && ((offset) >= (nv)->fb->cpu_address) && \
(((offset) + ((length)-1)) <= (nv)->fb->cpu_address + ((nv)->fb->size-1)))
static inline NvBool IS_FB_OFFSET(nv_state_t *nv, NvU64 offset, NvU64 length)
{
return ((nv->fb) && (offset >= nv->fb->cpu_address) &&
((offset + (length - 1)) <= (nv->fb->cpu_address + (nv->fb->size - 1))));
}
#define IS_UD_OFFSET(nv, offset, length) \
(((nv)->ud.cpu_address != 0) && ((nv)->ud.size != 0) && \
((offset) >= (nv)->ud.cpu_address) && \
(((offset) + ((length)-1)) <= (nv)->ud.cpu_address + ((nv)->ud.size-1)))
static inline NvBool IS_UD_OFFSET(nv_state_t *nv, NvU64 offset, NvU64 length)
{
return ((nv->ud.cpu_address != 0) && (nv->ud.size != 0) &&
(offset >= nv->ud.cpu_address) &&
((offset + (length - 1)) <= (nv->ud.cpu_address + (nv->ud.size - 1))));
}
#define IS_IMEM_OFFSET(nv, offset, length) \
(((nv)->bars[NV_GPU_BAR_INDEX_IMEM].cpu_address != 0) && \
((nv)->bars[NV_GPU_BAR_INDEX_IMEM].size != 0) && \
((offset) >= (nv)->bars[NV_GPU_BAR_INDEX_IMEM].cpu_address) && \
(((offset) + ((length) - 1)) <= \
(nv)->bars[NV_GPU_BAR_INDEX_IMEM].cpu_address + \
((nv)->bars[NV_GPU_BAR_INDEX_IMEM].size - 1)))
static inline NvBool IS_IMEM_OFFSET(nv_state_t *nv, NvU64 offset, NvU64 length)
{
return ((nv->bars[NV_GPU_BAR_INDEX_IMEM].cpu_address != 0) &&
(nv->bars[NV_GPU_BAR_INDEX_IMEM].size != 0) &&
(offset >= nv->bars[NV_GPU_BAR_INDEX_IMEM].cpu_address) &&
((offset + (length - 1)) <= (nv->bars[NV_GPU_BAR_INDEX_IMEM].cpu_address +
(nv->bars[NV_GPU_BAR_INDEX_IMEM].size - 1))));
}
#define NV_RM_MAX_MSIX_LINES 8
@ -787,7 +797,7 @@ NV_STATUS NV_API_CALL nv_pci_trigger_recovery (nv_state_t *);
NvBool NV_API_CALL nv_requires_dma_remap (nv_state_t *);
NvBool NV_API_CALL nv_is_rm_firmware_active(nv_state_t *);
const void*NV_API_CALL nv_get_firmware(nv_state_t *, nv_firmware_t, const void **, NvU32 *);
const void*NV_API_CALL nv_get_firmware(nv_state_t *, nv_firmware_type_t, nv_firmware_chip_family_t, const void **, NvU32 *);
void NV_API_CALL nv_put_firmware(const void *);
nv_file_private_t* NV_API_CALL nv_get_file_private(NvS32, NvBool, void **);
@ -828,6 +838,7 @@ NV_STATUS NV_API_CALL nv_acquire_fabric_mgmt_cap (int, int*);
int NV_API_CALL nv_cap_drv_init(void);
void NV_API_CALL nv_cap_drv_exit(void);
NvBool NV_API_CALL nv_is_gpu_accessible(nv_state_t *);
NvBool NV_API_CALL nv_match_gpu_os_info(nv_state_t *, void *);
NvU32 NV_API_CALL nv_get_os_type(void);
@ -916,11 +927,11 @@ NvBool NV_API_CALL rm_is_supported_pci_device(NvU8 pci_class,
void NV_API_CALL rm_i2c_remove_adapters (nvidia_stack_t *, nv_state_t *);
NvBool NV_API_CALL rm_i2c_is_smbus_capable (nvidia_stack_t *, nv_state_t *, void *);
NV_STATUS NV_API_CALL rm_i2c_transfer (nvidia_stack_t *, nv_state_t *, void *, NvU8, NvU8, NvU8, NvU32, NvU8 *);
NV_STATUS NV_API_CALL rm_i2c_transfer (nvidia_stack_t *, nv_state_t *, void *, nv_i2c_cmd_t, NvU8, NvU8, NvU32, NvU8 *);
NV_STATUS NV_API_CALL rm_perform_version_check (nvidia_stack_t *, void *, NvU32);
NV_STATUS NV_API_CALL rm_system_event (nvidia_stack_t *, NvU32, NvU32);
void NV_API_CALL rm_power_source_change_event (nvidia_stack_t *, NvU32);
void NV_API_CALL rm_disable_gpu_state_persistence (nvidia_stack_t *sp, nv_state_t *);
NV_STATUS NV_API_CALL rm_p2p_init_mapping (nvidia_stack_t *, NvU64, NvU64 *, NvU64 *, NvU64 *, NvU64 *, NvU64, NvU64, NvU64, NvU64, void (*)(void *), void *);
@ -944,6 +955,7 @@ void NV_API_CALL rm_kernel_rmapi_op(nvidia_stack_t *sp, void *ops_cmd);
NvBool NV_API_CALL rm_get_device_remove_flag(nvidia_stack_t *sp, NvU32 gpu_id);
NV_STATUS NV_API_CALL rm_gpu_copy_mmu_faults(nvidia_stack_t *, nv_state_t *, NvU32 *);
NV_STATUS NV_API_CALL rm_gpu_copy_mmu_faults_unlocked(nvidia_stack_t *, nv_state_t *, NvU32 *);
NV_STATUS NV_API_CALL rm_gpu_handle_mmu_faults(nvidia_stack_t *, nv_state_t *, NvU32 *);
NvBool NV_API_CALL rm_gpu_need_4k_page_isolation(nv_state_t *);
NvBool NV_API_CALL rm_is_chipset_io_coherent(nv_stack_t *);
NvBool NV_API_CALL rm_init_event_locks(nvidia_stack_t *, nv_state_t *);
@ -969,12 +981,13 @@ const char* NV_API_CALL rm_get_dynamic_power_management_status(nvidia_stack_t *,
const char* NV_API_CALL rm_get_gpu_gcx_support(nvidia_stack_t *, nv_state_t *, NvBool);
void NV_API_CALL rm_acpi_notify(nvidia_stack_t *, nv_state_t *, NvU32);
NV_STATUS NV_API_CALL rm_get_clientnvpcf_power_limits(nvidia_stack_t *, nv_state_t *, NvU32 *, NvU32 *);
NvBool NV_API_CALL rm_is_altstack_in_use(void);
/* vGPU VFIO specific functions */
NV_STATUS NV_API_CALL nv_vgpu_create_request(nvidia_stack_t *, nv_state_t *, const NvU8 *, NvU32, NvU16 *, NvU32, NvBool *);
NV_STATUS NV_API_CALL nv_vgpu_delete(nvidia_stack_t *, const NvU8 *, NvU16);
NV_STATUS NV_API_CALL nv_vgpu_get_type_ids(nvidia_stack_t *, nv_state_t *, NvU32 *, NvU32 **, NvBool);
NV_STATUS NV_API_CALL nv_vgpu_get_type_ids(nvidia_stack_t *, nv_state_t *, NvU32 *, NvU32 *, NvBool, NvU8, NvBool);
NV_STATUS NV_API_CALL nv_vgpu_get_type_info(nvidia_stack_t *, nv_state_t *, NvU32, char *, int, NvU8);
NV_STATUS NV_API_CALL nv_vgpu_get_bar_info(nvidia_stack_t *, nv_state_t *, const NvU8 *, NvU64 *, NvU32, void *);
NV_STATUS NV_API_CALL nv_vgpu_start(nvidia_stack_t *, const NvU8 *, void *, NvS32 *, NvU8 *, NvU32);
@ -987,6 +1000,10 @@ NV_STATUS NV_API_CALL nv_get_usermap_access_params(nv_state_t*, nv_usermap_acces
nv_soc_irq_type_t NV_API_CALL nv_get_current_irq_type(nv_state_t*);
void NV_API_CALL nv_flush_coherent_cpu_cache_range(nv_state_t *nv, NvU64 cpu_virtual, NvU64 size);
#if defined(NV_VMWARE)
const void* NV_API_CALL rm_get_firmware(nv_firmware_type_t fw_type, const void **fw_buf, NvU32 *fw_size);
#endif
/* Callbacks should occur roughly every 10ms. */
#define NV_SNAPSHOT_TIMER_HZ 100
void NV_API_CALL nv_start_snapshot_timer(void (*snapshot_callback)(void *context));
@ -998,6 +1015,16 @@ static inline const NvU8 *nv_get_cached_uuid(nv_state_t *nv)
return nv->nv_uuid_cache.valid ? nv->nv_uuid_cache.uuid : NULL;
}
/* nano second resolution timer callback structure */
typedef struct nv_nano_timer nv_nano_timer_t;
/* nano timer functions */
void NV_API_CALL nv_create_nano_timer(nv_state_t *, void *pTmrEvent, nv_nano_timer_t **);
void NV_API_CALL nv_start_nano_timer(nv_state_t *nv, nv_nano_timer_t *, NvU64 timens);
NV_STATUS NV_API_CALL rm_run_nano_timer_callback(nvidia_stack_t *, nv_state_t *, void *pTmrEvent);
void NV_API_CALL nv_cancel_nano_timer(nv_state_t *, nv_nano_timer_t *);
void NV_API_CALL nv_destroy_nano_timer(nv_state_t *nv, nv_nano_timer_t *);
#if defined(NVCPU_X86_64)
static inline NvU64 nv_rdtsc(void)

View File

@ -331,10 +331,14 @@ typedef NV_STATUS (*uvmPmaEvictPagesCallback)(void *callbackData,
NvU64 *pPages,
NvU32 count,
NvU64 physBegin,
NvU64 physEnd);
NvU64 physEnd,
UVM_PMA_GPU_MEMORY_TYPE mem_type);
// Mirrors pmaEvictRangeCb_t, see its documentation in pma.h.
typedef NV_STATUS (*uvmPmaEvictRangeCallback)(void *callbackData, NvU64 physBegin, NvU64 physEnd);
typedef NV_STATUS (*uvmPmaEvictRangeCallback)(void *callbackData,
NvU64 physBegin,
NvU64 physEnd,
UVM_PMA_GPU_MEMORY_TYPE mem_type);
/*******************************************************************************
nvUvmInterfacePmaRegisterEvictionCallbacks

View File

@ -897,6 +897,16 @@ typedef struct UvmGpuAccessCntrConfig_tag
NvU32 threshold;
} UvmGpuAccessCntrConfig;
//
// When modifying this enum, make sure they are compatible with the mirrored
// MEMORY_PROTECTION enum in phys_mem_allocator.h.
//
typedef enum UvmPmaGpuMemoryType_tag
{
UVM_PMA_GPU_MEMORY_TYPE_UNPROTECTED = 0,
UVM_PMA_GPU_MEMORY_TYPE_PROTECTED = 1
} UVM_PMA_GPU_MEMORY_TYPE;
typedef UvmGpuChannelInfo gpuChannelInfo;
typedef UvmGpuChannelAllocParams gpuChannelAllocParams;
typedef UvmGpuCaps gpuCaps;

View File

@ -150,9 +150,7 @@ typedef struct NvSyncPointFenceRec {
|* *|
\***************************************************************************/
#if !defined(XAPIGEN) /* NvOffset is XAPIGEN builtin type, so skip typedef */
typedef NvU64 NvOffset; /* GPU address */
#endif
#define NvOffset_HI32(n) ((NvU32)(((NvU64)(n)) >> 32))
#define NvOffset_LO32(n) ((NvU32)((NvU64)(n)))

View File

@ -1,5 +1,5 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 2014-2015 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-FileCopyrightText: Copyright (c) 2014-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person obtaining a
@ -29,6 +29,7 @@
#include <nvlimits.h>
#define NVKMS_MAX_SUBDEVICES NV_MAX_SUBDEVICES
#define NVKMS_MAX_HEADS_PER_DISP NV_MAX_HEADS
#define NVKMS_LEFT 0
#define NVKMS_RIGHT 1
@ -530,4 +531,78 @@ typedef struct {
NvBool noncoherent;
} NvKmsDispIOCoherencyModes;
enum NvKmsInputColorSpace {
/* Unknown colorspace; no de-gamma will be applied */
NVKMS_INPUT_COLORSPACE_NONE = 0,
/* Linear, Rec.709 [-0.5, 7.5) */
NVKMS_INPUT_COLORSPACE_SCRGB_LINEAR = 1,
/* PQ, Rec.2020 unity */
NVKMS_INPUT_COLORSPACE_BT2100_PQ = 2,
};
enum NvKmsOutputTf {
/*
* NVKMS itself won't apply any OETF (clients are still
* free to provide a custom OLUT)
*/
NVKMS_OUTPUT_TF_NONE = 0,
NVKMS_OUTPUT_TF_TRADITIONAL_GAMMA_SDR = 1,
NVKMS_OUTPUT_TF_PQ = 2,
};
/*!
* HDR Static Metadata Type1 Descriptor as per CEA-861.3 spec.
* This is expected to match exactly with the spec.
*/
struct NvKmsHDRStaticMetadata {
/*!
* Color primaries of the data.
* These are coded as unsigned 16-bit values in units of 0.00002,
* where 0x0000 represents zero and 0xC350 represents 1.0000.
*/
struct {
NvU16 x, y;
} displayPrimaries[3];
/*!
* White point of colorspace data.
* These are coded as unsigned 16-bit values in units of 0.00002,
* where 0x0000 represents zero and 0xC350 represents 1.0000.
*/
struct {
NvU16 x, y;
} whitePoint;
/**
* Maximum mastering display luminance.
* This value is coded as an unsigned 16-bit value in units of 1 cd/m2,
* where 0x0001 represents 1 cd/m2 and 0xFFFF represents 65535 cd/m2.
*/
NvU16 maxDisplayMasteringLuminance;
/*!
* Minimum mastering display luminance.
* This value is coded as an unsigned 16-bit value in units of
* 0.0001 cd/m2, where 0x0001 represents 0.0001 cd/m2 and 0xFFFF
* represents 6.5535 cd/m2.
*/
NvU16 minDisplayMasteringLuminance;
/*!
* Maximum content light level.
* This value is coded as an unsigned 16-bit value in units of 1 cd/m2,
* where 0x0001 represents 1 cd/m2 and 0xFFFF represents 65535 cd/m2.
*/
NvU16 maxCLL;
/*!
* Maximum frame-average light level.
* This value is coded as an unsigned 16-bit value in units of 1 cd/m2,
* where 0x0001 represents 1 cd/m2 and 0xFFFF represents 65535 cd/m2.
*/
NvU16 maxFALL;
};
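/*!
 * Encoding example (illustrative, not part of the API): the Rec.709 white
 * point (x = 0.3127, y = 0.3290) would be coded as
 * whitePoint.x = 0.3127 / 0.00002 = 15635 (0x3D13) and
 * whitePoint.y = 0.3290 / 0.00002 = 16450 (0x4042), while a mastering peak of
 * 1000 cd/m2 would be maxDisplayMasteringLuminance = 1000 (0x03E8).
 */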
#endif /* NVKMS_API_TYPES_H */

View File

@ -86,8 +86,9 @@ enum NvKmsSurfaceMemoryFormat {
NvKmsSurfaceMemoryFormatY12___V12U12_N420 = 32,
NvKmsSurfaceMemoryFormatY8___U8___V8_N444 = 33,
NvKmsSurfaceMemoryFormatY8___U8___V8_N420 = 34,
NvKmsSurfaceMemoryFormatRF16GF16BF16XF16 = 35,
NvKmsSurfaceMemoryFormatMin = NvKmsSurfaceMemoryFormatI8,
NvKmsSurfaceMemoryFormatMax = NvKmsSurfaceMemoryFormatY8___U8___V8_N420,
NvKmsSurfaceMemoryFormatMax = NvKmsSurfaceMemoryFormatRF16GF16BF16XF16,
};
typedef struct NvKmsSurfaceMemoryFormatInfo {

View File

@ -1,5 +1,5 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 2015 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-FileCopyrightText: Copyright (c) 2015-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person obtaining a
@ -149,6 +149,7 @@ struct NvKmsKapiDeviceResourcesInfo {
} caps;
NvU64 supportedSurfaceMemoryFormats[NVKMS_KAPI_LAYER_MAX];
NvBool supportsHDR[NVKMS_KAPI_LAYER_MAX];
};
#define NVKMS_KAPI_LAYER_MASK(layerType) (1 << (layerType))
@ -218,6 +219,11 @@ struct NvKmsKapiLayerConfig {
struct NvKmsRRParams rrParams;
struct NvKmsKapiSyncpt syncptParams;
struct NvKmsHDRStaticMetadata hdrMetadata;
NvBool hdrMetadataSpecified;
enum NvKmsOutputTf tf;
NvU8 minPresentInterval;
NvBool tearing;
@ -226,6 +232,8 @@ struct NvKmsKapiLayerConfig {
NvS16 dstX, dstY;
NvU16 dstWidth, dstHeight;
enum NvKmsInputColorSpace inputColorSpace;
};
struct NvKmsKapiLayerRequestedConfig {
@ -277,6 +285,8 @@ struct NvKmsKapiHeadModeSetConfig {
NvKmsKapiDisplay displays[NVKMS_KAPI_MAX_CLONE_DISPLAYS];
struct NvKmsKapiDisplayMode mode;
NvBool vrrEnabled;
};
struct NvKmsKapiHeadRequestedConfig {
@ -368,6 +378,9 @@ struct NvKmsKapiDynamicDisplayParams {
/* [OUT] Connection status */
NvU32 connected;
/* [OUT] VRR status */
NvBool vrrSupported;
/* [IN/OUT] EDID of connected monitor/ Input to override EDID */
struct {
NvU16 bufferSize;
@ -484,6 +497,38 @@ struct NvKmsKapiFunctionsTable {
*/
void (*releaseOwnership)(struct NvKmsKapiDevice *device);
/*!
* Grant modeset permissions for a display to fd. Only one (dispIndex, head,
* display) is currently supported.
*
* \param [in] fd fd from opening /dev/nvidia-modeset.
*
* \param [in] device A device returned by allocateDevice().
*
* \param [in] head head of display.
*
* \param [in] display The display to grant.
*
* \return NV_TRUE on success, NV_FALSE on failure.
*/
NvBool (*grantPermissions)
(
NvS32 fd,
struct NvKmsKapiDevice *device,
NvU32 head,
NvKmsKapiDisplay display
);
/*!
* Revoke modeset permissions previously granted. This currently applies for all
* previous grant requests for this device.
*
* \param [in] device A device returned by allocateDevice().
*
* \return NV_TRUE on success, NV_FALSE on failure.
*/
NvBool (*revokePermissions)(struct NvKmsKapiDevice *device);
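/*!
 * Illustrative sketch (the table pointer and variable names are placeholders,
 * not part of this header): leasing one display to a client fd and revoking
 * the grant later.
 *
 *     if (funcsTable->grantPermissions(clientFd, device, head, display))
 *     {
 *         ...
 *         funcsTable->revokePermissions(device);
 *     }
 */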
/*!
* Registers for notification, via
* NvKmsKapiAllocateDeviceParams::eventCallback, of the events specified

View File

@ -234,12 +234,14 @@ extern "C" {
#define DRF_EXTENT(drf) (drf##_HIGH_FIELD)
#define DRF_SHIFT(drf) ((drf##_LOW_FIELD) % 32U)
#define DRF_SHIFT_RT(drf) ((drf##_HIGH_FIELD) % 32U)
#define DRF_SIZE(drf) ((drf##_HIGH_FIELD)-(drf##_LOW_FIELD)+1U)
#define DRF_MASK(drf) (0xFFFFFFFFU >> (31U - ((drf##_HIGH_FIELD) % 32U) + ((drf##_LOW_FIELD) % 32U)))
#else
#define DRF_BASE(drf) (NV_FALSE?drf) // much better
#define DRF_EXTENT(drf) (NV_TRUE?drf) // much better
#define DRF_SHIFT(drf) (((NvU32)DRF_BASE(drf)) % 32U)
#define DRF_SHIFT_RT(drf) (((NvU32)DRF_EXTENT(drf)) % 32U)
#define DRF_SIZE(drf) (DRF_EXTENT(drf)-DRF_BASE(drf)+1U)
#define DRF_MASK(drf) (0xFFFFFFFFU>>(31U - DRF_SHIFT_RT(drf) + DRF_SHIFT(drf)))
#endif
#define DRF_DEF(d,r,f,c) (((NvU32)(NV ## d ## r ## f ## c))<<DRF_SHIFT(NV ## d ## r ## f))
@ -249,12 +251,12 @@ extern "C" {
#define DRF_EXTENT(drf) (1?drf) // much better
#define DRF_SHIFT(drf) ((DRF_ISBIT(0,drf)) % 32)
#define DRF_SHIFT_RT(drf) ((DRF_ISBIT(1,drf)) % 32)
#define DRF_SIZE(drf) (DRF_EXTENT(drf)-DRF_BASE(drf)+1U)
#define DRF_MASK(drf) (0xFFFFFFFFU>>(31-((DRF_ISBIT(1,drf)) % 32)+((DRF_ISBIT(0,drf)) % 32)))
#define DRF_DEF(d,r,f,c) ((NV ## d ## r ## f ## c)<<DRF_SHIFT(NV ## d ## r ## f))
#define DRF_NUM(d,r,f,n) (((n)&DRF_MASK(NV ## d ## r ## f))<<DRF_SHIFT(NV ## d ## r ## f))
#endif
#define DRF_SHIFTMASK(drf) (DRF_MASK(drf)<<(DRF_SHIFT(drf)))
#define DRF_SIZE(drf) (DRF_EXTENT(drf)-DRF_BASE(drf)+1U)
#define DRF_VAL(d,r,f,v) (((v)>>DRF_SHIFT(NV ## d ## r ## f))&DRF_MASK(NV ## d ## r ## f))
#endif
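// Worked example (illustrative): for a register field spanning bits 11:8,
// DRF_SHIFT() is 8, DRF_SIZE() is 4, DRF_MASK() is 0xF, and DRF_VAL(d,r,f,v)
// therefore extracts ((v) >> 8) & 0xF.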
@ -907,6 +909,16 @@ static NV_FORCEINLINE void *NV_NVUPTR_TO_PTR(NvUPtr address)
return uAddr.p;
}
// Get bit at pos (k) from x
#define NV_BIT_GET(k, x) (((x) >> (k)) & 1)
// Get bit at pos (n) from (hi) if >= 64, otherwise from (lo). This is paired with NV_BIT_SET_128 which sets the bit.
#define NV_BIT_GET_128(n, lo, hi) (((n) < 64) ? NV_BIT_GET((n), (lo)) : NV_BIT_GET((n) - 64, (hi)))
//
// Set the bit at pos (b), where (b) < 128. Since (b) can be >= 64, two U64 values are needed to hold all 128 bits.
// Use (lo) if (b) is less than 64, and (hi) if >= 64.
//
#define NV_BIT_SET_128(b, lo, hi) { nvAssert( (b) < 128 ); if ( (b) < 64 ) (lo) |= NVBIT64(b); else (hi) |= NVBIT64( b & 0x3F ); }
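// Worked example: NV_BIT_SET_128(70, lo, hi) sets bit (70 & 0x3F) == 6 of (hi),
// and NV_BIT_GET_128(70, lo, hi) reads it back from (hi) because 70 >= 64;
// a position below 64, e.g. 5, uses (lo) in both macros.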
#ifdef __cplusplus
}
#endif //__cplusplus

View File

@ -24,11 +24,6 @@
#ifndef SDK_NVSTATUS_H
#define SDK_NVSTATUS_H
/* XAPIGEN - this file is not suitable for (nor needed by) xapigen. */
/* Rather than #ifdef out every such include in every sdk */
/* file, punt here. */
#if !defined(XAPIGEN) /* rest of file */
#ifdef __cplusplus
extern "C" {
#endif
@ -125,6 +120,4 @@ const char *nvstatusToString(NV_STATUS nvStatusIn);
}
#endif
#endif // XAPIGEN
#endif /* SDK_NVSTATUS_H */

View File

@ -24,11 +24,6 @@
#ifndef SDK_NVSTATUSCODES_H
#define SDK_NVSTATUSCODES_H
/* XAPIGEN - this file is not suitable for (nor needed by) xapigen. */
/* Rather than #ifdef out every such include in every sdk */
/* file, punt here. */
#if !defined(XAPIGEN) /* rest of file */
NV_STATUS_CODE(NV_OK, 0x00000000, "Success")
NV_STATUS_CODE(NV_ERR_GENERIC, 0x0000FFFF, "Failure: Generic Error")
@ -153,6 +148,7 @@ NV_STATUS_CODE(NV_ERR_NVLINK_CLOCK_ERROR, 0x00000076, "Nvlink Clock
NV_STATUS_CODE(NV_ERR_NVLINK_TRAINING_ERROR, 0x00000077, "Nvlink Training Error")
NV_STATUS_CODE(NV_ERR_NVLINK_CONFIGURATION_ERROR, 0x00000078, "Nvlink Configuration Error")
NV_STATUS_CODE(NV_ERR_RISCV_ERROR, 0x00000079, "Generic RISC-V assert or halt")
NV_STATUS_CODE(NV_ERR_FABRIC_MANAGER_NOT_PRESENT, 0x0000007A, "Fabric Manager is not loaded")
// Warnings:
NV_STATUS_CODE(NV_WARN_HOT_SWITCH, 0x00010001, "WARNING Hot switch")
@ -164,6 +160,4 @@ NV_STATUS_CODE(NV_WARN_NOTHING_TO_DO, 0x00010006, "WARNING Noth
NV_STATUS_CODE(NV_WARN_NULL_OBJECT, 0x00010007, "WARNING NULL object found")
NV_STATUS_CODE(NV_WARN_OUT_OF_RANGE, 0x00010008, "WARNING value out of range")
#endif // XAPIGEN
#endif /* SDK_NVSTATUSCODES_H */

View File

@ -1,5 +1,5 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 1999-2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-FileCopyrightText: Copyright (c) 1999-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person obtaining a
@ -143,6 +143,14 @@ void NV_API_CALL os_free_semaphore (void *);
NV_STATUS NV_API_CALL os_acquire_semaphore (void *);
NV_STATUS NV_API_CALL os_cond_acquire_semaphore (void *);
NV_STATUS NV_API_CALL os_release_semaphore (void *);
void* NV_API_CALL os_alloc_rwlock (void);
void NV_API_CALL os_free_rwlock (void *);
NV_STATUS NV_API_CALL os_acquire_rwlock_read (void *);
NV_STATUS NV_API_CALL os_acquire_rwlock_write (void *);
NV_STATUS NV_API_CALL os_cond_acquire_rwlock_read (void *);
NV_STATUS NV_API_CALL os_cond_acquire_rwlock_write(void *);
void NV_API_CALL os_release_rwlock_read (void *);
void NV_API_CALL os_release_rwlock_write (void *);
NvBool NV_API_CALL os_semaphore_may_sleep (void);
NV_STATUS NV_API_CALL os_get_version_info (os_version_info*);
NvBool NV_API_CALL os_is_isr (void);

View File

@ -588,7 +588,9 @@ compile_test() {
# is present.
#
# Added by commit 3c299dc22635 ("PCI: add
# pci_get_domain_bus_and_slot function") in 2.6.33.
# pci_get_domain_bus_and_slot function") in 2.6.33 but aarch64
# support was added by commit d1e6dc91b532
# ("arm64: Add architectural support for PCI") in 3.18-rc1
#
CODE="
#include <linux/pci.h>
@ -649,7 +651,7 @@ compile_test() {
#include <linux/cpu.h>
void conftest_register_cpu_notifier(void) {
register_cpu_notifier();
}" > conftest$$.c
}"
compile_check_conftest "$CODE" "NV_REGISTER_CPU_NOTIFIER_PRESENT" "" "functions"
;;
@ -665,7 +667,7 @@ compile_test() {
#include <linux/cpu.h>
void conftest_cpuhp_setup_state(void) {
cpuhp_setup_state();
}" > conftest$$.c
}"
compile_check_conftest "$CODE" "NV_CPUHP_SETUP_STATE_PRESENT" "" "functions"
;;
@ -697,66 +699,6 @@ compile_test() {
compile_check_conftest "$CODE" "NV_IOREMAP_WC_PRESENT" "" "functions"
;;
file_operations)
# 'ioctl' field removed by commit b19dd42faf41
# ("bkl: Remove locked .ioctl file operation") in v2.6.36
CODE="
#include <linux/fs.h>
int conftest_file_operations(void) {
return offsetof(struct file_operations, ioctl);
}"
compile_check_conftest "$CODE" "NV_FILE_OPERATIONS_HAS_IOCTL" "" "types"
;;
sg_alloc_table)
#
# sg_alloc_table_from_pages added by commit efc42bc98058
# ("scatterlist: add sg_alloc_table_from_pages function") in v3.6
#
CODE="
#include <linux/scatterlist.h>
void conftest_sg_alloc_table_from_pages(void) {
sg_alloc_table_from_pages();
}"
compile_check_conftest "$CODE" "NV_SG_ALLOC_TABLE_FROM_PAGES_PRESENT" "" "functions"
;;
efi_enabled)
#
# Added in 2.6.12 as a variable
#
# Determine if the efi_enabled symbol is present (as a variable),
# or if the efi_enabled() function is present and how many
# arguments it takes.
#
# Converted from a variable to a function by commit 83e68189745a
# ("efi: Make 'efi_enabled' a function to query EFI facilities")
# in v3.8
#
echo "$CONFTEST_PREAMBLE
#if defined(NV_LINUX_EFI_H_PRESENT)
#include <linux/efi.h>
#endif
int conftest_efi_enabled(void) {
return efi_enabled(0);
}" > conftest$$.c
$CC $CFLAGS -c conftest$$.c > /dev/null 2>&1
rm -f conftest$$.c
if [ -f conftest$$.o ]; then
echo "#define NV_EFI_ENABLED_PRESENT" | append_conftest "functions"
echo "#define NV_EFI_ENABLED_ARGUMENT_COUNT 1" | append_conftest "functions"
rm -f conftest$$.o
return
else
echo "#define NV_EFI_ENABLED_PRESENT" | append_conftest "symbols"
return
fi
;;
dom0_kernel_present)
# Add config parameter if running on DOM0.
if [ -n "$VGX_BUILD" ]; then
@ -967,6 +909,38 @@ compile_test() {
compile_check_conftest "$CODE" "NV_MDEV_GET_TYPE_GROUP_ID_PRESENT" "" "functions"
;;
vfio_device_mig_state)
#
# Determine if vfio_device_mig_state enum is present or not
#
# Added by commit 115dcec65f61d ("vfio: Define device
# migration protocol v2") in v5.18
#
CODE="
#include <linux/pci.h>
#include <linux/vfio.h>
enum vfio_device_mig_state device_state;
"
compile_check_conftest "$CODE" "NV_VFIO_DEVICE_MIG_STATE_PRESENT" "" "types"
;;
vfio_migration_ops)
#
# Determine if vfio_migration_ops struct is present or not
#
# Added by commit 6e97eba8ad874 ("vfio: Split migration ops
# from main device ops") in v6.0
#
CODE="
#include <linux/pci.h>
#include <linux/vfio.h>
struct vfio_migration_ops mig_ops;
"
compile_check_conftest "$CODE" "NV_VFIO_MIGRATION_OPS_PRESENT" "" "types"
;;
mdev_parent)
#
# Determine if the struct mdev_parent type is present.
@ -1051,6 +1025,40 @@ compile_test() {
compile_check_conftest "$CODE" "NV_MDEV_PARENT_OPS_HAS_OPEN_DEVICE" "" "types"
;;
mdev_parent_ops_has_device_driver)
#
# Determine if 'mdev_parent_ops' structure has 'device_driver' field.
#
# Added by commit 88a21f265ce5 ("vfio/mdev: Allow the mdev_parent_ops
# to specify the device driver to bind") in v5.14 (2021-06-17)
#
CODE="
#include <linux/pci.h>
#include <linux/mdev.h>
int conftest_mdev_parent_ops_has_device_driver(void) {
return offsetof(struct mdev_parent_ops, device_driver);
}"
compile_check_conftest "$CODE" "NV_MDEV_PARENT_OPS_HAS_DEVICE_DRIVER" "" "types"
;;
mdev_driver_has_supported_type_groups)
#
# Determine if 'mdev_driver' structure has 'supported_type_groups' field.
#
# Added by commit 6b42f491e17c ("vfio/mdev: Remove mdev_parent_ops")
# in v5.19 (2022-04-11)
#
CODE="
#include <linux/pci.h>
#include <linux/mdev.h>
int conftest_mdev_driver_has_supported_type_groups(void) {
return offsetof(struct mdev_driver, supported_type_groups);
}"
compile_check_conftest "$CODE" "NV_MDEV_DRIVER_HAS_SUPPORTED_TYPE_GROUPS" "" "types"
;;
pci_irq_vector_helpers)
#
# Determine if pci_alloc_irq_vectors(), pci_free_irq_vectors()
@ -1105,6 +1113,61 @@ compile_test() {
compile_check_conftest "$CODE" "NV_VFIO_DEVICE_MIGRATION_HAS_START_PFN" "" "types"
;;
vfio_uninit_group_dev)
#
# Determine if vfio_uninit_group_dev() function is present or not.
#
# Added by commit ae03c3771b8c ("vfio: Introduce a vfio_uninit_group_dev()
# API call") in v5.15
#
CODE="
#include <linux/vfio.h>
void conftest_vfio_uninit_group_dev() {
vfio_uninit_group_dev();
}"
compile_check_conftest "$CODE" "NV_VFIO_UNINIT_GROUP_DEV_PRESENT" "" "functions"
;;
vfio_pci_core_available)
# Determine if VFIO_PCI_CORE is available
#
# Added by commit 7fa005caa35e ("vfio/pci: Introduce
# vfio_pci_core.ko") in v5.16 (2021-08-26)
#
CODE="
#if defined(NV_LINUX_VFIO_PCI_CORE_H_PRESENT)
#include <linux/vfio_pci_core.h>
#endif
#if !defined(CONFIG_VFIO_PCI_CORE) && !defined(CONFIG_VFIO_PCI_CORE_MODULE)
#error VFIO_PCI_CORE not enabled
#endif
void conftest_vfio_pci_core_available(void) {
struct vfio_pci_core_device dev;
}"
compile_check_conftest "$CODE" "NV_VFIO_PCI_CORE_PRESENT" "" "generic"
;;
vfio_register_emulated_iommu_dev)
#
# Determine if vfio_register_emulated_iommu_dev() function is present or not.
#
# Added by commit c68ea0d00ad8 ("vfio: simplify iommu group allocation
# for mediated devices") in v5.16
#
CODE="
#include <linux/vfio.h>
void conftest_vfio_register_emulated_iommu_dev() {
vfio_register_emulated_iommu_dev();
}"
compile_check_conftest "$CODE" "NV_VFIO_REGISTER_EMULATED_IOMMU_DEV_PRESENT" "" "functions"
;;
drm_available)
# Determine if the DRM subsystem is usable
CODE="
@ -1192,22 +1255,6 @@ compile_test() {
compile_check_conftest "$CODE" "NV_GET_NUM_PHYSPAGES_PRESENT" "" "functions"
;;
proc_remove)
#
# Determine if the proc_remove() function is present.
#
# Added by commit a8ca16ea7b0a ("proc: Supply a function to
# remove a proc entry by PDE") in v3.10
#
CODE="
#include <linux/proc_fs.h>
void conftest_proc_remove(void) {
proc_remove();
}"
compile_check_conftest "$CODE" "NV_PROC_REMOVE_PRESENT" "" "functions"
;;
backing_dev_info)
#
# Determine if the 'address_space' structure has
@ -1225,77 +1272,6 @@ compile_test() {
compile_check_conftest "$CODE" "NV_ADDRESS_SPACE_HAS_BACKING_DEV_INFO" "" "types"
;;
address_space)
#
# Determine if the 'address_space' structure has
# a 'tree_lock' field of type rwlock_t.
#
# 'tree_lock' was changed to spinlock_t by commit 19fd6231279b
# ("mm: spinlock tree_lock") in v2.6.27
#
# It was removed altogether by commit b93b016313b3 ("page cache:
# use xa_lock") in v4.17
#
CODE="
#include <linux/fs.h>
int conftest_address_space(void) {
struct address_space as;
rwlock_init(&as.tree_lock);
return offsetof(struct address_space, tree_lock);
}"
compile_check_conftest "$CODE" "NV_ADDRESS_SPACE_HAS_RWLOCK_TREE_LOCK" "" "types"
;;
address_space_init_once)
#
# Determine if address_space_init_once is present.
#
# Added by commit 2aa15890f3c1 ("mm: prevent concurrent
# unmap_mapping_range() on the same inode") in v2.6.38
#
# If not present, it will be defined in uvm-linux.h.
#
CODE="
#include <linux/fs.h>
void conftest_address_space_init_once(void) {
address_space_init_once();
}"
compile_check_conftest "$CODE" "NV_ADDRESS_SPACE_INIT_ONCE_PRESENT" "" "functions"
;;
kuid_t)
#
# Determine if the 'kuid_t' type is present.
#
# Added by commit 7a4e7408c5ca ("userns: Add kuid_t and kgid_t
# and associated infrastructure in uidgid.h") in v3.5
#
CODE="
#include <linux/sched.h>
kuid_t conftest_kuid_t;
"
compile_check_conftest "$CODE" "NV_KUID_T_PRESENT" "" "types"
;;
pm_vt_switch_required)
#
# Determine if the pm_vt_switch_required() function is present.
#
# Added by commit f43f627d2f17 ("PM: make VT switching to the
# suspend console optional v3") in v3.10
#
CODE="
#include <linux/pm.h>
void conftest_pm_vt_switch_required(void) {
pm_vt_switch_required();
}"
compile_check_conftest "$CODE" "NV_PM_VT_SWITCH_REQUIRED_PRESENT" "" "functions"
;;
xen_ioemu_inject_msi)
# Determine if the xen_ioemu_inject_msi() function is present.
CODE="
@ -1473,39 +1449,6 @@ compile_test() {
compile_check_conftest "$CODE" "NV_NVHOST_DMA_FENCE_UNPACK_PRESENT" "" "functions"
;;
of_get_property)
#
# Determine if the of_get_property function is present.
#
# Support for kernels without CONFIG_OF defined added by commit
# 89272b8c0d42 ("dt: add empty of_get_property for non-dt") in v3.1
#
# Test if linux/of.h header file inclusion is successful or not and
# define/undefine NV_LINUX_OF_H_USABLE depending upon status of inclusion
#
echo "$CONFTEST_PREAMBLE
#include <linux/of.h>
" > conftest$$.c
$CC $CFLAGS -c conftest$$.c > /dev/null 2>&1
rm -f conftest$$.c
if [ -f conftest$$.o ]; then
rm -f conftest$$.o
echo "#define NV_LINUX_OF_H_USABLE" | append_conftest "generic"
CODE="
#include <linux/of.h>
void conftest_of_get_property() {
of_get_property();
}"
compile_check_conftest "$CODE" "NV_OF_GET_PROPERTY_PRESENT" "" "functions"
else
echo "#undef NV_LINUX_OF_H_USABLE" | append_conftest "generic"
echo "#undef NV_OF_GET_PROPERTY_PRESENT" | append_conftest "functions"
fi
;;
of_find_node_by_phandle)
#
# Determine if the of_find_node_by_phandle function is present.
@ -1594,50 +1537,28 @@ compile_test() {
compile_check_conftest "$CODE" "NV_PNV_PCI_GET_NPU_DEV_PRESENT" "" "functions"
;;
kernel_write_has_pointer_pos_arg)
#
# Determine the pos argument type, which was changed by
# commit e13ec939e96b1 (fs: fix kernel_write prototype) on
# 9/1/2017.
#
echo "$CONFTEST_PREAMBLE
#include <linux/fs.h>
ssize_t kernel_write(struct file *file, const void *buf,
size_t count, loff_t *pos)
{
return 0;
}" > conftest$$.c;

$CC $CFLAGS -c conftest$$.c > /dev/null 2>&1
rm -f conftest$$.c

if [ -f conftest$$.o ]; then
echo "#define NV_KERNEL_WRITE_HAS_POINTER_POS_ARG" | append_conftest "function"
rm -f conftest$$.o
else
echo "#undef NV_KERNEL_WRITE_HAS_POINTER_POS_ARG" | append_conftest "function"
fi
;;
@ -2004,6 +1925,7 @@ compile_test() {
}" > conftest$$.c
$CC $CFLAGS -c conftest$$.c > /dev/null 2>&1
rm -f conftest$$.c
if [ -f conftest$$.o ]; then
rm -f conftest$$.o
@ -2013,7 +1935,7 @@ compile_test() {
else
echo "#undef NV_DRM_UNIVERSAL_PLANE_INIT_HAS_FORMAT_MODIFIERS_ARG" | append_conftest "types"
echo "$CONFTEST_PREAMBLE
CODE="
#if defined(NV_DRM_DRMP_H_PRESENT)
#include <drm/drmP.h>
#endif
@ -2033,35 +1955,10 @@ compile_test() {
0, /* unsigned int format_count */
DRM_PLANE_TYPE_PRIMARY,
NULL); /* const char *name */
}" > conftest$$.c
}"
$CC $CFLAGS -c conftest$$.c > /dev/null 2>&1
if [ -f conftest$$.o ]; then
rm -f conftest$$.o
echo "#define NV_DRM_UNIVERSAL_PLANE_INIT_HAS_NAME_ARG" | append_conftest "types"
else
echo "#undef NV_DRM_UNIVERSAL_PLANE_INIT_HAS_NAME_ARG" | append_conftest "types"
fi
compile_check_conftest "$CODE" "NV_DRM_UNIVERSAL_PLANE_INIT_HAS_NAME_ARG" "" "types"
fi
;;
vzalloc)
#
# Determine if the vzalloc function is present
#
# Added by commit e1ca7788dec6 ("mm: add vzalloc() and
# vzalloc_node() helpers") in v2.6.37 (2010-10-26)
#
CODE="
#include <linux/vmalloc.h>
void conftest_vzalloc() {
vzalloc();
}"
compile_check_conftest "$CODE" "NV_VZALLOC_PRESENT" "" "functions"
;;
drm_driver_has_set_busid)
@ -2186,29 +2083,14 @@ compile_test() {
echo "#error wait_on_bit_lock() conftest failed!" | append_conftest "functions"
;;
bitmap_clear)
#
# Determine if the bitmap_clear function is present
#
# Added by commit c1a2a962a2ad ("bitmap: introduce bitmap_set,
# bitmap_clear, bitmap_find_next_zero_area") in v2.6.33
# (2009-12-15)
#
CODE="
#include <linux/bitmap.h>
void conftest_bitmap_clear() {
bitmap_clear();
}"
compile_check_conftest "$CODE" "NV_BITMAP_CLEAR_PRESENT" "" "functions"
;;
pci_stop_and_remove_bus_device)
#
# Determine if the pci_stop_and_remove_bus_device() function is present.
#
# Added by commit 210647af897a ("PCI: Rename pci_remove_bus_device
# to pci_stop_and_remove_bus_device") in v3.4 (2012-02-25) but
# aarch64 support was added by commit d1e6dc91b532
# ("arm64: Add architectural support for PCI") in v3.18-rc1.
#
CODE="
#include <linux/types.h>
@ -2220,23 +2102,6 @@ compile_test() {
compile_check_conftest "$CODE" "NV_PCI_STOP_AND_REMOVE_BUS_DEVICE_PRESENT" "" "functions"
;;
pci_remove_bus_device)
#
# Determine if the pci_remove_bus_device() function is present.
# Added before Linux-2.6.12-rc2 2005-04-16
# Because we support builds on non-PCI platforms, we still need
# to check for this function's presence.
#
CODE="
#include <linux/types.h>
#include <linux/pci.h>
void conftest_pci_remove_bus_device() {
pci_remove_bus_device();
}"
compile_check_conftest "$CODE" "NV_PCI_REMOVE_BUS_DEVICE_PRESENT" "" "functions"
;;
drm_helper_mode_fill_fb_struct | drm_helper_mode_fill_fb_struct_has_const_mode_cmd_arg)
#
# Determine if the drm_helper_mode_fill_fb_struct function takes
@ -2334,23 +2199,6 @@ compile_test() {
compile_check_conftest "$CODE" "NV_PCI_DEV_HAS_ATS_ENABLED" "" "types"
;;
mt_device_gre)
#
# Determine if MT_DEVICE_GRE flag is present.
#
# MT_DEVICE_GRE flag is removed by commit 58cc6b72a21274
# ("arm64: mm: Remove unused support for Device-GRE memory type") in v5.14-rc1
# (2021-06-01).
#
CODE="
#include <asm/memory.h>
unsigned int conftest_mt_device_gre(void) {
return MT_DEVICE_GRE;
}"
compile_check_conftest "$CODE" "NV_MT_DEVICE_GRE_PRESENT" "" "types"
;;
get_user_pages)
#
# Conftest for get_user_pages()
@ -2668,20 +2516,146 @@ compile_test() {
fi
;;
pin_user_pages)
#
# Determine if the function pin_user_pages() is present.
# Presence of pin_user_pages() also implies the presence of
# unpin_user_page(). Both were added in v5.6-rc1.
#
# pin_user_pages() was added by commit eddb1c228f7951d399240
# ("mm/gup: introduce pin_user_pages*() and FOLL_PIN") in
# v5.6-rc1 (2020-01-30)
#
# conftest #1: check if pin_user_pages() is available
# return if not available.
#
CODE="
#include <linux/mm.h>
void conftest_pin_user_pages(void) {
pin_user_pages();
}"

compile_check_conftest "$CODE" "NV_PIN_USER_PAGES_PRESENT" "" "functions"
;;
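#
# Illustrative only, not part of the conftest: the NV_PIN_USER_PAGES()/
# NV_UNPIN_USER_PAGE() wrappers used by the driver sources can be thought
# of as roughly the following sketch (the exact wrapper definitions here
# are an assumption):
#
#   #if defined(NV_PIN_USER_PAGES_PRESENT)
#   #define NV_PIN_USER_PAGES(start, count, flags, pages, vmas) \
#       pin_user_pages(start, count, flags, pages, vmas)
#   #define NV_UNPIN_USER_PAGE(page) unpin_user_page(page)
#   #else
#   #define NV_PIN_USER_PAGES(start, count, flags, pages, vmas) \
#       get_user_pages(start, count, flags, pages, vmas)
#   #define NV_UNPIN_USER_PAGE(page) put_page(page)
#   #endif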
pin_user_pages_remote)
# Determine if the function pin_user_pages_remote() is present
#
# pin_user_pages_remote() was added by commit eddb1c228f7951d399240
# ("mm/gup: introduce pin_user_pages*() and FOLL_PIN")
# in v5.6 (2020-01-30)
# The 'tsk' parameter was removed from pin_user_pages_remote() by
# commit 64019a2e467a ("mm/gup: remove task_struct pointer for
# all gup code") in v5.9-rc1 (2020-08-11).
#
# This conftest sets the NV_PIN_USER_PAGES_REMOTE_* macros according
# to whichever of the checks below passes.
#
set_pin_user_pages_remote_defines () {
if [ "$1" = "" ]; then
echo "#undef NV_PIN_USER_PAGES_REMOTE_PRESENT" | append_conftest "functions"
else
echo "#define NV_PIN_USER_PAGES_REMOTE_PRESENT" | append_conftest "functions"
fi
if [ "$1" = "NV_PIN_USER_PAGES_REMOTE_HAS_ARGS_TSK" ]; then
echo "#define NV_PIN_USER_PAGES_REMOTE_HAS_ARGS_TSK" | append_conftest "functions"
else
echo "#undef NV_PIN_USER_PAGES_REMOTE_HAS_ARGS_TSK" | append_conftest "functions"
fi
}
# conftest #1: check if pin_user_pages_remote() is available
# return if not available.
# Fall through to conftest #2 if it is present
#
echo "$CONFTEST_PREAMBLE
#include <linux/mm.h>
void conftest_pin_user_pages_remote(void) {
pin_user_pages_remote();
}" > conftest$$.c
$CC $CFLAGS -c conftest$$.c > /dev/null 2>&1
rm -f conftest$$.c
if [ -f conftest$$.o ]; then
set_pin_user_pages_remote_defines ""
rm -f conftest$$.o
return
fi
# conftest #2: Check if pin_user_pages_remote() has tsk argument
echo "$CONFTEST_PREAMBLE
#include <linux/mm.h>
long pin_user_pages_remote(struct task_struct *tsk,
struct mm_struct *mm,
unsigned long start,
unsigned long nr_pages,
unsigned int gup_flags,
struct page **pages,
struct vm_area_struct **vmas,
int *locked) {
return 0;
}" > conftest$$.c
$CC $CFLAGS -c conftest$$.c > /dev/null 2>&1
rm -f conftest$$.c
if [ -f conftest$$.o ]; then
set_pin_user_pages_remote_defines "NV_PIN_USER_PAGES_REMOTE_HAS_ARGS_TSK"
rm -f conftest$$.o
else
set_pin_user_pages_remote_defines "NV_PIN_USER_PAGES_REMOTE_PRESENT"
fi
;;
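#
# Illustrative only, not part of the conftest: a hypothetical caller would
# pick the right prototype based on the defines set above (mm, start,
# nr_pages, gup_flags and pages are assumed caller-provided variables):
#
#   #if defined(NV_PIN_USER_PAGES_REMOTE_HAS_ARGS_TSK)
#       pinned = pin_user_pages_remote(NULL, mm, start, nr_pages,
#                                      gup_flags, pages, NULL, NULL);
#   #elif defined(NV_PIN_USER_PAGES_REMOTE_PRESENT)
#       pinned = pin_user_pages_remote(mm, start, nr_pages,
#                                      gup_flags, pages, NULL, NULL);
#   #endif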
vfio_pin_pages)
#
# Determine if vfio_pin_pages() kABI accepts "struct vfio_device *"
# argument instead of "struct device *"
#
# Replaced "struct device *" with "struct vfio_device *" by commit
# 8e432bb015b6c ("vfio/mdev: Pass in a struct vfio_device * to
# vfio_pin/unpin_pages()") in v5.19
#
echo "$CONFTEST_PREAMBLE
#include <linux/pci.h>
#include <linux/vfio.h>
int vfio_pin_pages(struct vfio_device *device,
unsigned long *user_pfn,
int npage,
int prot,
unsigned long *phys_pfn) {
return 0;
}" > conftest$$.c
$CC $CFLAGS -c conftest$$.c > /dev/null 2>&1
rm -f conftest$$.c
if [ -f conftest$$.o ]; then
echo "#define NV_VFIO_PIN_PAGES_HAS_VFIO_DEVICE_ARG" | append_conftest "functions"
rm -f conftest$$.o
else
echo "#undef NV_VFIO_PIN_PAGES_HAS_VFIO_DEVICE_ARG" | append_conftest "functions"
fi
;;
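#
# Illustrative only, not part of the conftest: a hypothetical vGPU caller
# would switch on the define above roughly as follows (vgpu->vfio_dev,
# vgpu->mdev, user_pfns and phys_pfns are assumed names):
#
#   #if defined(NV_VFIO_PIN_PAGES_HAS_VFIO_DEVICE_ARG)
#       ret = vfio_pin_pages(&vgpu->vfio_dev, user_pfns, npages,
#                            IOMMU_READ | IOMMU_WRITE, phys_pfns);
#   #else
#       ret = vfio_pin_pages(mdev_dev(vgpu->mdev), user_pfns, npages,
#                            IOMMU_READ | IOMMU_WRITE, phys_pfns);
#   #endif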
pci_driver_has_driver_managed_dma)
#
# Determine if "struct pci_driver" has .driver_managed_dma member.
#
# Added by commit 512881eacfa7 ("bus: platform,amba,fsl-mc,PCI:
# Add device DMA ownership management") in v5.19
#
CODE="
#include <linux/pci.h>
int conftest_pci_driver_has_driver_managed_dma(void) {
return offsetof(struct pci_driver, driver_managed_dma);
}"
compile_check_conftest "$CODE" "NV_PCI_DRIVER_HAS_DRIVER_MANAGED_DMA" "" "types"
;;
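#
# Illustrative only, not part of the conftest: a hypothetical PCI driver
# that manages DMA ownership itself would set the field conditionally
# (all names below are assumptions):
#
#   static struct pci_driver nv_example_pci_driver = {
#       .name     = "nv-example",
#       .id_table = nv_example_id_table,
#       .probe    = nv_example_probe,
#       .remove   = nv_example_remove,
#   #if defined(NV_PCI_DRIVER_HAS_DRIVER_MANAGED_DMA)
#       .driver_managed_dma = true,
#   #endif
#   };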
radix_tree_empty)
@ -2751,6 +2725,130 @@ compile_test() {
compile_check_conftest "$CODE" "NV_DRM_MASTER_DROP_HAS_FROM_RELEASE_ARG" "" "types"
;;
drm_connector_lookup)
#
# Determine if function drm_connector_lookup() is present.
#
# Added by commit b164d31f50b2 ("drm/modes: add connector reference
# counting. (v2)") in v4.7 (2016-05-04), when it replaced
# drm_connector_find().
#
# It was originally added in drm_crtc.h, then moved to
# drm_connector.h by commit 522171951761
# ("drm: Extract drm_connector.[hc]") in v4.9 (2016-08-12)
#
CODE="
#if defined(NV_DRM_DRM_CRTC_H_PRESENT)
#include <drm/drm_crtc.h>
#endif
#if defined(NV_DRM_DRM_CONNECTOR_H_PRESENT)
#include <drm/drm_connector.h>
#endif
void conftest_drm_connector_lookup(void) {
drm_connector_lookup();
}"
compile_check_conftest "$CODE" "NV_DRM_CONNECTOR_LOOKUP_PRESENT" "" "functions"
;;
drm_connector_put)
#
# Determine if function drm_connector_put() is present.
#
# Added by commit ad09360750af ("drm: Introduce
# drm_connector_{get,put}()") in v4.12 (2017-02-28),
# when it replaced drm_connector_unreference() that
# was added with NV_DRM_CONNECTOR_LOOKUP_PRESENT.
#
CODE="
#if defined(NV_DRM_DRM_CONNECTOR_H_PRESENT)
#include <drm/drm_connector.h>
#endif
void conftest_drm_connector_put(void) {
drm_connector_put();
}"
compile_check_conftest "$CODE" "NV_DRM_CONNECTOR_PUT_PRESENT" "" "functions"
;;
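#
# Illustrative only, not part of the conftest: a hypothetical
# compatibility helper for dropping a connector reference:
#
#   static inline void nv_example_connector_put(struct drm_connector *c)
#   {
#   #if defined(NV_DRM_CONNECTOR_PUT_PRESENT)
#       drm_connector_put(c);
#   #elif defined(NV_DRM_CONNECTOR_LOOKUP_PRESENT)
#       drm_connector_unreference(c);
#   #endif
#   }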
drm_modeset_lock_all_end)
#
# Determine the number of arguments of the
# DRM_MODESET_LOCK_ALL_END() macro.
#
# DRM_MODESET_LOCK_ALL_END() was added with two arguments by commit
# b7ea04d299c7 ("drm: Add DRM_MODESET_LOCK_BEGIN/END helpers")
# in v5.0 (2018-11-29). The definition and prototype were changed to
# also take a third argument, drm_device, by commit 77ef38574beb
# ("drm/modeset-lock: Take the modeset BKL for legacy drivers")
# in v5.9 (2020-08-17).
#
DRM_MODESET_3_COMPILED=0
DRM_MODESET_2_COMPILED=0
DRM_MODESET_INCLUDES="
#if defined(NV_DRM_DRM_DEVICE_H_PRESENT)
#include <drm/drm_device.h>
#endif
#if defined(NV_DRM_DRM_DRV_H_PRESENT)
#include <drm/drm_drv.h>
#endif
#if defined(NV_DRM_DRM_MODESET_LOCK_H_PRESENT)
#include <drm/drm_modeset_lock.h>
#endif"
echo "$CONFTEST_PREAMBLE
$DRM_MODESET_INCLUDES
void conftest_drm_modeset_lock_all_end(
struct drm_device *dev,
struct drm_modeset_acquire_ctx ctx,
int ret) {
DRM_MODESET_LOCK_ALL_BEGIN(dev, ctx, 0, ret);
DRM_MODESET_LOCK_ALL_END(dev, ctx, ret);
}" > conftest$$.c
$CC $CFLAGS -c conftest$$.c > /dev/null 2>&1
rm -f conftest$$.c
if [ -f conftest$$.o ]; then
DRM_MODESET_3_COMPILED=1
rm -f conftest$$.o
fi
echo "$CONFTEST_PREAMBLE
$DRM_MODESET_INCLUDES
void conftest_drm_modeset_lock_all_end(
struct drm_device *dev,
struct drm_modeset_acquire_ctx ctx,
int ret) {
DRM_MODESET_LOCK_ALL_BEGIN(dev, ctx, 0, ret);
DRM_MODESET_LOCK_ALL_END(ctx, ret);
}" > conftest$$.c
$CC $CFLAGS -c conftest$$.c > /dev/null 2>&1
rm -f conftest$$.c
if [ -f conftest$$.o ]; then
DRM_MODESET_2_COMPILED=1
rm -f conftest$$.o
fi
# If the macro is undefined, both code snippets will still compile,
# so we need to check both and make sure only one compiles successfully.
if [ "$DRM_MODESET_3_COMPILED" = "1" ] &&
[ "$DRM_MODESET_2_COMPILED" = "0" ]; then
echo "#define NV_DRM_MODESET_LOCK_ALL_END_ARGUMENT_COUNT 3" | append_conftest "functions"
elif [ "$DRM_MODESET_3_COMPILED" = "0" ] &&
[ "$DRM_MODESET_2_COMPILED" = "1" ]; then
echo "#define NV_DRM_MODESET_LOCK_ALL_END_ARGUMENT_COUNT 2" | append_conftest "functions"
else
echo "#define NV_DRM_MODESET_LOCK_ALL_END_ARGUMENT_COUNT 0" | append_conftest "functions"
fi
;;
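#
# Illustrative only, not part of the conftest: a caller would then invoke
# the macro pair based on the detected argument count, roughly as below
# (dev, ctx and ret are assumed caller-provided variables):
#
#   DRM_MODESET_LOCK_ALL_BEGIN(dev, ctx, 0, ret);
#   /* ... modeset work under the acquired locks ... */
#   #if NV_DRM_MODESET_LOCK_ALL_END_ARGUMENT_COUNT == 3
#       DRM_MODESET_LOCK_ALL_END(dev, ctx, ret);
#   #elif NV_DRM_MODESET_LOCK_ALL_END_ARGUMENT_COUNT == 2
#       DRM_MODESET_LOCK_ALL_END(ctx, ret);
#   #endif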
drm_atomic_state_ref_counting)
#
# Determine if functions drm_atomic_state_get/put() are
@ -3038,23 +3136,6 @@ compile_test() {
fi
;;
kthread_create_on_node)
#
# Determine if kthread_create_on_node is available
#
# kthread_create_on_node was added in by commit 207205a2ba26
# ("kthread: NUMA aware kthread_create_on_node()") in v2.6.39
# (2011-03-22).
#
CODE="
#include <linux/kthread.h>
void kthread_create_on_node_conftest(void) {
(void)kthread_create_on_node();
}"
compile_check_conftest "$CODE" "NV_KTHREAD_CREATE_ON_NODE_PRESENT" "" "functions"
;;
cpumask_of_node)
#
# Determine whether cpumask_of_node is available.
@ -3397,8 +3478,16 @@ compile_test() {
# Note that drm_connector.h by introduced by commit 522171951761
# ("drm: Extract drm_connector.[hc]") in v4.9 (2016-08-12)
#
# Note: up to v4.9, this function was provided by drm_crtc.h, which
# was introduced by commit f453ba046074 in v2.6.29 (2008-12-29)
#
CODE="
#if defined(NV_DRM_DRM_CONNECTOR_H_PRESENT)
#include <drm/drm_connector.h>
#endif
#if defined(NV_DRM_DRM_CRTC_H_PRESENT)
#include <drm/drm_crtc.h>
#endif
void conftest_drm_connector_funcs_have_mode_in_name(void) {
drm_mode_connector_attach_encoder();
}"
@ -3406,21 +3495,25 @@ compile_test() {
compile_check_conftest "$CODE" "NV_DRM_CONNECTOR_FUNCS_HAVE_MODE_IN_NAME" "" "functions"
;;
drm_connector_has_vrr_capable_property)
#
# Determine if drm_connector_attach_vrr_capable_property() and
# drm_connector_set_vrr_capable_property() are present.
#
# Added by commit ba1b0f6c73d4ea1390f0d5381f715ffa20c75f09 ("drm:
# Add vrr_capable property to the drm connector") in v5.0-rc1
# (2018-11-28)
#
CODE="
#if defined(NV_DRM_DRM_CONNECTOR_H_PRESENT)
#include <drm/drm_connector.h>
#endif
void conftest_drm_connector_has_vrr_capable_property(void) {
drm_connector_attach_vrr_capable_property();
}"

compile_check_conftest "$CODE" "NV_DRM_CONNECTOR_HAS_VRR_CAPABLE_PROPERTY" "" "functions"
;;
vm_fault_t)
@ -3615,92 +3708,6 @@ compile_test() {
compile_check_conftest "$CODE" "NV_PM_RUNTIME_AVAILABLE" "" "generic"
;;
device_driver_of_match_table)
#
# Determine if the device_driver struct has an of_match_table member.
#
# of_match_table was added by commit 597b9d1e44e9 ("drivercore:
# Add of_match_table to the common device drivers") in v2.6.35
# (2010-04-13).
#
CODE="
#include <linux/device.h>
int conftest_device_driver_of_match_table(void) {
return offsetof(struct device_driver, of_match_table);
}"
compile_check_conftest "$CODE" "NV_DEVICE_DRIVER_OF_MATCH_TABLE_PRESENT" "" "types"
;;
device_of_node)
#
# Determine if the device struct has an of_node member.
#
# of_node member was added by commit d706c1b05027 ("driver-core:
# Add device node pointer to struct device") in v2.6.35
# (2010-04-13).
#
CODE="
#include <linux/device.h>
int conftest_device_of_node(void) {
return offsetof(struct device, of_node);
}"
compile_check_conftest "$CODE" "NV_DEVICE_OF_NODE_PRESENT" "" "types"
;;
dev_is_pci)
#
# Determine if the dev_is_pci() macro is present.
#
# dev_is_pci() macro was added by commit fb8a0d9d1bfd ("pci: Add
# SR-IOV convenience functions and macros") in v2.6.34
# (2010-02-10).
#
CODE="
#include <linux/pci.h>
void conftest_dev_is_pci(void) {
if(dev_is_pci()) {}
}
"
compile_check_conftest "$CODE" "NV_DEV_IS_PCI_PRESENT" "" "functions"
;;
of_find_matching_node)
#
# Determine if the of_find_matching_node() function is present.
#
# Test if linux/of.h header file inclusion is successful or not and
# define/undefine NV_LINUX_OF_H_USABLE depending upon status of inclusion.
#
# of_find_matching_node was added by commit 283029d16a88
# ("[POWERPC] Add of_find_matching_node() helper function") in
# v2.6.25 (2008-01-09).
#
echo "$CONFTEST_PREAMBLE
#include <linux/of.h>
" > conftest$$.c
$CC $CFLAGS -c conftest$$.c > /dev/null 2>&1
rm -f conftest$$.c
if [ -f conftest$$.o ]; then
rm -f conftest$$.o
echo "#define NV_LINUX_OF_H_USABLE" | append_conftest "generic"
CODE="
#include <linux/of.h>
void conftest_of_find_matching_node() {
of_find_matching_node();
}"
compile_check_conftest "$CODE" "NV_OF_FIND_MATCHING_NODE_PRESENT" "" "functions"
else
echo "#undef NV_LINUX_OF_H_USABLE" | append_conftest "generic"
echo "#undef NV_OF_FIND_MATCHING_NODE_PRESENT" | append_conftest "functions"
fi
;;
dma_direct_map_resource)
#
# Determine whether dma_is_direct() exists.
@ -3857,7 +3864,7 @@ compile_test() {
#include <drm/drmP.h>
#endif
#if defined(NV_DRM_DRM_CONNECTOR_H_PRESENT)
#include <drm/drm_connector.h>
#endif
@ -4009,6 +4016,26 @@ compile_test() {
compile_check_conftest "$CODE" "NV_DRM_CRTC_STATE_HAS_PAGEFLIP_FLAGS" "" "types"
;;
drm_crtc_state_has_vrr_enabled)
#
# Determine if 'drm_crtc_state' structure has a
# 'vrr_enabled' field.
#
# Added by commit 1398958cfd8d331342d657d37151791dd7256b40 ("drm:
# Add vrr_enabled property to drm CRTC") in v5.0-rc1 (2018-11-28)
#
CODE="
#if defined(NV_DRM_DRM_CRTC_H_PRESENT)
#include <drm/drm_crtc.h>
#endif
int conftest_drm_crtc_state_has_vrr_enabled(void) {
return offsetof(struct drm_crtc_state, vrr_enabled);
}"
compile_check_conftest "$CODE" "NV_DRM_CRTC_STATE_HAS_VRR_ENABLED" "" "types"
;;
ktime_get_raw_ts64)
#
# Determine if ktime_get_raw_ts64() is present
@ -4146,36 +4173,6 @@ compile_test() {
fi
;;
hlist_for_each_entry)
#
# Determine how many arguments hlist_for_each_entry takes.
#
# Changed by commit b67bfe0d42c ("hlist: drop the node parameter
# from iterators") in v3.9 (2013-02-28)
#
echo "$CONFTEST_PREAMBLE
#include <linux/list.h>
void conftest_hlist_for_each_entry(void) {
struct hlist_head *head;
struct dummy
{
struct hlist_node hlist;
};
struct dummy *pos;
hlist_for_each_entry(pos, head, hlist) {}
}" > conftest$$.c
$CC $CFLAGS -c conftest$$.c > /dev/null 2>&1
rm -f conftest$$.c
if [ -f conftest$$.o ]; then
rm -f conftest$$.o
echo "#define NV_HLIST_FOR_EACH_ENTRY_ARGUMENT_COUNT 3" | append_conftest "functions"
else
echo "#define NV_HLIST_FOR_EACH_ENTRY_ARGUMENT_COUNT 4" | append_conftest "functions"
fi
;;
drm_vma_offset_exact_lookup_locked)
#
# Determine if the drm_vma_offset_exact_lookup_locked() function
@ -4533,38 +4530,6 @@ compile_test() {
compile_check_conftest "$CODE" "NV_DRM_GEM_OBJECT_VMAP_HAS_MAP_ARG" "" "types"
;;
set_close_on_exec)
#
# __set_close_on_exec(() was added by
# commit 1dce27c5aa67 ("Wrap accesses to the fd_sets")
# in v3.4-rc1 (2012-02-19)
#
CODE="
#include <linux/types.h>
#include <linux/fdtable.h>
void conftest_set_close_on_exec(void) {
__set_close_on_exec();
}"
compile_check_conftest "$CODE" "NV_SET_CLOSE_ON_EXEC_PRESENT" "" "functions"
;;
iterate_fd)
#
# iterate_fd() was added by
# commit c3c073f808b2 ("new helper: iterate_fd()")
# in v3.7-rc1 (2012-09-26)
#
CODE="
#include <linux/types.h>
#include <linux/fdtable.h>
void conftest_iterate_fd(void) {
iterate_fd();
}"
compile_check_conftest "$CODE" "NV_ITERATE_FD_PRESENT" "" "functions"
;;
seq_read_iter)
#
# Determine if seq_read_iter() is present
@ -4599,23 +4564,6 @@ compile_test() {
compile_check_conftest "$CODE" "NV_PCI_CLASS_MULTIMEDIA_HD_AUDIO_PRESENT" "" "generic"
;;
sg_page_iter_page)
#
# Determine if sg_page_iter_page() is present
#
# sg_page_iter_page() was added by commit 2db76d7c3c6db
# ("lib/scatterlist: sg_page_iter: support sg lists w/o backing
# pages") in v3.10-rc1 (2013-05-11).
#
CODE="
#include <linux/scatterlist.h>
void conftest_sg_page_iter_page(void) {
sg_page_iter_page();
}"
compile_check_conftest "$CODE" "NV_SG_PAGE_ITER_PAGE_PRESENT" "" "functions"
;;
unsafe_follow_pfn)
#
# Determine if unsafe_follow_pfn() is present.
@ -5294,6 +5242,48 @@ compile_test() {
compile_check_conftest "$CODE" "NV_PLATFORM_IRQ_COUNT_PRESENT" "" "functions"
;;
devm_clk_bulk_get_all)
#
# Determine if devm_clk_bulk_get_all() function is present
#
# Added by commit f08c2e286 ("clk: add managed version of clk_bulk_get_all")
#
CODE="
#if defined(NV_LINUX_CLK_H_PRESENT)
#include <linux/clk.h>
#endif
void conftest_devm_clk_bulk_get_all(void)
{
devm_clk_bulk_get_all();
}
"
compile_check_conftest "$CODE" "NV_DEVM_CLK_BULK_GET_ALL_PRESENT" "" "functions"
;;
mmget_not_zero)
#
# Determine if mmget_not_zero() function is present
#
# mmget_not_zero() function was added by commit
# d2005e3f41d4f9299e2df6a967c8beb5086967a9 ("userfaultfd: don't pin
# the user memory in userfaultfd_file_create()") in v4.7
# (2016-05-20) in linux/sched.h but then moved to linux/sched/mm.h
# by commit 68e21be2916b359fd8afb536c1911dc014cfd03e
# ("sched/headers: Move task->mm handling methods to
# <linux/sched/mm.h>") in v4.11 (2017-02-01).
CODE="
#if defined(NV_LINUX_SCHED_MM_H_PRESENT)
#include <linux/sched/mm.h>
#elif defined(NV_LINUX_SCHED_H_PRESENT)
#include <linux/sched.h>
#endif
void conftest_mmget_not_zero(void) {
mmget_not_zero();
}"
compile_check_conftest "$CODE" "NV_MMGET_NOT_ZERO_PRESENT" "" "functions"
;;
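#
# Illustrative only, not part of the conftest: the usual pattern guarded
# by this define is to take a reference on an mm_struct before using it
# (mm is an assumed caller-provided pointer):
#
#   if (mmget_not_zero(mm)) {
#       /* mm_users reference held; the address space is safe to use */
#       do_work_on_mm(mm);   /* hypothetical helper */
#       mmput(mm);
#   }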
dma_resv_add_fence)
#
# Determine if the dma_resv_add_fence() function is present.
@ -5377,7 +5367,7 @@ compile_test() {
# Determine if 'num_registered_fb' variable is present.
#
# 'num_registered_fb' was removed by commit 5727dcfd8486
# ("fbdev: Make registered_fb[] private to fbmem.c) for
# ("fbdev: Make registered_fb[] private to fbmem.c") for
# v5.20 linux-next (2022-07-27).
#
CODE="
@ -5389,6 +5379,31 @@ compile_test() {
compile_check_conftest "$CODE" "NV_NUM_REGISTERED_FB_PRESENT" "" "types"
;;
acpi_video_backlight_use_native)
#
# Determine if acpi_video_backlight_use_native() function is present
#
# acpi_video_backlight_use_native was added by commit 2600bfa3df99
# ("ACPI: video: Add acpi_video_backlight_use_native() helper") for
# v6.0 (2022-08-17). Note: the include directive for <linux/types.h>
# in this conftest is necessary in order to support kernels between
# commit 0b9f7d93ca61 ("ACPI / i915: ignore firmware requests for
# backlight change") for v3.16 (2014-07-07) and commit 3bd6bce369f5
# ("ACPI / video: Port to new backlight interface selection API")
# for v4.2 (2015-07-16). Kernels within this range use the 'bool'
# type and the related 'false' value in <acpi/video.h> without first
# including the definitions of that type and value.
#
CODE="
#include <linux/types.h>
#include <acpi/video.h>
void conftest_acpi_video_backlight_use_native(void) {
acpi_video_backlight_use_native(0);
}"
compile_check_conftest "$CODE" "NV_ACPI_VIDEO_BACKLIGHT_USE_NATIVE" "" "functions"
;;
# When adding a new conftest entry, please use the correct format for
# specifying the relevant upstream Linux kernel commit.
#

View File

@ -118,6 +118,11 @@ __nv_drm_detect_encoder(struct NvKmsKapiDynamicDisplayParams *pDetectParams,
return false;
}
#if defined(NV_DRM_CONNECTOR_HAS_VRR_CAPABLE_PROPERTY)
drm_connector_attach_vrr_capable_property(&nv_connector->base);
drm_connector_set_vrr_capable_property(&nv_connector->base, pDetectParams->vrrSupported ? true : false);
#endif
if (pDetectParams->connected) {
if (!pDetectParams->overrideEdid && pDetectParams->edid.bufferSize) {

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2015, NVIDIA CORPORATION. All rights reserved.
* Copyright (c) 2015-2022, NVIDIA CORPORATION. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@ -46,6 +46,35 @@
#include <linux/nvhost.h>
#endif
#if defined(NV_DRM_HAS_HDR_OUTPUT_METADATA)
static int
nv_drm_atomic_replace_property_blob_from_id(struct drm_device *dev,
struct drm_property_blob **blob,
uint64_t blob_id,
ssize_t expected_size)
{
struct drm_property_blob *new_blob = NULL;
if (blob_id != 0) {
new_blob = drm_property_lookup_blob(dev, blob_id);
if (new_blob == NULL) {
return -EINVAL;
}
if ((expected_size > 0) &&
(new_blob->length != expected_size)) {
drm_property_blob_put(new_blob);
return -EINVAL;
}
}
drm_property_replace_blob(blob, new_blob);
drm_property_blob_put(new_blob);
return 0;
}
#endif
static void nv_drm_plane_destroy(struct drm_plane *plane)
{
struct nv_drm_plane *nv_plane = to_nv_plane(plane);
@ -84,9 +113,6 @@ cursor_plane_req_config_update(struct drm_plane *plane,
{
struct nv_drm_plane *nv_plane = to_nv_plane(plane);
struct NvKmsKapiCursorRequestedConfig old_config = *req_config;
struct nv_drm_device *nv_dev = to_nv_device(plane->dev);
struct nv_drm_plane_state *nv_drm_plane_state =
to_nv_drm_plane_state(plane_state);
if (plane_state->fb == NULL) {
cursor_req_config_disable(req_config);
@ -186,7 +212,6 @@ plane_req_config_update(struct drm_plane *plane,
struct nv_drm_device *nv_dev = to_nv_device(plane->dev);
struct nv_drm_plane_state *nv_drm_plane_state =
to_nv_drm_plane_state(plane_state);
int ret = 0;
if (plane_state->fb == NULL) {
plane_req_config_disable(req_config);
@ -309,6 +334,9 @@ plane_req_config_update(struct drm_plane *plane,
nv_plane->defaultCompositionMode;
#endif
req_config->config.inputColorSpace =
nv_drm_plane_state->input_colorspace;
req_config->config.syncptParams.preSyncptSpecified = false;
req_config->config.syncptParams.postSyncptRequested = false;
@ -320,10 +348,10 @@ plane_req_config_update(struct drm_plane *plane,
#if defined(NV_LINUX_NVHOST_H_PRESENT) && defined(CONFIG_TEGRA_GRHOST)
#if defined(NV_NVHOST_DMA_FENCE_UNPACK_PRESENT)
if (plane_state->fence != NULL) {
int ret = nvhost_dma_fence_unpack(
plane_state->fence,
&req_config->config.syncptParams.preSyncptId,
&req_config->config.syncptParams.preSyncptValue);
if (ret != 0) {
return ret;
}
@ -339,6 +367,60 @@ plane_req_config_update(struct drm_plane *plane,
#endif
}
#if defined(NV_DRM_HAS_HDR_OUTPUT_METADATA)
if (nv_drm_plane_state->hdr_output_metadata != NULL) {
struct hdr_output_metadata *hdr_metadata =
nv_drm_plane_state->hdr_output_metadata->data;
struct hdr_metadata_infoframe *info_frame =
&hdr_metadata->hdmi_metadata_type1;
struct nv_drm_device *nv_dev = to_nv_device(plane->dev);
uint32_t i;
if (hdr_metadata->metadata_type != HDMI_STATIC_METADATA_TYPE1) {
NV_DRM_DEV_LOG_ERR(nv_dev, "Unsupported Metadata Type");
return -1;
}
for (i = 0; i < ARRAY_SIZE(info_frame->display_primaries); i ++) {
req_config->config.hdrMetadata.displayPrimaries[i].x =
info_frame->display_primaries[i].x;
req_config->config.hdrMetadata.displayPrimaries[i].y =
info_frame->display_primaries[i].y;
}
req_config->config.hdrMetadata.whitePoint.x =
info_frame->white_point.x;
req_config->config.hdrMetadata.whitePoint.y =
info_frame->white_point.y;
req_config->config.hdrMetadata.maxDisplayMasteringLuminance =
info_frame->max_display_mastering_luminance;
req_config->config.hdrMetadata.minDisplayMasteringLuminance =
info_frame->min_display_mastering_luminance;
req_config->config.hdrMetadata.maxCLL =
info_frame->max_cll;
req_config->config.hdrMetadata.maxFALL =
info_frame->max_fall;
req_config->config.hdrMetadataSpecified = true;
switch (info_frame->eotf) {
case HDMI_EOTF_SMPTE_ST2084:
req_config->config.tf = NVKMS_OUTPUT_TF_PQ;
break;
case HDMI_EOTF_TRADITIONAL_GAMMA_SDR:
req_config->config.tf =
NVKMS_OUTPUT_TF_TRADITIONAL_GAMMA_SDR;
break;
default:
NV_DRM_DEV_LOG_ERR(nv_dev, "Unsupported EOTF");
return -1;
}
} else {
req_config->config.hdrMetadataSpecified = false;
req_config->config.tf = NVKMS_OUTPUT_TF_NONE;
}
#endif
/*
* Unconditionally mark the surface as changed, even if nothing changed,
* so that we always get a flip event: a DRM client may flip with
@ -509,9 +591,21 @@ static int nv_drm_plane_atomic_set_property(
nv_drm_plane_state->fd_user_ptr = u64_to_user_ptr(val);
#endif
return 0;
} else if (property == nv_dev->nv_input_colorspace_property) {
nv_drm_plane_state->input_colorspace = val;
return 0;
}
#if defined(NV_DRM_HAS_HDR_OUTPUT_METADATA)
else if (property == nv_dev->nv_hdr_output_metadata_property) {
return nv_drm_atomic_replace_property_blob_from_id(
nv_dev->dev,
&nv_drm_plane_state->hdr_output_metadata,
val,
sizeof(struct hdr_output_metadata));
}
#endif
return -EINVAL;
}
static int nv_drm_plane_atomic_get_property(
@ -521,12 +615,26 @@ static int nv_drm_plane_atomic_get_property(
uint64_t *val)
{
struct nv_drm_device *nv_dev = to_nv_device(plane->dev);
const struct nv_drm_plane_state *nv_drm_plane_state =
to_nv_drm_plane_state_const(state);
if (property == nv_dev->nv_out_fence_property) {
return 0;
} else if (property == nv_dev->nv_input_colorspace_property) {
*val = nv_drm_plane_state->input_colorspace;
return 0;
}
#if defined(NV_DRM_HAS_HDR_OUTPUT_METADATA)
else if (property == nv_dev->nv_hdr_output_metadata_property) {
const struct nv_drm_plane_state *nv_drm_plane_state =
to_nv_drm_plane_state_const(state);
*val = nv_drm_plane_state->hdr_output_metadata ?
nv_drm_plane_state->hdr_output_metadata->base.id : 0;
return 0;
}
#endif
return -EINVAL;
}
static struct drm_plane_state *
@ -544,6 +652,14 @@ nv_drm_plane_atomic_duplicate_state(struct drm_plane *plane)
__drm_atomic_helper_plane_duplicate_state(plane, &nv_plane_state->base);
nv_plane_state->fd_user_ptr = nv_old_plane_state->fd_user_ptr;
nv_plane_state->input_colorspace = nv_old_plane_state->input_colorspace;
#if defined(NV_DRM_HAS_HDR_OUTPUT_METADATA)
nv_plane_state->hdr_output_metadata = nv_old_plane_state->hdr_output_metadata;
if (nv_plane_state->hdr_output_metadata) {
drm_property_blob_get(nv_plane_state->hdr_output_metadata);
}
#endif
return &nv_plane_state->base;
}
@ -557,6 +673,12 @@ static inline void __nv_drm_plane_atomic_destroy_state(
#else
__drm_atomic_helper_plane_destroy_state(state);
#endif
#if defined(NV_DRM_HAS_HDR_OUTPUT_METADATA)
struct nv_drm_plane_state *nv_drm_plane_state =
to_nv_drm_plane_state(state);
drm_property_blob_put(nv_drm_plane_state->hdr_output_metadata);
#endif
}
static void nv_drm_plane_atomic_destroy_state(
@ -803,7 +925,8 @@ static const struct drm_crtc_helper_funcs nv_crtc_helper_funcs = {
};
static void nv_drm_plane_install_properties(
struct drm_plane *plane)
struct drm_plane *plane,
NvBool supportsHDR)
{
struct nv_drm_device *nv_dev = to_nv_device(plane->dev);
@ -811,6 +934,19 @@ static void nv_drm_plane_install_properties(
drm_object_attach_property(
&plane->base, nv_dev->nv_out_fence_property, 0);
}
if (nv_dev->nv_input_colorspace_property) {
drm_object_attach_property(
&plane->base, nv_dev->nv_input_colorspace_property,
NVKMS_INPUT_COLORSPACE_NONE);
}
#if defined(NV_DRM_HAS_HDR_OUTPUT_METADATA)
if (supportsHDR && nv_dev->nv_hdr_output_metadata_property) {
drm_object_attach_property(
&plane->base, nv_dev->nv_hdr_output_metadata_property, 0);
}
#endif
}
static void
@ -990,7 +1126,9 @@ nv_drm_plane_create(struct drm_device *dev,
drm_plane_helper_add(plane, &nv_plane_helper_funcs);
if (plane_type != DRM_PLANE_TYPE_CURSOR) {
nv_drm_plane_install_properties(
plane,
pResInfo->supportsHDR[layer_idx]);
}
__nv_drm_plane_create_alpha_blending_properties(
@ -1141,11 +1279,13 @@ void nv_drm_enumerate_crtcs_and_planes(
}
for (layer = 0; layer < pResInfo->numLayers[i]; layer++) {
struct drm_plane *overlay_plane = NULL;
if (layer == NVKMS_KAPI_LAYER_PRIMARY_IDX) {
continue;
}
overlay_plane =
nv_drm_plane_create(nv_dev->dev,
DRM_PLANE_TYPE_OVERLAY,
layer,

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved.
* Copyright (c) 2016-2022, NVIDIA CORPORATION. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@ -205,6 +205,10 @@ static inline struct nv_drm_plane *to_nv_plane(struct drm_plane *plane)
struct nv_drm_plane_state {
struct drm_plane_state base;
s32 __user *fd_user_ptr;
enum NvKmsInputColorSpace input_colorspace;
#if defined(NV_DRM_HAS_HDR_OUTPUT_METADATA)
struct drm_property_blob *hdr_output_metadata;
#endif
};
static inline struct nv_drm_plane_state *to_nv_drm_plane_state(struct drm_plane_state *state)
@ -212,6 +216,11 @@ static inline struct nv_drm_plane_state *to_nv_drm_plane_state(struct drm_plane_
return container_of(state, struct nv_drm_plane_state, base);
}
static inline const struct nv_drm_plane_state *to_nv_drm_plane_state_const(const struct drm_plane_state *state)
{
return container_of(state, const struct nv_drm_plane_state, base);
}
static inline struct nv_drm_crtc *to_nv_crtc(struct drm_crtc *crtc)
{
if (crtc == NULL) {

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2015-2016, NVIDIA CORPORATION. All rights reserved.
* Copyright (c) 2015-2022, NVIDIA CORPORATION. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@ -86,6 +86,23 @@
static struct nv_drm_device *dev_list = NULL;
static const char* nv_get_input_colorspace_name(
enum NvKmsInputColorSpace colorSpace)
{
switch (colorSpace) {
case NVKMS_INPUT_COLORSPACE_NONE:
return "None";
case NVKMS_INPUT_COLORSPACE_SCRGB_LINEAR:
return "IEC 61966-2-2 linear FP";
case NVKMS_INPUT_COLORSPACE_BT2100_PQ:
return "ITU-R BT.2100-PQ YCbCr";
default:
/* We shouldn't hit this */
WARN_ON("Unsupported input colorspace");
return "None";
}
};
#if defined(NV_DRM_ATOMIC_MODESET_AVAILABLE)
static void nv_drm_output_poll_changed(struct drm_device *dev)
@ -332,6 +349,15 @@ static void nv_drm_enumerate_encoders_and_connectors
*/
static int nv_drm_create_properties(struct nv_drm_device *nv_dev)
{
struct drm_prop_enum_list enum_list[3] = { };
int i, len = 0;
for (i = 0; i < 3; i++) {
enum_list[len].type = i;
enum_list[len].name = nv_get_input_colorspace_name(i);
len++;
}
#if defined(NV_LINUX_NVHOST_H_PRESENT) && defined(CONFIG_TEGRA_GRHOST)
if (!nv_dev->supportsSyncpts) {
return 0;
@ -345,6 +371,23 @@ static int nv_drm_create_properties(struct nv_drm_device *nv_dev)
}
#endif
nv_dev->nv_input_colorspace_property =
drm_property_create_enum(nv_dev->dev, 0, "NV_INPUT_COLORSPACE",
enum_list, len);
if (nv_dev->nv_input_colorspace_property == NULL) {
NV_DRM_LOG_ERR("Failed to create NV_INPUT_COLORSPACE property");
return -ENOMEM;
}
#if defined(NV_DRM_HAS_HDR_OUTPUT_METADATA)
nv_dev->nv_hdr_output_metadata_property =
drm_property_create(nv_dev->dev, DRM_MODE_PROP_BLOB,
"NV_HDR_STATIC_METADATA", 0);
if (nv_dev->nv_hdr_output_metadata_property == NULL) {
return -ENOMEM;
}
#endif
return 0;
}

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
* Copyright (c) 2019-2022, NVIDIA CORPORATION. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@ -40,9 +40,16 @@ static const u32 nvkms_to_drm_format[] = {
[NvKmsSurfaceMemoryFormatR5G6B5] = DRM_FORMAT_RGB565,
[NvKmsSurfaceMemoryFormatA8R8G8B8] = DRM_FORMAT_ARGB8888,
[NvKmsSurfaceMemoryFormatX8R8G8B8] = DRM_FORMAT_XRGB8888,
[NvKmsSurfaceMemoryFormatX8B8G8R8] = DRM_FORMAT_XBGR8888,
[NvKmsSurfaceMemoryFormatA2B10G10R10] = DRM_FORMAT_ABGR2101010,
[NvKmsSurfaceMemoryFormatX2B10G10R10] = DRM_FORMAT_XBGR2101010,
[NvKmsSurfaceMemoryFormatA8B8G8R8] = DRM_FORMAT_ABGR8888,
#if defined(DRM_FORMAT_ABGR16161616F)
[NvKmsSurfaceMemoryFormatRF16GF16BF16AF16] = DRM_FORMAT_ABGR16161616F,
#endif
#if defined(DRM_FORMAT_XBGR16161616F)
[NvKmsSurfaceMemoryFormatRF16GF16BF16XF16] = DRM_FORMAT_XBGR16161616F,
#endif
[NvKmsSurfaceMemoryFormatY8_U8__Y8_V8_N422] = DRM_FORMAT_YUYV,
[NvKmsSurfaceMemoryFormatU8_Y8__V8_Y8_N422] = DRM_FORMAT_UYVY,

View File

@ -113,7 +113,6 @@ static vm_fault_t __nv_drm_gem_user_memory_handle_vma_fault(
page_offset = vmf->pgoff - drm_vma_node_start(&gem->vma_node);
BUG_ON(page_offset > nv_user_memory->pages_count);
ret = vm_insert_page(vma, address, nv_user_memory->pages[page_offset]);
switch (ret) {
case 0:

View File

@ -93,8 +93,6 @@ int nv_drm_lock_user_pages(unsigned long address,
{
struct mm_struct *mm = current->mm;
struct page **user_pages;
const int write = 1;
const int force = 0;
int pages_pinned;
user_pages = nv_drm_calloc(pages_count, sizeof(*user_pages));
@ -105,7 +103,7 @@ int nv_drm_lock_user_pages(unsigned long address,
nv_mmap_read_lock(mm);
pages_pinned = NV_PIN_USER_PAGES(address, pages_count, FOLL_WRITE,
user_pages, NULL);
nv_mmap_read_unlock(mm);
@ -123,7 +121,7 @@ failed:
int i;
for (i = 0; i < pages_pinned; i++) {
NV_UNPIN_USER_PAGE(user_pages[i]);
}
}
@ -138,8 +136,7 @@ void nv_drm_unlock_user_pages(unsigned long pages_count, struct page **pages)
for (i = 0; i < pages_count; i++) {
set_page_dirty_lock(pages[i]);
NV_UNPIN_USER_PAGE(pages[i]);
}
nv_drm_free(pages);
@ -174,12 +171,7 @@ static void __exit nv_linux_drm_exit(void)
module_init(nv_linux_drm_init);
module_exit(nv_linux_drm_exit);
MODULE_LICENSE("Dual MIT/GPL");

MODULE_INFO(supported, "external");
MODULE_VERSION(NV_VERSION_STRING);

View File

@ -93,9 +93,6 @@ static bool __will_generate_flip_event(struct drm_crtc *crtc,
to_nv_crtc_state(new_crtc_state);
struct drm_plane_state *old_plane_state = NULL;
struct drm_plane *plane = NULL;
struct drm_plane *primary_plane = crtc->primary;
bool primary_event = false;
bool overlay_event = false;
int i;
if (!old_crtc_state->active && !new_crtc_state->active) {
@ -274,6 +271,9 @@ nv_drm_atomic_apply_modeset_config(struct drm_device *dev,
nv_new_crtc_state->nv_flip = NULL;
}
#if defined(NV_DRM_CRTC_STATE_HAS_VRR_ENABLED)
requested_config->headRequestedConfig[nv_crtc->head].modeSetConfig.vrrEnabled = new_crtc_state->vrr_enabled;
#endif
}
}

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2015, NVIDIA CORPORATION. All rights reserved.
* Copyright (c) 2015-2022, NVIDIA CORPORATION. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@ -122,6 +122,11 @@ struct nv_drm_device {
NvBool supportsSyncpts;
struct drm_property *nv_out_fence_property;
struct drm_property *nv_input_colorspace_property;
#if defined(NV_DRM_HAS_HDR_OUTPUT_METADATA)
struct drm_property *nv_hdr_output_metadata_property;
#endif
struct nv_drm_device *next;
};

View File

@ -59,11 +59,14 @@ NV_CONFTEST_FUNCTION_COMPILE_TESTS += drm_dev_unref
NV_CONFTEST_FUNCTION_COMPILE_TESTS += drm_reinit_primary_mode_group
NV_CONFTEST_FUNCTION_COMPILE_TESTS += get_user_pages_remote
NV_CONFTEST_FUNCTION_COMPILE_TESTS += get_user_pages
NV_CONFTEST_FUNCTION_COMPILE_TESTS += pin_user_pages_remote
NV_CONFTEST_FUNCTION_COMPILE_TESTS += pin_user_pages
NV_CONFTEST_FUNCTION_COMPILE_TESTS += drm_gem_object_lookup
NV_CONFTEST_FUNCTION_COMPILE_TESTS += drm_atomic_state_ref_counting
NV_CONFTEST_FUNCTION_COMPILE_TESTS += drm_driver_has_gem_prime_res_obj
NV_CONFTEST_FUNCTION_COMPILE_TESTS += drm_atomic_helper_connector_dpms
NV_CONFTEST_FUNCTION_COMPILE_TESTS += drm_connector_funcs_have_mode_in_name
NV_CONFTEST_FUNCTION_COMPILE_TESTS += drm_connector_has_vrr_capable_property
NV_CONFTEST_FUNCTION_COMPILE_TESTS += vmf_insert_pfn
NV_CONFTEST_FUNCTION_COMPILE_TESTS += drm_framebuffer_get
NV_CONFTEST_FUNCTION_COMPILE_TESTS += drm_gem_object_get
@ -100,6 +103,7 @@ NV_CONFTEST_TYPE_COMPILE_TESTS += vm_fault_t
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_gem_object_has_resv
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_crtc_state_has_async_flip
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_crtc_state_has_pageflip_flags
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_crtc_state_has_vrr_enabled
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_format_modifiers_present
NV_CONFTEST_TYPE_COMPILE_TESTS += mm_has_mmap_lock
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_vma_node_is_allowed_has_tag_arg
@ -115,6 +119,7 @@ NV_CONFTEST_TYPE_COMPILE_TESTS += drm_plane_atomic_check_has_atomic_state_arg
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_device_has_pdev
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_crtc_state_has_no_vblank
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_mode_config_has_allow_fb_modifiers
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_has_hdr_output_metadata
NV_CONFTEST_TYPE_COMPILE_TESTS += dma_resv_add_fence
NV_CONFTEST_TYPE_COMPILE_TESTS += dma_resv_reserve_fences
NV_CONFTEST_TYPE_COMPILE_TESTS += reservation_object_reserve_shared_has_num_fences_arg

View File

@ -169,7 +169,6 @@ void nv_kthread_q_stop(nv_kthread_q_t *q)
//
// This function is never invoked when there is no NUMA preference (preferred
// node is NUMA_NO_NODE).
#if NV_KTHREAD_Q_SUPPORTS_AFFINITY() == 1
static struct task_struct *thread_create_on_node(int (*threadfn)(void *data),
nv_kthread_q_t *q,
int preferred_node,
@ -217,7 +216,6 @@ static struct task_struct *thread_create_on_node(int (*threadfn)(void *data),
return thread[i];
}
#endif
int nv_kthread_q_init_on_node(nv_kthread_q_t *q, const char *q_name, int preferred_node)
{
@ -231,11 +229,7 @@ int nv_kthread_q_init_on_node(nv_kthread_q_t *q, const char *q_name, int preferr
q->q_kthread = kthread_create(_main_loop, q, q_name);
}
else {
#if NV_KTHREAD_Q_SUPPORTS_AFFINITY() == 1
q->q_kthread = thread_create_on_node(_main_loop, q, preferred_node, q_name);
#else
return -ENOTSUPP;
#endif
}
if (IS_ERR(q->q_kthread)) {

View File

@ -35,6 +35,8 @@
#include <linux/list.h>
#include <linux/rwsem.h>
#include <acpi/video.h>
#include "nvstatus.h"
#include "nv-register-module.h"
@ -956,6 +958,12 @@ nvkms_register_backlight(NvU32 gpu_id, NvU32 display_id, void *drv_priv,
struct nvkms_backlight_device *nvkms_bd = NULL;
int i;
#if defined(NV_ACPI_VIDEO_BACKLIGHT_USE_NATIVE)
if (!acpi_video_backlight_use_native()) {
return NULL;
}
#endif
gpu_info = nvkms_alloc(NV_MAX_GPUS * sizeof(*gpu_info), NV_TRUE);
if (gpu_info == NULL) {
return NULL;
@ -1346,29 +1354,7 @@ static void nvkms_proc_exit(void)
return;
}
#if defined(NV_PROC_REMOVE_PRESENT)
proc_remove(nvkms_proc_dir);
#else
/*
* On kernel versions without proc_remove(), we need to explicitly
* remove each proc file beneath nvkms_proc_dir.
* nvkms_proc_init() only creates files directly under
* nvkms_proc_dir, so those are the only files we need to remove
* here: warn if there is any deeper directory nesting.
*/
{
struct proc_dir_entry *entry = nvkms_proc_dir->subdir;
while (entry != NULL) {
struct proc_dir_entry *next = entry->next;
WARN_ON(entry->subdir != NULL);
remove_proc_entry(entry->name, entry->parent);
entry = next;
}
}
remove_proc_entry(nvkms_proc_dir->name, nvkms_proc_dir->parent);
#endif /* NV_PROC_REMOVE_PRESENT */
#endif /* CONFIG_PROC_FS */
}
@ -1630,12 +1616,7 @@ restart:
module_init(nvkms_init);
module_exit(nvkms_exit);
MODULE_LICENSE("Dual MIT/GPL");

MODULE_INFO(supported, "external");
MODULE_VERSION(NV_VERSION_STRING);

View File

@ -85,15 +85,11 @@ $(obj)/$(NVIDIA_MODESET_INTERFACE): $(addprefix $(obj)/,$(NVIDIA_MODESET_OBJECTS
NV_OBJECTS_DEPEND_ON_CONFTEST += $(NVIDIA_MODESET_OBJECTS)
NV_CONFTEST_TYPE_COMPILE_TESTS += file_operations
NV_CONFTEST_TYPE_COMPILE_TESTS += node_states_n_memory
NV_CONFTEST_TYPE_COMPILE_TESTS += timespec64
NV_CONFTEST_TYPE_COMPILE_TESTS += proc_ops
NV_CONFTEST_FUNCTION_COMPILE_TESTS += pde_data
NV_CONFTEST_FUNCTION_COMPILE_TESTS += proc_remove
NV_CONFTEST_FUNCTION_COMPILE_TESTS += timer_setup
NV_CONFTEST_FUNCTION_COMPILE_TESTS += kthread_create_on_node
NV_CONFTEST_FUNCTION_COMPILE_TESTS += list_is_first
NV_CONFTEST_FUNCTION_COMPILE_TESTS += ktime_get_real_ts64
NV_CONFTEST_FUNCTION_COMPILE_TESTS += ktime_get_raw_ts64
NV_CONFTEST_SYMBOL_COMPILE_TESTS += is_export_symbol_present_kthread_create_on_node
NV_CONFTEST_FUNCTION_COMPILE_TESTS += acpi_video_backlight_use_native

View File

@ -30,8 +30,18 @@ NVIDIA_PEERMEM_CFLAGS += -UDEBUG -U_DEBUG -DNDEBUG -DNV_BUILD_MODULE_INSTANCES=0
# MOFED's Module.symvers is needed for the build
# to find the additional ib_* symbols.
#
# Also, MOFED doesn't use kbuild ARCH names.
# So adapt OFA_ARCH to match MOFED's conventions.
#
ifeq ($(ARCH), arm64)
OFA_ARCH := aarch64
else ifeq ($(ARCH), powerpc)
OFA_ARCH := ppc64le
else
OFA_ARCH := $(ARCH)
endif
OFA_DIR := /usr/src/ofa_kernel
OFA_CANDIDATES = $(OFA_DIR)/$(OFA_ARCH)/$(KERNELRELEASE) $(OFA_DIR)/$(KERNELRELEASE) $(OFA_DIR)/default /var/lib/dkms/mlnx-ofed-kernel
MLNX_OFED_KERNEL := $(shell for d in $(OFA_CANDIDATES); do \
if [ -d "$$d" ]; then \
echo "$$d"; \

View File

@ -481,16 +481,6 @@ static int _check_cpu_affinity_test(void)
int result, node;
nv_kthread_q_t local_q;
// If the API does not support CPU affinity, check whether the correct
// error code is returned.
// Non-affinitized queue allocation has been verified by previous test
// so just ensure that the affinitized version also works.
if (!NV_KTHREAD_Q_SUPPORTS_AFFINITY()) {
result = nv_kthread_q_init_on_node(&local_q, "should_fail", 0);
TEST_CHECK_RET(result == -ENOTSUPP);
return 0;
}
for_each_online_node(node) {
unsigned i;
const unsigned max_i = 100;

View File

@ -169,7 +169,6 @@ void nv_kthread_q_stop(nv_kthread_q_t *q)
//
// This function is never invoked when there is no NUMA preference (preferred
// node is NUMA_NO_NODE).
#if NV_KTHREAD_Q_SUPPORTS_AFFINITY() == 1
static struct task_struct *thread_create_on_node(int (*threadfn)(void *data),
nv_kthread_q_t *q,
int preferred_node,
@ -217,7 +216,6 @@ static struct task_struct *thread_create_on_node(int (*threadfn)(void *data),
return thread[i];
}
#endif
int nv_kthread_q_init_on_node(nv_kthread_q_t *q, const char *q_name, int preferred_node)
{
@ -231,11 +229,7 @@ int nv_kthread_q_init_on_node(nv_kthread_q_t *q, const char *q_name, int preferr
q->q_kthread = kthread_create(_main_loop, q, q_name);
}
else {
#if NV_KTHREAD_Q_SUPPORTS_AFFINITY() == 1
q->q_kthread = thread_create_on_node(_main_loop, q, preferred_node, q_name);
#else
return -ENOTSUPP;
#endif
}
if (IS_ERR(q->q_kthread)) {

View File

@ -67,17 +67,11 @@ endif
NV_OBJECTS_DEPEND_ON_CONFTEST += $(NVIDIA_UVM_OBJECTS)
NV_CONFTEST_FUNCTION_COMPILE_TESTS += address_space_init_once
NV_CONFTEST_FUNCTION_COMPILE_TESTS += vzalloc
NV_CONFTEST_FUNCTION_COMPILE_TESTS += wait_on_bit_lock_argument_count
NV_CONFTEST_FUNCTION_COMPILE_TESTS += pde_data
NV_CONFTEST_FUNCTION_COMPILE_TESTS += proc_remove
NV_CONFTEST_FUNCTION_COMPILE_TESTS += bitmap_clear
NV_CONFTEST_FUNCTION_COMPILE_TESTS += usleep_range
NV_CONFTEST_FUNCTION_COMPILE_TESTS += radix_tree_empty
NV_CONFTEST_FUNCTION_COMPILE_TESTS += radix_tree_replace_slot
NV_CONFTEST_FUNCTION_COMPILE_TESTS += pnv_npu2_init_context
NV_CONFTEST_FUNCTION_COMPILE_TESTS += kthread_create_on_node
NV_CONFTEST_FUNCTION_COMPILE_TESTS += vmf_insert_pfn
NV_CONFTEST_FUNCTION_COMPILE_TESTS += cpumask_of_node
NV_CONFTEST_FUNCTION_COMPILE_TESTS += list_is_first
@ -88,17 +82,16 @@ NV_CONFTEST_FUNCTION_COMPILE_TESTS += set_pages_uc
NV_CONFTEST_FUNCTION_COMPILE_TESTS += ktime_get_raw_ts64
NV_CONFTEST_FUNCTION_COMPILE_TESTS += ioasid_get
NV_CONFTEST_FUNCTION_COMPILE_TESTS += migrate_vma_setup
NV_CONFTEST_FUNCTION_COMPILE_TESTS += mmget_not_zero
NV_CONFTEST_TYPE_COMPILE_TESTS += file_operations
NV_CONFTEST_TYPE_COMPILE_TESTS += kuid_t
NV_CONFTEST_TYPE_COMPILE_TESTS += address_space
NV_CONFTEST_TYPE_COMPILE_TESTS += backing_dev_info
NV_CONFTEST_TYPE_COMPILE_TESTS += mm_context_t
NV_CONFTEST_TYPE_COMPILE_TESTS += get_user_pages_remote
NV_CONFTEST_TYPE_COMPILE_TESTS += get_user_pages
NV_CONFTEST_TYPE_COMPILE_TESTS += pin_user_pages_remote
NV_CONFTEST_TYPE_COMPILE_TESTS += pin_user_pages
NV_CONFTEST_TYPE_COMPILE_TESTS += vm_fault_has_address
NV_CONFTEST_TYPE_COMPILE_TESTS += vm_ops_fault_removed_vma_arg
NV_CONFTEST_TYPE_COMPILE_TESTS += node_states_n_memory
NV_CONFTEST_TYPE_COMPILE_TESTS += kmem_cache_has_kobj_remove_work
NV_CONFTEST_TYPE_COMPILE_TESTS += sysfs_slab_unlink
NV_CONFTEST_TYPE_COMPILE_TESTS += vm_fault_t

View File

@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2015-2021 NVIDIA Corporation
Copyright (c) 2015-2022 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@ -41,73 +41,6 @@
static dev_t g_uvm_base_dev;
static struct cdev g_uvm_cdev;
// List of fault service contexts for CPU faults
static LIST_HEAD(g_cpu_service_block_context_list);
static uvm_spinlock_t g_cpu_service_block_context_list_lock;
NV_STATUS uvm_service_block_context_init(void)
{
unsigned num_preallocated_contexts = 4;
uvm_spin_lock_init(&g_cpu_service_block_context_list_lock, UVM_LOCK_ORDER_LEAF);
// Pre-allocate some fault service contexts for the CPU and add them to the global list
while (num_preallocated_contexts-- > 0) {
uvm_service_block_context_t *service_context = uvm_kvmalloc(sizeof(*service_context));
if (!service_context)
return NV_ERR_NO_MEMORY;
list_add(&service_context->cpu_fault.service_context_list, &g_cpu_service_block_context_list);
}
return NV_OK;
}
void uvm_service_block_context_exit(void)
{
uvm_service_block_context_t *service_context, *service_context_tmp;
// Free fault service contexts for the CPU and clear the global list
list_for_each_entry_safe(service_context, service_context_tmp, &g_cpu_service_block_context_list,
cpu_fault.service_context_list) {
uvm_kvfree(service_context);
}
INIT_LIST_HEAD(&g_cpu_service_block_context_list);
}
// Get a fault service context from the global list or allocate a new one if there are no
// available entries
static uvm_service_block_context_t *uvm_service_block_context_cpu_alloc(void)
{
uvm_service_block_context_t *service_context;
uvm_spin_lock(&g_cpu_service_block_context_list_lock);
service_context = list_first_entry_or_null(&g_cpu_service_block_context_list, uvm_service_block_context_t,
cpu_fault.service_context_list);
if (service_context)
list_del(&service_context->cpu_fault.service_context_list);
uvm_spin_unlock(&g_cpu_service_block_context_list_lock);
if (!service_context)
service_context = uvm_kvmalloc(sizeof(*service_context));
return service_context;
}
// Put a fault service context in the global list
static void uvm_service_block_context_cpu_free(uvm_service_block_context_t *service_context)
{
uvm_spin_lock(&g_cpu_service_block_context_list_lock);
list_add(&service_context->cpu_fault.service_context_list, &g_cpu_service_block_context_list);
uvm_spin_unlock(&g_cpu_service_block_context_list_lock);
}
static int uvm_open(struct inode *inode, struct file *filp)
{
NV_STATUS status = uvm_global_get_status();
@ -489,139 +422,10 @@ static void uvm_vm_close_managed_entry(struct vm_area_struct *vma)
static vm_fault_t uvm_vm_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
{
uvm_va_space_t *va_space = uvm_va_space_get(vma->vm_file);
uvm_va_block_t *va_block;
NvU64 fault_addr = nv_page_fault_va(vmf);
bool is_write = vmf->flags & FAULT_FLAG_WRITE;
NV_STATUS status = uvm_global_get_status();
bool tools_enabled;
bool major_fault = false;
uvm_service_block_context_t *service_context;
uvm_global_processor_mask_t gpus_to_check_for_ecc;
if (status != NV_OK)
goto convert_error;
// TODO: Bug 2583279: Lock tracking is disabled for the power management
// lock in order to suppress reporting of a lock policy violation.
// The violation consists of acquiring the power management lock multiple
// times, and it manifests as an error during release. The re-acquisition
// of the power management lock happens upon re-entry into the UVM module
// and is benign by itself, but when combined with certain power management
// scenarios it can indicate a potential deadlock.
// Tracking will be re-enabled once the power management locking strategy is
// modified to avoid deadlocks.
if (!uvm_down_read_trylock_no_tracking(&g_uvm_global.pm.lock)) {
status = NV_ERR_BUSY_RETRY;
goto convert_error;
}
service_context = uvm_service_block_context_cpu_alloc();
if (!service_context) {
status = NV_ERR_NO_MEMORY;
goto unlock;
}
service_context->cpu_fault.wakeup_time_stamp = 0;
// The mmap_lock might be held in write mode, but the mode doesn't matter
// for the purpose of lock ordering and we don't rely on it being held in
// write mode anywhere, so just record it as read mode in all cases.
uvm_record_lock_mmap_lock_read(vma->vm_mm);
do {
bool do_sleep = false;
if (status == NV_WARN_MORE_PROCESSING_REQUIRED) {
NvU64 now = NV_GETTIME();
if (now < service_context->cpu_fault.wakeup_time_stamp)
do_sleep = true;
if (do_sleep)
uvm_tools_record_throttling_start(va_space, fault_addr, UVM_ID_CPU);
// Drop the VA space lock while we sleep
uvm_va_space_up_read(va_space);
// usleep_range is preferred because msleep has a 20ms granularity
// and udelay uses a busy-wait loop. usleep_range uses high-resolution
// timers and, by adding a range, the Linux scheduler may coalesce
// our wakeup with others, thus saving some interrupts.
if (do_sleep) {
unsigned long nap_us = (service_context->cpu_fault.wakeup_time_stamp - now) / 1000;
usleep_range(nap_us, nap_us + nap_us / 2);
}
}
uvm_va_space_down_read(va_space);
if (do_sleep)
uvm_tools_record_throttling_end(va_space, fault_addr, UVM_ID_CPU);
status = uvm_va_block_find_create_managed(va_space, fault_addr, &va_block);
if (status != NV_OK) {
UVM_ASSERT_MSG(status == NV_ERR_NO_MEMORY, "status: %s\n", nvstatusToString(status));
break;
}
// Watch out, current->mm might not be vma->vm_mm
UVM_ASSERT(vma == uvm_va_range_vma(va_block->va_range));
// Loop until thrashing goes away.
status = uvm_va_block_cpu_fault(va_block, fault_addr, is_write, service_context);
} while (status == NV_WARN_MORE_PROCESSING_REQUIRED);
if (status != NV_OK) {
UvmEventFatalReason reason;
reason = uvm_tools_status_to_fatal_fault_reason(status);
UVM_ASSERT(reason != UvmEventFatalReasonInvalid);
uvm_tools_record_cpu_fatal_fault(va_space, fault_addr, is_write, reason);
}
tools_enabled = va_space->tools.enabled;
if (status == NV_OK) {
uvm_va_space_global_gpus_in_mask(va_space,
&gpus_to_check_for_ecc,
&service_context->cpu_fault.gpus_to_check_for_ecc);
uvm_global_mask_retain(&gpus_to_check_for_ecc);
}
uvm_va_space_up_read(va_space);
uvm_record_unlock_mmap_lock_read(vma->vm_mm);
if (status == NV_OK) {
status = uvm_global_mask_check_ecc_error(&gpus_to_check_for_ecc);
uvm_global_mask_release(&gpus_to_check_for_ecc);
}
if (tools_enabled)
uvm_tools_flush_events();
// Major faults involve I/O in order to resolve the fault.
// If any pages were DMA'ed between the GPU and host memory, that makes it a major fault.
// A process can also get statistics for major and minor faults by calling readproc().
major_fault = service_context->cpu_fault.did_migrate;
uvm_service_block_context_cpu_free(service_context);
unlock:
// TODO: Bug 2583279: See the comment above the matching lock acquisition
uvm_up_read_no_tracking(&g_uvm_global.pm.lock);
convert_error:
switch (status) {
case NV_OK:
case NV_ERR_BUSY_RETRY:
return VM_FAULT_NOPAGE | (major_fault ? VM_FAULT_MAJOR : 0);
case NV_ERR_NO_MEMORY:
return VM_FAULT_OOM;
default:
return VM_FAULT_SIGBUS;
}
return uvm_va_space_cpu_fault_managed(va_space, vma, vmf);
}
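/*
 * Illustrative sketch (not from the driver sources): the throttling path above
 * converts the remaining wait (wakeup_time_stamp - now, in nanoseconds) into
 * microseconds and passes usleep_range() a window of [nap, nap + nap/2] so the
 * scheduler is free to coalesce the wakeup with other timers. The arithmetic is
 * reproduced below as a stand-alone user-space program; the timestamps are made up.
 */
#include <stdio.h>

int main(void)
{
    unsigned long long now_ns    = 1000000000ULL;   /* hypothetical NV_GETTIME() value */
    unsigned long long wakeup_ns = 1002500000ULL;   /* wakeup 2.5 ms in the future */

    if (wakeup_ns > now_ns) {
        unsigned long nap_us = (unsigned long)((wakeup_ns - now_ns) / 1000);

        /* Lower bound is the exact remaining time, upper bound adds 50% slack. */
        printf("usleep_range(%lu, %lu)\n", nap_us, nap_us + nap_us / 2);
        /* Prints: usleep_range(2500, 3750) */
    }

    return 0;
}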
static vm_fault_t uvm_vm_fault_entry(struct vm_area_struct *vma, struct vm_fault *vmf)
{
UVM_ENTRY_RET(uvm_vm_fault(vma, vmf));
@ -986,8 +790,6 @@ bool uvm_file_is_nvidia_uvm(struct file *filp)
NV_STATUS uvm_test_register_unload_state_buffer(UVM_TEST_REGISTER_UNLOAD_STATE_BUFFER_PARAMS *params, struct file *filp)
{
long ret;
int write = 1;
int force = 0;
struct page *page;
NV_STATUS status = NV_OK;
@ -998,7 +800,7 @@ NV_STATUS uvm_test_register_unload_state_buffer(UVM_TEST_REGISTER_UNLOAD_STATE_B
// are not used because unload_state_buf may be a managed memory pointer and
// therefore a locking assertion from the CPU fault handler could be fired.
nv_mmap_read_lock(current->mm);
ret = NV_GET_USER_PAGES(params->unload_state_buf, 1, write, force, &page, NULL);
ret = NV_PIN_USER_PAGES(params->unload_state_buf, 1, FOLL_WRITE, &page, NULL);
nv_mmap_read_unlock(current->mm);
if (ret < 0)
@ -1008,7 +810,7 @@ NV_STATUS uvm_test_register_unload_state_buffer(UVM_TEST_REGISTER_UNLOAD_STATE_B
uvm_mutex_lock(&g_uvm_global.global_lock);
if (g_uvm_global.unload_state.ptr) {
put_page(page);
NV_UNPIN_USER_PAGE(page);
status = NV_ERR_IN_USE;
goto error;
}
@ -1027,7 +829,7 @@ static void uvm_test_unload_state_exit(void)
{
if (g_uvm_global.unload_state.ptr) {
kunmap(g_uvm_global.unload_state.page);
put_page(g_uvm_global.unload_state.page);
NV_UNPIN_USER_PAGE(g_uvm_global.unload_state.page);
}
}

View File

@ -25,9 +25,62 @@
#include "uvm_ats_faults.h"
#include "uvm_migrate_pageable.h"
// TODO: Bug 2103669: Implement a real prefetching policy and remove or adapt
// these experimental parameters. These are intended to help guide that policy.
static unsigned int uvm_exp_perf_prefetch_ats_order_replayable = 0;
module_param(uvm_exp_perf_prefetch_ats_order_replayable, uint, 0644);
MODULE_PARM_DESC(uvm_exp_perf_prefetch_ats_order_replayable,
"Max order of pages (2^N) to prefetch on replayable ATS faults");
static unsigned int uvm_exp_perf_prefetch_ats_order_non_replayable = 0;
module_param(uvm_exp_perf_prefetch_ats_order_non_replayable, uint, 0644);
MODULE_PARM_DESC(uvm_exp_perf_prefetch_ats_order_non_replayable,
"Max order of pages (2^N) to prefetch on non-replayable ATS faults");
// Expand the fault region to the naturally-aligned region with order given by
// the module parameters, clamped to the vma containing fault_addr (if any).
// Note that this means the region contains fault_addr but may not begin at
// fault_addr.
static void expand_fault_region(struct mm_struct *mm,
NvU64 fault_addr,
uvm_fault_client_type_t client_type,
unsigned long *start,
unsigned long *size)
{
struct vm_area_struct *vma;
unsigned int order;
unsigned long outer, aligned_start, aligned_size;
*start = fault_addr;
*size = PAGE_SIZE;
if (client_type == UVM_FAULT_CLIENT_TYPE_HUB)
order = uvm_exp_perf_prefetch_ats_order_non_replayable;
else
order = uvm_exp_perf_prefetch_ats_order_replayable;
if (order == 0)
return;
vma = find_vma_intersection(mm, fault_addr, fault_addr + 1);
if (!vma)
return;
UVM_ASSERT(order < BITS_PER_LONG - PAGE_SHIFT);
aligned_size = (1UL << order) * PAGE_SIZE;
aligned_start = fault_addr & ~(aligned_size - 1);
*start = max(vma->vm_start, aligned_start);
outer = min(vma->vm_end, aligned_start + aligned_size);
*size = outer - *start;
}
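/*
 * Illustrative sketch (not from the driver sources): expand_fault_region()
 * grows the one-page fault into the naturally aligned 2^order-page region
 * containing fault_addr, then clamps that region to the VMA bounds. The same
 * arithmetic on made-up numbers, as a stand-alone program:
 */
#include <stdio.h>

#define EX_PAGE_SIZE 4096UL

int main(void)
{
    unsigned long fault_addr = 0x7f0000123000UL;   /* hypothetical faulting address */
    unsigned long vm_start   = 0x7f0000100000UL;   /* hypothetical VMA bounds */
    unsigned long vm_end     = 0x7f0000180000UL;
    unsigned int  order      = 4;                  /* prefetch 2^4 = 16 pages */

    unsigned long aligned_size  = (1UL << order) * EX_PAGE_SIZE;
    unsigned long aligned_start = fault_addr & ~(aligned_size - 1);

    /* Clamp the aligned region to the VMA, exactly like the code above. */
    unsigned long start = aligned_start > vm_start ? aligned_start : vm_start;
    unsigned long outer = aligned_start + aligned_size < vm_end ?
                          aligned_start + aligned_size : vm_end;

    /* Prints: region [0x7f0000120000, 0x7f0000130000), size 0x10000 */
    printf("region [0x%lx, 0x%lx), size 0x%lx\n", start, outer, outer - start);
    return 0;
}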
static NV_STATUS uvm_ats_service_fault(uvm_gpu_va_space_t *gpu_va_space,
NvU64 fault_addr,
uvm_fault_access_type_t access_type)
uvm_fault_access_type_t access_type,
uvm_fault_client_type_t client_type)
{
uvm_va_space_t *va_space = gpu_va_space->va_space;
struct mm_struct *mm = va_space->va_space_mm.mm;
@ -66,8 +119,6 @@ static NV_STATUS uvm_ats_service_fault(uvm_gpu_va_space_t *gpu_va_space,
{
.va_space = va_space,
.mm = mm,
.start = fault_addr,
.length = PAGE_SIZE,
.dst_id = gpu_va_space->gpu->parent->id,
.dst_node_id = -1,
.populate_permissions = write ? UVM_POPULATE_PERMISSIONS_WRITE : UVM_POPULATE_PERMISSIONS_ANY,
@ -79,6 +130,8 @@ static NV_STATUS uvm_ats_service_fault(uvm_gpu_va_space_t *gpu_va_space,
UVM_ASSERT(uvm_ats_can_service_faults(gpu_va_space, mm));
expand_fault_region(mm, fault_addr, client_type, &uvm_migrate_args.start, &uvm_migrate_args.length);
// TODO: Bug 2103669: Service more than a single fault at a time
//
// We are trying to use migrate_vma API in the kernel (if it exists) to
@ -131,7 +184,10 @@ NV_STATUS uvm_ats_service_fault_entry(uvm_gpu_va_space_t *gpu_va_space,
}
else {
// TODO: Bug 2103669: Service more than a single fault at a time
status = uvm_ats_service_fault(gpu_va_space, current_entry->fault_address, service_access_type);
status = uvm_ats_service_fault(gpu_va_space,
current_entry->fault_address,
service_access_type,
current_entry->fault_source.client_type);
}
// Do not flag prefetch faults as fatal unless something fatal happened
@ -155,7 +211,8 @@ NV_STATUS uvm_ats_service_fault_entry(uvm_gpu_va_space_t *gpu_va_space,
uvm_fault_access_type_mask_test(current_entry->access_type_mask, UVM_FAULT_ACCESS_TYPE_READ)) {
status = uvm_ats_service_fault(gpu_va_space,
current_entry->fault_address,
UVM_FAULT_ACCESS_TYPE_READ);
UVM_FAULT_ACCESS_TYPE_READ,
current_entry->fault_source.client_type);
// If read accesses are also invalid, cancel the fault. If a
// different error code is returned, exit

View File

@ -24,6 +24,7 @@
#include "uvm_channel.h"
#include "uvm_api.h"
#include "uvm_common.h"
#include "uvm_global.h"
#include "uvm_hal.h"
#include "uvm_procfs.h"
@ -68,6 +69,38 @@ typedef enum
UVM_CHANNEL_UPDATE_MODE_FORCE_ALL
} uvm_channel_update_mode_t;
static void channel_pool_lock_init(uvm_channel_pool_t *pool)
{
if (uvm_channel_pool_is_proxy(pool))
uvm_mutex_init(&pool->mutex, UVM_LOCK_ORDER_CHANNEL);
else
uvm_spin_lock_init(&pool->spinlock, UVM_LOCK_ORDER_CHANNEL);
}
void uvm_channel_pool_lock(uvm_channel_pool_t *pool)
{
if (uvm_channel_pool_is_proxy(pool))
uvm_mutex_lock(&pool->mutex);
else
uvm_spin_lock(&pool->spinlock);
}
void uvm_channel_pool_unlock(uvm_channel_pool_t *pool)
{
if (uvm_channel_pool_is_proxy(pool))
uvm_mutex_unlock(&pool->mutex);
else
uvm_spin_unlock(&pool->spinlock);
}
void uvm_channel_pool_assert_locked(uvm_channel_pool_t *pool)
{
if (uvm_channel_pool_is_proxy(pool))
uvm_assert_mutex_locked(&pool->mutex);
else
uvm_assert_spinlock_locked(&pool->spinlock);
}
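/*
 * Illustrative sketch (not part of this change): the wrappers above pick a
 * mutex for proxy pools and a spinlock for everything else, so callers can
 * simply "lock the pool" without caring which primitive backs it. The comment
 * and assertion that appear to be dropped from proxy_channel_submit_work()
 * further down suggest the proxy push path may sleep while holding the lock,
 * which is presumably why it needs a mutex. A user-space analogue with
 * pthreads; pool_t and is_proxy are hypothetical names.
 */
#include <pthread.h>
#include <stdbool.h>

typedef struct {
    bool is_proxy;
    union {
        pthread_spinlock_t spinlock;   /* non-proxy pools: short critical sections */
        pthread_mutex_t    mutex;      /* proxy pools: the holder may sleep */
    };
} pool_t;

static void pool_lock_init(pool_t *pool)
{
    if (pool->is_proxy)
        pthread_mutex_init(&pool->mutex, NULL);
    else
        pthread_spin_init(&pool->spinlock, PTHREAD_PROCESS_PRIVATE);
}

static void pool_lock(pool_t *pool)
{
    if (pool->is_proxy)
        pthread_mutex_lock(&pool->mutex);
    else
        pthread_spin_lock(&pool->spinlock);
}

static void pool_unlock(pool_t *pool)
{
    if (pool->is_proxy)
        pthread_mutex_unlock(&pool->mutex);
    else
        pthread_spin_unlock(&pool->spinlock);
}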
// Update channel progress, completing up to max_to_complete entries
static NvU32 uvm_channel_update_progress_with_max(uvm_channel_t *channel,
NvU32 max_to_complete,
@ -80,7 +113,7 @@ static NvU32 uvm_channel_update_progress_with_max(uvm_channel_t *channel,
NvU64 completed_value = uvm_channel_update_completed_value(channel);
uvm_spin_lock(&channel->pool->lock);
uvm_channel_pool_lock(channel->pool);
// Completed value should never exceed the queued value
UVM_ASSERT_MSG_RELEASE(completed_value <= channel->tracking_sem.queued_value,
@ -108,7 +141,7 @@ static NvU32 uvm_channel_update_progress_with_max(uvm_channel_t *channel,
channel->gpu_get = gpu_get;
uvm_spin_unlock(&channel->pool->lock);
uvm_channel_pool_unlock(channel->pool);
if (cpu_put >= gpu_get)
pending_gpfifos = cpu_put - gpu_get;
@ -157,7 +190,7 @@ static bool channel_is_available(uvm_channel_t *channel, NvU32 num_gpfifo_entrie
{
NvU32 pending_entries;
uvm_assert_spinlock_locked(&channel->pool->lock);
uvm_channel_pool_assert_locked(channel->pool);
if (channel->cpu_put >= channel->gpu_get)
pending_entries = channel->cpu_put - channel->gpu_get;
@ -174,14 +207,14 @@ static bool try_claim_channel(uvm_channel_t *channel, NvU32 num_gpfifo_entries)
UVM_ASSERT(num_gpfifo_entries > 0);
UVM_ASSERT(num_gpfifo_entries < channel->num_gpfifo_entries);
uvm_spin_lock(&channel->pool->lock);
uvm_channel_pool_lock(channel->pool);
if (channel_is_available(channel, num_gpfifo_entries)) {
channel->current_gpfifo_count += num_gpfifo_entries;
claimed = true;
}
uvm_spin_unlock(&channel->pool->lock);
uvm_channel_pool_unlock(channel->pool);
return claimed;
}
@ -248,7 +281,8 @@ static NV_STATUS channel_reserve_in_pool(uvm_channel_pool_t *pool, uvm_channel_t
NV_STATUS uvm_channel_reserve_type(uvm_channel_manager_t *manager, uvm_channel_type_t type, uvm_channel_t **channel_out)
{
UVM_ASSERT(type < UVM_CHANNEL_TYPE_COUNT);
UVM_ASSERT(type < UVM_CHANNEL_TYPE_COUNT);
return channel_reserve_in_pool(manager->pool_to_use.default_for_type[type], channel_out);
}
@ -289,14 +323,14 @@ static NvU32 channel_get_available_push_info_index(uvm_channel_t *channel)
{
uvm_push_info_t *push_info;
uvm_spin_lock(&channel->pool->lock);
uvm_channel_pool_lock(channel->pool);
push_info = list_first_entry_or_null(&channel->available_push_infos, uvm_push_info_t, available_list_node);
UVM_ASSERT(push_info != NULL);
UVM_ASSERT(push_info->on_complete == NULL && push_info->on_complete_data == NULL);
list_del(&push_info->available_list_node);
uvm_spin_unlock(&channel->pool->lock);
uvm_channel_pool_unlock(channel->pool);
return push_info - channel->push_infos;
}
@ -355,10 +389,6 @@ static void proxy_channel_submit_work(uvm_push_t *push, NvU32 push_size)
UVM_ASSERT(uvm_channel_is_proxy(channel));
// nvUvmInterfacePagingChannelPushStream should not sleep, because a
// spinlock is currently held.
uvm_assert_spinlock_locked(&channel->pool->lock);
status = nvUvmInterfacePagingChannelPushStream(channel->proxy.handle, (char *) push->begin, push_size);
if (status != NV_OK) {
@ -409,7 +439,7 @@ void uvm_channel_end_push(uvm_push_t *push)
NvU32 cpu_put;
NvU32 new_cpu_put;
uvm_spin_lock(&channel->pool->lock);
uvm_channel_pool_lock(channel->pool);
new_tracking_value = ++channel->tracking_sem.queued_value;
new_payload = (NvU32)new_tracking_value;
@ -446,7 +476,7 @@ void uvm_channel_end_push(uvm_push_t *push)
// may notice the GPU work to be completed and hence all state tracking the
// push must be updated before that. Notably uvm_pushbuffer_end_push() has
// to be called first.
uvm_spin_unlock(&channel->pool->lock);
uvm_channel_pool_unlock(channel->pool);
unlock_push(channel);
// This memory barrier is borrowed from CUDA, as it supposedly fixes perf
@ -470,7 +500,7 @@ static void write_ctrl_gpfifo(uvm_channel_t *channel, NvU64 ctrl_fifo_entry_valu
NvU32 new_cpu_put;
uvm_gpu_t *gpu = channel->pool->manager->gpu;
uvm_spin_lock(&channel->pool->lock);
uvm_channel_pool_lock(channel->pool);
cpu_put = channel->cpu_put;
new_cpu_put = (cpu_put + 1) % channel->num_gpfifo_entries;
@ -505,7 +535,7 @@ static void write_ctrl_gpfifo(uvm_channel_t *channel, NvU64 ctrl_fifo_entry_valu
// The moment the channel is unlocked uvm_channel_update_progress_with_max()
// may notice the GPU work to be completed and hence all state tracking the
// push must be updated before that.
uvm_spin_unlock(&channel->pool->lock);
uvm_channel_pool_unlock(channel->pool);
unlock_push(channel);
// This memory barrier is borrowed from CUDA, as it supposedly fixes perf
@ -591,12 +621,12 @@ static uvm_gpfifo_entry_t *uvm_channel_get_first_pending_entry(uvm_channel_t *ch
if (pending_count == 0)
return NULL;
uvm_spin_lock(&channel->pool->lock);
uvm_channel_pool_lock(channel->pool);
if (channel->gpu_get != channel->cpu_put)
entry = &channel->gpfifo_entries[channel->gpu_get];
uvm_spin_unlock(&channel->pool->lock);
uvm_channel_pool_unlock(channel->pool);
return entry;
}
@ -720,9 +750,9 @@ static void channel_destroy(uvm_channel_pool_t *pool, uvm_channel_t *channel)
channel_update_progress_all(channel, UVM_CHANNEL_UPDATE_MODE_FORCE_ALL);
}
uvm_procfs_destroy_entry(channel->procfs.pushes);
uvm_procfs_destroy_entry(channel->procfs.info);
uvm_procfs_destroy_entry(channel->procfs.dir);
proc_remove(channel->procfs.pushes);
proc_remove(channel->procfs.info);
proc_remove(channel->procfs.dir);
uvm_kvfree(channel->push_acquire_infos);
uvm_kvfree(channel->push_infos);
@ -977,7 +1007,7 @@ static NV_STATUS channel_pool_add(uvm_channel_manager_t *channel_manager,
pool->engine_index = engine_index;
pool->pool_type = pool_type;
uvm_spin_lock_init(&pool->lock, UVM_LOCK_ORDER_CHANNEL);
channel_pool_lock_init(pool);
num_channels = channel_pool_type_num_channels(pool_type);
@ -1482,11 +1512,11 @@ void uvm_channel_manager_destroy(uvm_channel_manager_t *channel_manager)
if (channel_manager == NULL)
return;
uvm_procfs_destroy_entry(channel_manager->procfs.pending_pushes);
proc_remove(channel_manager->procfs.pending_pushes);
channel_manager_destroy_pools(channel_manager);
uvm_procfs_destroy_entry(channel_manager->procfs.channels_dir);
proc_remove(channel_manager->procfs.channels_dir);
uvm_pushbuffer_destroy(channel_manager->pushbuffer);
@ -1583,7 +1613,7 @@ static void uvm_channel_print_info(uvm_channel_t *channel, struct seq_file *s)
uvm_channel_manager_t *manager = channel->pool->manager;
UVM_SEQ_OR_DBG_PRINT(s, "Channel %s\n", channel->name);
uvm_spin_lock(&channel->pool->lock);
uvm_channel_pool_lock(channel->pool);
UVM_SEQ_OR_DBG_PRINT(s, "completed %llu\n", uvm_channel_update_completed_value(channel));
UVM_SEQ_OR_DBG_PRINT(s, "queued %llu\n", channel->tracking_sem.queued_value);
@ -1595,7 +1625,7 @@ static void uvm_channel_print_info(uvm_channel_t *channel, struct seq_file *s)
UVM_SEQ_OR_DBG_PRINT(s, "Semaphore GPU VA 0x%llx\n", uvm_channel_tracking_semaphore_get_gpu_va(channel));
UVM_SEQ_OR_DBG_PRINT(s, "Semaphore CPU VA 0x%llx\n", (NvU64)(uintptr_t)channel->tracking_sem.semaphore.payload);
uvm_spin_unlock(&channel->pool->lock);
uvm_channel_pool_unlock(channel->pool);
}
static void channel_print_push_acquires(uvm_push_acquire_info_t *push_acquire_info, struct seq_file *seq)
@ -1639,7 +1669,7 @@ static void channel_print_pushes(uvm_channel_t *channel, NvU32 finished_pushes_c
NvU64 completed_value = uvm_channel_update_completed_value(channel);
uvm_spin_lock(&channel->pool->lock);
uvm_channel_pool_lock(channel->pool);
cpu_put = channel->cpu_put;
@ -1687,7 +1717,7 @@ static void channel_print_pushes(uvm_channel_t *channel, NvU32 finished_pushes_c
channel_print_push_acquires(push_acquire_info, seq);
}
}
uvm_spin_unlock(&channel->pool->lock);
uvm_channel_pool_unlock(channel->pool);
}
void uvm_channel_print_pending_pushes(uvm_channel_t *channel)

View File

@ -163,7 +163,11 @@ typedef struct
uvm_channel_pool_type_t pool_type;
// Lock protecting the state of channels in the pool
uvm_spinlock_t lock;
union {
uvm_spinlock_t spinlock;
uvm_mutex_t mutex;
};
} uvm_channel_pool_t;
struct uvm_channel_struct
@ -309,10 +313,20 @@ struct uvm_channel_manager_struct
// Create a channel manager for the GPU
NV_STATUS uvm_channel_manager_create(uvm_gpu_t *gpu, uvm_channel_manager_t **manager_out);
void uvm_channel_pool_lock(uvm_channel_pool_t *pool);
void uvm_channel_pool_unlock(uvm_channel_pool_t *pool);
void uvm_channel_pool_assert_locked(uvm_channel_pool_t *pool);
static bool uvm_channel_pool_is_proxy(uvm_channel_pool_t *pool)
{
UVM_ASSERT(pool->pool_type < UVM_CHANNEL_POOL_TYPE_MASK);
return pool->pool_type == UVM_CHANNEL_POOL_TYPE_CE_PROXY;
}
static bool uvm_channel_is_proxy(uvm_channel_t *channel)
{
UVM_ASSERT(channel->pool->pool_type < UVM_CHANNEL_POOL_TYPE_MASK);
return channel->pool->pool_type == UVM_CHANNEL_POOL_TYPE_CE_PROXY;
return uvm_channel_pool_is_proxy(channel->pool);
}
static bool uvm_channel_is_ce(uvm_channel_t *channel)

View File

@ -747,14 +747,14 @@ static NvU32 get_available_gpfifo_entries(uvm_channel_t *channel)
{
NvU32 pending_entries;
uvm_spin_lock(&channel->pool->lock);
uvm_channel_pool_lock(channel->pool);
if (channel->cpu_put >= channel->gpu_get)
pending_entries = channel->cpu_put - channel->gpu_get;
else
pending_entries = channel->cpu_put + channel->num_gpfifo_entries - channel->gpu_get;
uvm_spin_unlock(&channel->pool->lock);
uvm_channel_pool_unlock(channel->pool);
return channel->num_gpfifo_entries - pending_entries - 1;
}

View File

@ -186,8 +186,7 @@ static void uvm_global_remove_parent_gpu(uvm_parent_gpu_t *parent_gpu)
uvm_assert_mutex_locked(&g_uvm_global.global_lock);
uvm_assert_spinlock_locked(&g_uvm_global.gpu_table_lock);
UVM_ASSERT(g_uvm_global.parent_gpus[gpu_index]);
UVM_ASSERT(g_uvm_global.parent_gpus[gpu_index] == parent_gpu);
UVM_ASSERT(g_uvm_global.parent_gpus[gpu_index] == NULL || g_uvm_global.parent_gpus[gpu_index] == parent_gpu);
g_uvm_global.parent_gpus[gpu_index] = NULL;
}

View File

@ -694,7 +694,7 @@ static NV_STATUS init_parent_procfs_dir(uvm_parent_gpu_t *parent_gpu)
static void deinit_parent_procfs_dir(uvm_parent_gpu_t *parent_gpu)
{
uvm_procfs_destroy_entry(parent_gpu->procfs.dir);
proc_remove(parent_gpu->procfs.dir);
}
static NV_STATUS init_parent_procfs_files(uvm_parent_gpu_t *parent_gpu)
@ -722,8 +722,8 @@ static NV_STATUS init_parent_procfs_files(uvm_parent_gpu_t *parent_gpu)
static void deinit_parent_procfs_files(uvm_parent_gpu_t *parent_gpu)
{
uvm_procfs_destroy_entry(parent_gpu->procfs.access_counters_file);
uvm_procfs_destroy_entry(parent_gpu->procfs.fault_stats_file);
proc_remove(parent_gpu->procfs.access_counters_file);
proc_remove(parent_gpu->procfs.fault_stats_file);
}
static NV_STATUS init_procfs_dirs(uvm_gpu_t *gpu)
@ -774,9 +774,9 @@ static NV_STATUS init_procfs_dirs(uvm_gpu_t *gpu)
// The kernel waits on readers to finish before returning from those calls
static void deinit_procfs_dirs(uvm_gpu_t *gpu)
{
uvm_procfs_destroy_entry(gpu->procfs.dir_peers);
uvm_procfs_destroy_entry(gpu->procfs.dir_symlink);
uvm_procfs_destroy_entry(gpu->procfs.dir);
proc_remove(gpu->procfs.dir_peers);
proc_remove(gpu->procfs.dir_symlink);
proc_remove(gpu->procfs.dir);
}
static NV_STATUS init_procfs_files(uvm_gpu_t *gpu)
@ -790,15 +790,15 @@ static NV_STATUS init_procfs_files(uvm_gpu_t *gpu)
static void deinit_procfs_files(uvm_gpu_t *gpu)
{
uvm_procfs_destroy_entry(gpu->procfs.info_file);
proc_remove(gpu->procfs.info_file);
}
static void deinit_procfs_peer_cap_files(uvm_gpu_peer_t *peer_caps)
{
uvm_procfs_destroy_entry(peer_caps->procfs.peer_symlink_file[0]);
uvm_procfs_destroy_entry(peer_caps->procfs.peer_symlink_file[1]);
uvm_procfs_destroy_entry(peer_caps->procfs.peer_file[0]);
uvm_procfs_destroy_entry(peer_caps->procfs.peer_file[1]);
proc_remove(peer_caps->procfs.peer_symlink_file[0]);
proc_remove(peer_caps->procfs.peer_symlink_file[1]);
proc_remove(peer_caps->procfs.peer_file[0]);
proc_remove(peer_caps->procfs.peer_file[1]);
}
static NV_STATUS init_semaphore_pool(uvm_gpu_t *gpu)
@ -3080,41 +3080,41 @@ void uvm_gpu_dma_free_page(uvm_parent_gpu_t *parent_gpu, void *va, NvU64 dma_add
atomic64_sub(PAGE_SIZE, &parent_gpu->mapped_cpu_pages_size);
}
NV_STATUS uvm_gpu_map_cpu_pages(uvm_gpu_t *gpu, struct page *page, size_t size, NvU64 *dma_address_out)
NV_STATUS uvm_gpu_map_cpu_pages(uvm_parent_gpu_t *parent_gpu, struct page *page, size_t size, NvU64 *dma_address_out)
{
NvU64 dma_addr;
UVM_ASSERT(PAGE_ALIGNED(size));
dma_addr = dma_map_page(&gpu->parent->pci_dev->dev, page, 0, size, DMA_BIDIRECTIONAL);
if (dma_mapping_error(&gpu->parent->pci_dev->dev, dma_addr))
dma_addr = dma_map_page(&parent_gpu->pci_dev->dev, page, 0, size, DMA_BIDIRECTIONAL);
if (dma_mapping_error(&parent_gpu->pci_dev->dev, dma_addr))
return NV_ERR_OPERATING_SYSTEM;
if (dma_addr < gpu->parent->dma_addressable_start ||
dma_addr + size - 1 > gpu->parent->dma_addressable_limit) {
dma_unmap_page(&gpu->parent->pci_dev->dev, dma_addr, size, DMA_BIDIRECTIONAL);
if (dma_addr < parent_gpu->dma_addressable_start ||
dma_addr + size - 1 > parent_gpu->dma_addressable_limit) {
dma_unmap_page(&parent_gpu->pci_dev->dev, dma_addr, size, DMA_BIDIRECTIONAL);
UVM_ERR_PRINT_RL("PCI mapped range [0x%llx, 0x%llx) not in the addressable range [0x%llx, 0x%llx), GPU %s\n",
dma_addr,
dma_addr + (NvU64)size,
gpu->parent->dma_addressable_start,
gpu->parent->dma_addressable_limit + 1,
uvm_gpu_name(gpu));
parent_gpu->dma_addressable_start,
parent_gpu->dma_addressable_limit + 1,
parent_gpu->name);
return NV_ERR_INVALID_ADDRESS;
}
atomic64_add(size, &gpu->parent->mapped_cpu_pages_size);
*dma_address_out = dma_addr_to_gpu_addr(gpu->parent, dma_addr);
atomic64_add(size, &parent_gpu->mapped_cpu_pages_size);
*dma_address_out = dma_addr_to_gpu_addr(parent_gpu, dma_addr);
return NV_OK;
}
void uvm_gpu_unmap_cpu_pages(uvm_gpu_t *gpu, NvU64 dma_address, size_t size)
void uvm_gpu_unmap_cpu_pages(uvm_parent_gpu_t *parent_gpu, NvU64 dma_address, size_t size)
{
UVM_ASSERT(PAGE_ALIGNED(size));
dma_address = gpu_addr_to_dma_addr(gpu->parent, dma_address);
dma_unmap_page(&gpu->parent->pci_dev->dev, dma_address, size, DMA_BIDIRECTIONAL);
atomic64_sub(size, &gpu->parent->mapped_cpu_pages_size);
dma_address = gpu_addr_to_dma_addr(parent_gpu, dma_address);
dma_unmap_page(&parent_gpu->pci_dev->dev, dma_address, size, DMA_BIDIRECTIONAL);
atomic64_sub(size, &parent_gpu->mapped_cpu_pages_size);
}
// This function implements the UvmRegisterGpu API call, as described in uvm.h.

View File

@ -44,6 +44,7 @@
#include "uvm_va_block_types.h"
#include "uvm_perf_module.h"
#include "uvm_rb_tree.h"
#include "uvm_perf_prefetch.h"
#include "nv-kthread-q.h"
// Buffer length to store uvm gpu id, RM device name and gpu uuid.
@ -159,6 +160,12 @@ struct uvm_service_block_context_struct
// State used by the VA block routines called by the servicing routine
uvm_va_block_context_t block_context;
// Prefetch state hint
uvm_perf_prefetch_hint_t prefetch_hint;
// Prefetch temporary state.
uvm_perf_prefetch_bitmap_tree_t prefetch_bitmap_tree;
};
struct uvm_fault_service_batch_context_struct
@ -374,6 +381,16 @@ struct uvm_access_counter_service_batch_context_struct
// determine at fetch time that all the access counter notifications in the
// batch report the same instance_ptr
bool is_single_instance_ptr;
// Scratch space, used to generate artificial physically addressed notifications.
// Virtual address notifications are always aligned to 64k. This means up to 16
// different physical locations could have been accessed to trigger one notification.
// The sub-granularity mask can correspond to any of them.
struct {
uvm_processor_id_t resident_processors[16];
uvm_gpu_phys_address_t phys_addresses[16];
uvm_access_counter_buffer_entry_t phys_entry;
} scratch;
} virt;
struct
@ -1309,19 +1326,19 @@ NV_STATUS uvm_gpu_check_ecc_error_no_rm(uvm_gpu_t *gpu);
//
// Returns the physical address of the pages that can be used to access them on
// the GPU.
NV_STATUS uvm_gpu_map_cpu_pages(uvm_gpu_t *gpu, struct page *page, size_t size, NvU64 *dma_address_out);
NV_STATUS uvm_gpu_map_cpu_pages(uvm_parent_gpu_t *parent_gpu, struct page *page, size_t size, NvU64 *dma_address_out);
// Unmap num_pages pages previously mapped with uvm_gpu_map_cpu_pages().
void uvm_gpu_unmap_cpu_pages(uvm_gpu_t *gpu, NvU64 dma_address, size_t size);
void uvm_gpu_unmap_cpu_pages(uvm_parent_gpu_t *parent_gpu, NvU64 dma_address, size_t size);
static NV_STATUS uvm_gpu_map_cpu_page(uvm_gpu_t *gpu, struct page *page, NvU64 *dma_address_out)
static NV_STATUS uvm_gpu_map_cpu_page(uvm_parent_gpu_t *parent_gpu, struct page *page, NvU64 *dma_address_out)
{
return uvm_gpu_map_cpu_pages(gpu, page, PAGE_SIZE, dma_address_out);
return uvm_gpu_map_cpu_pages(parent_gpu, page, PAGE_SIZE, dma_address_out);
}
static void uvm_gpu_unmap_cpu_page(uvm_gpu_t *gpu, NvU64 dma_address)
static void uvm_gpu_unmap_cpu_page(uvm_parent_gpu_t *parent_gpu, NvU64 dma_address)
{
uvm_gpu_unmap_cpu_pages(gpu, dma_address, PAGE_SIZE);
uvm_gpu_unmap_cpu_pages(parent_gpu, dma_address, PAGE_SIZE);
}
// Allocate and map a page of system DMA memory on the GPU for physical access

View File

@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2017-2021 NVIDIA Corporation
Copyright (c) 2017-2022 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@ -41,6 +41,10 @@
#define UVM_PERF_ACCESS_COUNTER_THRESHOLD_MAX ((1 << 16) - 1)
#define UVM_PERF_ACCESS_COUNTER_THRESHOLD_DEFAULT 256
#define UVM_ACCESS_COUNTER_ACTION_NOTIFY 0x1
#define UVM_ACCESS_COUNTER_ACTION_CLEAR 0x2
#define UVM_ACCESS_COUNTER_ON_MANAGED 0x4
// Each page in a tracked physical range may belong to a different VA Block. We
// preallocate an array of reverse map translations. However, access counter
// granularity can be set to up to 16G, which would require an array too large
@ -934,25 +938,6 @@ static void preprocess_virt_notifications(uvm_gpu_t *gpu,
translate_virt_notifications_instance_ptrs(gpu, batch_context);
}
static NV_STATUS service_virt_notifications(uvm_gpu_t *gpu,
uvm_access_counter_service_batch_context_t *batch_context)
{
// TODO: Bug 1990466: Service virtual notifications. Entries with NULL
// va_space are simply dropped.
if (uvm_enable_builtin_tests) {
NvU32 i;
preprocess_virt_notifications(gpu, batch_context);
for (i = 0; i < batch_context->virt.num_notifications; ++i) {
const bool on_managed = false;
uvm_tools_broadcast_access_counter(gpu, batch_context->virt.notifications[i], on_managed);
}
}
return NV_OK;
}
// GPA notifications provide a physical address and an aperture. Sort
// accesses by aperture to try to coalesce operations on the same target
// processor.
@ -1046,9 +1031,19 @@ static NV_STATUS service_va_block_locked(uvm_processor_id_t processor,
uvm_page_mask_set(&service_context->thrashing_pin_mask, page_index);
}
// If the underlying VMA is gone, skip HMM migrations.
if (uvm_va_block_is_hmm(va_block)) {
status = uvm_hmm_find_vma(&service_context->block_context, address);
if (status == NV_ERR_INVALID_ADDRESS)
continue;
UVM_ASSERT(status == NV_OK);
}
service_context->block_context.policy = uvm_va_policy_get(va_block, address);
new_residency = uvm_va_block_select_residency(va_block,
&service_context->block_context,
page_index,
processor,
uvm_fault_access_type_mask_bit(UVM_FAULT_ACCESS_TYPE_PREFETCH),
@ -1158,7 +1153,7 @@ static NV_STATUS service_phys_single_va_block(uvm_gpu_t *gpu,
const uvm_access_counter_buffer_entry_t *current_entry,
const uvm_reverse_map_t *reverse_mappings,
size_t num_reverse_mappings,
bool *clear_counter)
unsigned *out_flags)
{
size_t index;
uvm_va_block_t *va_block = reverse_mappings[0].va_block;
@ -1168,7 +1163,7 @@ static NV_STATUS service_phys_single_va_block(uvm_gpu_t *gpu,
const uvm_processor_id_t processor = current_entry->counter_type == UVM_ACCESS_COUNTER_TYPE_MIMC?
gpu->id: UVM_ID_CPU;
*clear_counter = false;
*out_flags &= ~UVM_ACCESS_COUNTER_ACTION_CLEAR;
UVM_ASSERT(num_reverse_mappings > 0);
@ -1217,7 +1212,7 @@ static NV_STATUS service_phys_single_va_block(uvm_gpu_t *gpu,
uvm_mutex_unlock(&va_block->lock);
if (status == NV_OK)
*clear_counter = true;
*out_flags |= UVM_ACCESS_COUNTER_ACTION_CLEAR;
}
done:
@ -1238,25 +1233,26 @@ static NV_STATUS service_phys_va_blocks(uvm_gpu_t *gpu,
const uvm_access_counter_buffer_entry_t *current_entry,
const uvm_reverse_map_t *reverse_mappings,
size_t num_reverse_mappings,
bool *clear_counter)
unsigned *out_flags)
{
NV_STATUS status = NV_OK;
size_t index;
*clear_counter = false;
*out_flags &= ~UVM_ACCESS_COUNTER_ACTION_CLEAR;
for (index = 0; index < num_reverse_mappings; ++index) {
bool clear_counter_local = false;
unsigned out_flags_local = 0;
status = service_phys_single_va_block(gpu,
batch_context,
current_entry,
reverse_mappings + index,
1,
&clear_counter_local);
&out_flags_local);
if (status != NV_OK)
break;
*clear_counter = *clear_counter || clear_counter_local;
UVM_ASSERT((out_flags_local & ~UVM_ACCESS_COUNTER_ACTION_CLEAR) == 0);
*out_flags |= out_flags_local;
}
// In the case of failure, drop the refcounts for the remaining reverse mappings
@ -1267,18 +1263,13 @@ static NV_STATUS service_phys_va_blocks(uvm_gpu_t *gpu,
}
// Iterate over all regions set in the given sub_granularity mask
#define for_each_sub_granularity_region(region_start, region_end, sub_granularity, config) \
for ((region_start) = find_first_bit(&(sub_granularity), (config)->sub_granularity_regions_per_translation), \
(region_end) = find_next_zero_bit(&(sub_granularity), \
(config)->sub_granularity_regions_per_translation, \
(region_start) + 1); \
(region_start) < config->sub_granularity_regions_per_translation; \
(region_start) = find_next_bit(&(sub_granularity), \
(config)->sub_granularity_regions_per_translation, \
(region_end) + 1), \
(region_end) = find_next_zero_bit(&(sub_granularity), \
(config)->sub_granularity_regions_per_translation, \
(region_start) + 1))
#define for_each_sub_granularity_region(region_start, region_end, sub_granularity, num_regions) \
for ((region_start) = find_first_bit(&(sub_granularity), (num_regions)), \
(region_end) = find_next_zero_bit(&(sub_granularity), (num_regions), (region_start) + 1); \
(region_start) < (num_regions); \
(region_start) = find_next_bit(&(sub_granularity), (num_regions), (region_end) + 1), \
(region_end) = find_next_zero_bit(&(sub_granularity), (num_regions), (region_start) + 1))
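/*
 * Illustrative sketch (not from the driver sources): the macro above walks the
 * sub_granularity bitmask as runs of consecutive set bits, yielding
 * [region_start, region_end) pairs. The same traversal written with plain bit
 * tests, so the expected output for a sample mask is easy to verify:
 */
#include <stdio.h>

int main(void)
{
    unsigned long mask = 0xF0F0UL;   /* hypothetical sub_granularity value */
    unsigned num_regions = 16;       /* sub_granularity_regions_per_translation */
    unsigned start = 0;

    while (start < num_regions) {
        unsigned end;

        /* Find the next set bit (start of a run). */
        while (start < num_regions && !(mask & (1UL << start)))
            start++;
        if (start >= num_regions)
            break;

        /* Find the next clear bit (end of the run). */
        end = start;
        while (end < num_regions && (mask & (1UL << end)))
            end++;

        printf("region [%u, %u)\n", start, end);   /* prints [4, 8) then [12, 16) */
        start = end + 1;
    }

    return 0;
}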
static bool are_reverse_mappings_on_single_block(const uvm_reverse_map_t *reverse_mappings, size_t num_reverse_mappings)
{
@ -1309,7 +1300,7 @@ static NV_STATUS service_phys_notification_translation(uvm_gpu_t *gpu,
NvU64 address,
unsigned long sub_granularity,
size_t *num_reverse_mappings,
bool *clear_counter)
unsigned *out_flags)
{
NV_STATUS status;
NvU32 region_start, region_end;
@ -1318,7 +1309,7 @@ static NV_STATUS service_phys_notification_translation(uvm_gpu_t *gpu,
// Get the reverse_map translations for all the regions set in the
// sub_granularity field of the counter.
for_each_sub_granularity_region(region_start, region_end, sub_granularity, config) {
for_each_sub_granularity_region(region_start, region_end, sub_granularity, config->sub_granularity_regions_per_translation) {
NvU64 local_address = address + region_start * config->sub_granularity_region_size;
NvU32 local_translation_size = (region_end - region_start) * config->sub_granularity_region_size;
uvm_reverse_map_t *local_reverse_mappings = batch_context->phys.translations + *num_reverse_mappings;
@ -1350,7 +1341,7 @@ static NV_STATUS service_phys_notification_translation(uvm_gpu_t *gpu,
current_entry,
batch_context->phys.translations,
*num_reverse_mappings,
clear_counter);
out_flags);
}
else {
status = service_phys_va_blocks(gpu,
@ -1358,7 +1349,7 @@ static NV_STATUS service_phys_notification_translation(uvm_gpu_t *gpu,
current_entry,
batch_context->phys.translations,
*num_reverse_mappings,
clear_counter);
out_flags);
}
return status;
@ -1366,7 +1357,8 @@ static NV_STATUS service_phys_notification_translation(uvm_gpu_t *gpu,
static NV_STATUS service_phys_notification(uvm_gpu_t *gpu,
uvm_access_counter_service_batch_context_t *batch_context,
const uvm_access_counter_buffer_entry_t *current_entry)
const uvm_access_counter_buffer_entry_t *current_entry,
unsigned *out_flags)
{
NvU64 address;
NvU64 translation_index;
@ -1377,7 +1369,7 @@ static NV_STATUS service_phys_notification(uvm_gpu_t *gpu,
size_t total_reverse_mappings = 0;
uvm_gpu_t *resident_gpu = NULL;
NV_STATUS status = NV_OK;
bool clear_counter = false;
unsigned flags = 0;
address = current_entry->address.address;
UVM_ASSERT(address % config->translation_size == 0);
@ -1405,7 +1397,7 @@ static NV_STATUS service_phys_notification(uvm_gpu_t *gpu,
for (translation_index = 0; translation_index < config->translations_per_counter; ++translation_index) {
size_t num_reverse_mappings;
bool clear_counter_local = false;
unsigned out_flags_local = 0;
status = service_phys_notification_translation(gpu,
resident_gpu,
batch_context,
@ -1414,9 +1406,11 @@ static NV_STATUS service_phys_notification(uvm_gpu_t *gpu,
address,
sub_granularity,
&num_reverse_mappings,
&clear_counter_local);
&out_flags_local);
total_reverse_mappings += num_reverse_mappings;
clear_counter = clear_counter || clear_counter_local;
UVM_ASSERT((out_flags_local & ~UVM_ACCESS_COUNTER_ACTION_CLEAR) == 0);
flags |= out_flags_local;
if (status != NV_OK)
break;
@ -1425,17 +1419,14 @@ static NV_STATUS service_phys_notification(uvm_gpu_t *gpu,
sub_granularity = sub_granularity >> config->sub_granularity_regions_per_translation;
}
// TODO: Bug 1990466: Here we already have virtual addresses and
// address spaces. Merge virtual and physical notification handling
// Currently we only report events for our tests, not for tools
if (uvm_enable_builtin_tests) {
const bool on_managed = total_reverse_mappings != 0;
uvm_tools_broadcast_access_counter(gpu, current_entry, on_managed);
*out_flags |= UVM_ACCESS_COUNTER_ACTION_NOTIFY;
*out_flags |= ((total_reverse_mappings != 0) ? UVM_ACCESS_COUNTER_ON_MANAGED : 0);
}
if (status == NV_OK && clear_counter)
status = access_counter_clear_targeted(gpu, current_entry);
if (status == NV_OK && (flags & UVM_ACCESS_COUNTER_ACTION_CLEAR))
*out_flags |= UVM_ACCESS_COUNTER_ACTION_CLEAR;
return status;
}
@ -1450,11 +1441,18 @@ static NV_STATUS service_phys_notifications(uvm_gpu_t *gpu,
for (i = 0; i < batch_context->phys.num_notifications; ++i) {
NV_STATUS status;
uvm_access_counter_buffer_entry_t *current_entry = batch_context->phys.notifications[i];
unsigned flags = 0;
if (!UVM_ID_IS_VALID(current_entry->physical_info.resident_id))
continue;
status = service_phys_notification(gpu, batch_context, current_entry);
status = service_phys_notification(gpu, batch_context, current_entry, &flags);
if (flags & UVM_ACCESS_COUNTER_ACTION_NOTIFY)
uvm_tools_broadcast_access_counter(gpu, current_entry, flags & UVM_ACCESS_COUNTER_ON_MANAGED);
if (status == NV_OK && (flags & UVM_ACCESS_COUNTER_ACTION_CLEAR))
status = access_counter_clear_targeted(gpu, current_entry);
if (status != NV_OK)
return status;
}
@ -1462,6 +1460,191 @@ static NV_STATUS service_phys_notifications(uvm_gpu_t *gpu,
return NV_OK;
}
static int cmp_sort_gpu_phys_addr(const void *_a, const void *_b)
{
return uvm_gpu_phys_addr_cmp(*(uvm_gpu_phys_address_t*)_a,
*(uvm_gpu_phys_address_t*)_b);
}
static bool gpu_phys_same_region(uvm_gpu_phys_address_t a, uvm_gpu_phys_address_t b, NvU64 granularity)
{
if (a.aperture != b.aperture)
return false;
UVM_ASSERT(is_power_of_2(granularity));
return UVM_ALIGN_DOWN(a.address, granularity) == UVM_ALIGN_DOWN(b.address, granularity);
}
static bool phys_address_in_accessed_sub_region(uvm_gpu_phys_address_t address,
NvU64 region_size,
NvU64 sub_region_size,
NvU32 accessed_mask)
{
const unsigned accessed_index = (address.address % region_size) / sub_region_size;
// accessed_mask is only filled for tracking granularities larger than 64K
if (region_size == UVM_PAGE_SIZE_64K)
return true;
UVM_ASSERT(accessed_index < 32);
return ((1 << accessed_index) & accessed_mask) != 0;
}
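/*
 * Illustrative sketch (not from the driver sources): the helper above maps a
 * candidate physical address to its sub-region index inside the tracked region
 * and accepts it only if that bit is set in the notification's sub_granularity
 * mask (when the tracking granularity is 64K the mask is unused and everything
 * passes). The index computation worked through with made-up sizes:
 */
#include <stdio.h>

int main(void)
{
    unsigned long long region_size     = 2ULL << 20;   /* 2 MB tracking granularity */
    unsigned long long sub_region_size = 64ULL << 10;  /* 2 MB / 32 sub-regions */
    unsigned long long phys_addr       = 0x1234570000ULL;
    unsigned int accessed_mask         = 0x00F00000;   /* sub-regions 20..23 accessed */

    unsigned idx = (unsigned)((phys_addr % region_size) / sub_region_size);

    /* Prints: sub-region index 23 -> possibly accessed */
    printf("sub-region index %u -> %s\n",
           idx,
           ((1u << idx) & accessed_mask) ? "possibly accessed" : "skip");
    return 0;
}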
static NV_STATUS service_virt_notification(uvm_gpu_t *gpu,
uvm_access_counter_service_batch_context_t *batch_context,
const uvm_access_counter_buffer_entry_t *current_entry,
unsigned *out_flags)
{
NV_STATUS status = NV_OK;
NvU64 notification_size;
NvU64 address;
uvm_processor_id_t *resident_processors = batch_context->virt.scratch.resident_processors;
uvm_gpu_phys_address_t *phys_addresses = batch_context->virt.scratch.phys_addresses;
int num_addresses = 0;
int i;
// Virtual address notifications are always 64K aligned
NvU64 region_start = current_entry->address.address;
NvU64 region_end = current_entry->address.address + UVM_PAGE_SIZE_64K;
uvm_access_counter_buffer_info_t *access_counters = &gpu->parent->access_counter_buffer_info;
uvm_access_counter_type_t counter_type = current_entry->counter_type;
const uvm_gpu_access_counter_type_config_t *config = get_config_for_type(access_counters, counter_type);
uvm_va_space_t *va_space = current_entry->virtual_info.va_space;
UVM_ASSERT(counter_type == UVM_ACCESS_COUNTER_TYPE_MIMC);
// Entries with NULL va_space are simply dropped.
if (!va_space)
return NV_OK;
status = config_granularity_to_bytes(config->rm.granularity, &notification_size);
if (status != NV_OK)
return status;
// Collect physical locations that could have been touched
// in the reported 64K VA region. The notification mask can
// correspond to any of them.
uvm_va_space_down_read(va_space);
for (address = region_start; address < region_end;) {
uvm_va_block_t *va_block;
NV_STATUS local_status = uvm_va_block_find(va_space, address, &va_block);
if (local_status == NV_ERR_INVALID_ADDRESS || local_status == NV_ERR_OBJECT_NOT_FOUND) {
address += PAGE_SIZE;
continue;
}
uvm_mutex_lock(&va_block->lock);
while (address < va_block->end && address < region_end) {
const unsigned page_index = uvm_va_block_cpu_page_index(va_block, address);
// UVM va_block always maps the closest resident location to the processor
const uvm_processor_id_t res_id = uvm_va_block_page_get_closest_resident(va_block, page_index, gpu->id);
// Add physical location if it's valid and not local vidmem
if (UVM_ID_IS_VALID(res_id) && !uvm_id_equal(res_id, gpu->id)) {
uvm_gpu_phys_address_t phys_address = uvm_va_block_res_phys_page_address(va_block, page_index, res_id, gpu);
if (phys_address_in_accessed_sub_region(phys_address,
notification_size,
config->sub_granularity_region_size,
current_entry->sub_granularity)) {
resident_processors[num_addresses] = res_id;
phys_addresses[num_addresses] = phys_address;
++num_addresses;
}
else {
UVM_DBG_PRINT_RL("Skipping phys address %llx:%s, because it couldn't have been accessed in mask %x",
phys_address.address,
uvm_aperture_string(phys_address.aperture),
current_entry->sub_granularity);
}
}
address += PAGE_SIZE;
}
uvm_mutex_unlock(&va_block->lock);
}
uvm_va_space_up_read(va_space);
// The addresses need to be sorted to aid coalescing.
sort(phys_addresses,
num_addresses,
sizeof(*phys_addresses),
cmp_sort_gpu_phys_addr,
NULL);
for (i = 0; i < num_addresses; ++i) {
uvm_access_counter_buffer_entry_t *fake_entry = &batch_context->virt.scratch.phys_entry;
// Skip the current pointer if the physical region was already handled
if (i > 0 && gpu_phys_same_region(phys_addresses[i - 1], phys_addresses[i], notification_size)) {
UVM_ASSERT(uvm_id_equal(resident_processors[i - 1], resident_processors[i]));
continue;
}
UVM_DBG_PRINT_RL("Faking MIMC address[%i/%i]: %llx (granularity mask: %llx) in aperture %s on device %s\n",
i,
num_addresses,
phys_addresses[i].address,
notification_size - 1,
uvm_aperture_string(phys_addresses[i].aperture),
uvm_gpu_name(gpu));
// Construct a fake phys addr AC entry
fake_entry->counter_type = current_entry->counter_type;
fake_entry->address.address = UVM_ALIGN_DOWN(phys_addresses[i].address, notification_size);
fake_entry->address.aperture = phys_addresses[i].aperture;
fake_entry->address.is_virtual = false;
fake_entry->physical_info.resident_id = resident_processors[i];
fake_entry->counter_value = current_entry->counter_value;
fake_entry->sub_granularity = current_entry->sub_granularity;
status = service_phys_notification(gpu, batch_context, fake_entry, out_flags);
if (status != NV_OK)
break;
}
return status;
}
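/*
 * Illustrative sketch (not from the driver sources): the loop above sorts the
 * candidate physical addresses so that addresses in the same
 * notification_size-aligned region become adjacent, then emits one fake entry
 * per region and skips the rest. The same coalescing on plain integers:
 */
#include <stdio.h>
#include <stdlib.h>

#define NOTIFICATION_SIZE 0x10000ULL   /* hypothetical 64 KB granularity */

static int cmp_u64(const void *a, const void *b)
{
    unsigned long long x = *(const unsigned long long *)a;
    unsigned long long y = *(const unsigned long long *)b;
    return (x > y) - (x < y);
}

int main(void)
{
    unsigned long long addrs[] = { 0x34000, 0x12000, 0x15000, 0x38000 };
    size_t n = sizeof(addrs) / sizeof(addrs[0]);
    size_t i;

    qsort(addrs, n, sizeof(addrs[0]), cmp_u64);

    for (i = 0; i < n; i++) {
        /* Skip this address if it lands in the same region as the previous one. */
        if (i > 0 && (addrs[i - 1] & ~(NOTIFICATION_SIZE - 1)) ==
                     (addrs[i]     & ~(NOTIFICATION_SIZE - 1)))
            continue;

        /* Prints regions 0x10000 and 0x30000 for the sample input. */
        printf("fake entry for region 0x%llx\n", addrs[i] & ~(NOTIFICATION_SIZE - 1));
    }

    return 0;
}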
static NV_STATUS service_virt_notifications(uvm_gpu_t *gpu,
uvm_access_counter_service_batch_context_t *batch_context)
{
NvU32 i;
NV_STATUS status = NV_OK;
preprocess_virt_notifications(gpu, batch_context);
for (i = 0; i < batch_context->virt.num_notifications; ++i) {
unsigned flags = 0;
uvm_access_counter_buffer_entry_t *current_entry = batch_context->virt.notifications[i];
status = service_virt_notification(gpu, batch_context, current_entry, &flags);
UVM_DBG_PRINT_RL("Processed virt access counter (%d/%d): %sMANAGED (status: %d) clear: %s\n",
i + 1,
batch_context->virt.num_notifications,
(flags & UVM_ACCESS_COUNTER_ON_MANAGED) ? "" : "NOT ",
status,
(flags & UVM_ACCESS_COUNTER_ACTION_CLEAR) ? "YES" : "NO");
if (uvm_enable_builtin_tests)
uvm_tools_broadcast_access_counter(gpu, current_entry, flags & UVM_ACCESS_COUNTER_ON_MANAGED);
if (status == NV_OK && (flags & UVM_ACCESS_COUNTER_ACTION_CLEAR))
status = access_counter_clear_targeted(gpu, current_entry);
if (status != NV_OK)
break;
}
return status;
}
void uvm_gpu_service_access_counters(uvm_gpu_t *gpu)
{
NV_STATUS status = NV_OK;

View File

@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2017-2021 NVIDIA Corporation
Copyright (c) 2017-2022 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@ -338,7 +338,6 @@ static NV_STATUS service_managed_fault_in_block_locked(uvm_gpu_t *gpu,
uvm_processor_id_t new_residency;
bool read_duplicate;
uvm_va_space_t *va_space = uvm_va_block_get_va_space(va_block);
uvm_va_range_t *va_range = va_block->va_range;
uvm_non_replayable_fault_buffer_info_t *non_replayable_faults = &gpu->parent->fault_buffer_info.non_replayable;
UVM_ASSERT(!fault_entry->is_fatal);
@ -365,8 +364,11 @@ static NV_STATUS service_managed_fault_in_block_locked(uvm_gpu_t *gpu,
}
// Check logical permissions
status = uvm_va_range_check_logical_permissions(va_range,
status = uvm_va_block_check_logical_permissions(va_block,
&service_context->block_context,
gpu->id,
uvm_va_block_cpu_page_index(va_block,
fault_entry->fault_address),
fault_entry->fault_access_type,
uvm_range_group_address_migratable(va_space,
fault_entry->fault_address));
@ -386,6 +388,7 @@ static NV_STATUS service_managed_fault_in_block_locked(uvm_gpu_t *gpu,
// Compute new residency and update the masks
new_residency = uvm_va_block_select_residency(va_block,
&service_context->block_context,
page_index,
gpu->id,
fault_entry->access_type_mask,
@ -422,7 +425,6 @@ static NV_STATUS service_managed_fault_in_block_locked(uvm_gpu_t *gpu,
}
static NV_STATUS service_managed_fault_in_block(uvm_gpu_t *gpu,
struct mm_struct *mm,
uvm_va_block_t *va_block,
uvm_fault_buffer_entry_t *fault_entry)
{
@ -432,7 +434,6 @@ static NV_STATUS service_managed_fault_in_block(uvm_gpu_t *gpu,
service_context->operation = UVM_SERVICE_OPERATION_NON_REPLAYABLE_FAULTS;
service_context->num_retries = 0;
service_context->block_context.mm = mm;
uvm_mutex_lock(&va_block->lock);
@ -598,6 +599,7 @@ static NV_STATUS service_fault(uvm_gpu_t *gpu, uvm_fault_buffer_entry_t *fault_e
// to remain valid until we release. If no mm is registered, we
// can only service managed faults, not ATS/HMM faults.
mm = uvm_va_space_mm_retain_lock(va_space);
va_block_context->mm = mm;
uvm_va_space_down_read(va_space);
@ -622,12 +624,11 @@ static NV_STATUS service_fault(uvm_gpu_t *gpu, uvm_fault_buffer_entry_t *fault_e
if (!fault_entry->is_fatal) {
status = uvm_va_block_find_create(fault_entry->va_space,
mm,
fault_entry->fault_address,
va_block_context,
&va_block);
if (status == NV_OK)
status = service_managed_fault_in_block(gpu_va_space->gpu, mm, va_block, fault_entry);
status = service_managed_fault_in_block(gpu_va_space->gpu, va_block, fault_entry);
else
status = service_non_managed_fault(gpu_va_space, mm, fault_entry, status);

View File

@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2015-2021 NVIDIA Corporation
Copyright (c) 2015-2022 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@ -1055,13 +1055,17 @@ static NV_STATUS preprocess_fault_batch(uvm_gpu_t *gpu, uvm_fault_service_batch_
// - service_access_type: highest access type that can be serviced.
static uvm_fault_access_type_t check_fault_access_permissions(uvm_gpu_t *gpu,
uvm_va_block_t *va_block,
uvm_va_block_context_t *va_block_context,
uvm_fault_buffer_entry_t *fault_entry,
bool allow_migration)
{
NV_STATUS perm_status;
perm_status = uvm_va_range_check_logical_permissions(va_block->va_range,
perm_status = uvm_va_block_check_logical_permissions(va_block,
va_block_context,
gpu->id,
uvm_va_block_cpu_page_index(va_block,
fault_entry->fault_address),
fault_entry->fault_access_type,
allow_migration);
if (perm_status == NV_OK)
@ -1083,8 +1087,11 @@ static uvm_fault_access_type_t check_fault_access_permissions(uvm_gpu_t *gpu,
// service them before we can cancel the write/atomic faults. So we
// retry with read fault access type.
if (uvm_fault_access_type_mask_test(fault_entry->access_type_mask, UVM_FAULT_ACCESS_TYPE_READ)) {
perm_status = uvm_va_range_check_logical_permissions(va_block->va_range,
perm_status = uvm_va_block_check_logical_permissions(va_block,
va_block_context,
gpu->id,
uvm_va_block_cpu_page_index(va_block,
fault_entry->fault_address),
UVM_FAULT_ACCESS_TYPE_READ,
allow_migration);
if (perm_status == NV_OK)
@ -1156,14 +1163,16 @@ static NV_STATUS service_batch_managed_faults_in_block_locked(uvm_gpu_t *gpu,
UVM_ASSERT(ordered_fault_cache[first_fault_index]->fault_address >= va_block->start);
UVM_ASSERT(ordered_fault_cache[first_fault_index]->fault_address <= va_block->end);
end = va_block->end;
if (uvm_va_block_is_hmm(va_block))
if (uvm_va_block_is_hmm(va_block)) {
uvm_hmm_find_policy_end(va_block,
&block_context->block_context,
ordered_fault_cache[first_fault_index]->fault_address,
&end);
else
}
else {
block_context->block_context.policy = uvm_va_range_get_policy(va_block->va_range);
end = va_block->end;
}
// Scan the sorted array and notify the fault event for all fault entries
// in the block
@ -1226,7 +1235,11 @@ static NV_STATUS service_batch_managed_faults_in_block_locked(uvm_gpu_t *gpu,
UVM_ASSERT(iter.start <= current_entry->fault_address && iter.end >= current_entry->fault_address);
service_access_type = check_fault_access_permissions(gpu, va_block, current_entry, iter.migratable);
service_access_type = check_fault_access_permissions(gpu,
va_block,
&block_context->block_context,
current_entry,
iter.migratable);
// Do not exit early due to logical errors such as access permission
// violation.
@ -1269,6 +1282,7 @@ static NV_STATUS service_batch_managed_faults_in_block_locked(uvm_gpu_t *gpu,
// Compute new residency and update the masks
new_residency = uvm_va_block_select_residency(va_block,
&block_context->block_context,
page_index,
gpu->id,
service_access_type_mask,
@ -1348,7 +1362,6 @@ static NV_STATUS service_batch_managed_faults_in_block_locked(uvm_gpu_t *gpu,
// See the comments for function service_fault_batch_block_locked for
// implementation details and error codes.
static NV_STATUS service_batch_managed_faults_in_block(uvm_gpu_t *gpu,
struct mm_struct *mm,
uvm_va_block_t *va_block,
NvU32 first_fault_index,
uvm_fault_service_batch_context_t *batch_context,
@ -1361,7 +1374,6 @@ static NV_STATUS service_batch_managed_faults_in_block(uvm_gpu_t *gpu,
fault_block_context->operation = UVM_SERVICE_OPERATION_REPLAYABLE_FAULTS;
fault_block_context->num_retries = 0;
fault_block_context->block_context.mm = mm;
uvm_mutex_lock(&va_block->lock);
@ -1531,6 +1543,7 @@ static NV_STATUS service_fault_batch(uvm_gpu_t *gpu,
// to remain valid until we release. If no mm is registered, we
// can only service managed faults, not ATS/HMM faults.
mm = uvm_va_space_mm_retain_lock(va_space);
va_block_context->mm = mm;
uvm_va_space_down_read(va_space);
@ -1576,13 +1589,11 @@ static NV_STATUS service_fault_batch(uvm_gpu_t *gpu,
// TODO: Bug 2103669: Service more than one ATS fault at a time so we
// don't do an unconditional VA range lookup for every ATS fault.
status = uvm_va_block_find_create(va_space,
mm,
current_entry->fault_address,
va_block_context,
&va_block);
if (status == NV_OK) {
status = service_batch_managed_faults_in_block(gpu_va_space->gpu,
mm,
va_block,
i,
batch_context,

View File

@ -118,6 +118,13 @@ static bool is_canary(NvU32 val)
return (val & ~UVM_SEMAPHORE_CANARY_MASK) == UVM_SEMAPHORE_CANARY_BASE;
}
// Can the GPU access the semaphore, i.e., can Host/Esched address the semaphore
// pool?
static bool gpu_can_access_semaphore_pool(uvm_gpu_t *gpu, uvm_rm_mem_t *rm_mem)
{
return ((uvm_rm_mem_get_gpu_uvm_va(rm_mem, gpu) + rm_mem->size - 1) < gpu->parent->max_host_va);
}
static NV_STATUS pool_alloc_page(uvm_gpu_semaphore_pool_t *pool)
{
NV_STATUS status;
@ -142,6 +149,9 @@ static NV_STATUS pool_alloc_page(uvm_gpu_semaphore_pool_t *pool)
if (status != NV_OK)
goto error;
// Verify the GPU can access the semaphore pool.
UVM_ASSERT(gpu_can_access_semaphore_pool(pool->gpu, pool_page->memory));
// All semaphores are initially free
bitmap_fill(pool_page->free_semaphores, UVM_SEMAPHORE_COUNT_PER_PAGE);

View File

@ -46,6 +46,7 @@ MODULE_PARM_DESC(uvm_disable_hmm,
#include "uvm_lock.h"
#include "uvm_api.h"
#include "uvm_va_policy.h"
#include "uvm_tools.h"
bool uvm_hmm_is_enabled_system_wide(void)
{
@ -96,6 +97,9 @@ NV_STATUS uvm_hmm_va_space_initialize_test(uvm_va_space_t *va_space)
if (!uvm_hmm_is_enabled_system_wide() || !mm)
return NV_WARN_NOTHING_TO_DO;
if (va_space->initialization_flags & UVM_INIT_FLAGS_DISABLE_HMM)
return NV_ERR_INVALID_STATE;
uvm_assert_mmap_lock_locked_write(mm);
uvm_assert_rwsem_locked_write(&va_space->lock);
@ -179,12 +183,19 @@ static bool hmm_invalidate(uvm_va_block_t *va_block,
mmu_interval_set_seq(mni, cur_seq);
// Note: unmap_vmas() does MMU_NOTIFY_UNMAP [0, 0xffffffffffffffff]
// Also note that hmm_invalidate() can be called when a new va_block is not
// yet inserted into the va_space->hmm.blocks table while the original
// va_block is being split. The original va_block may have its end address
// updated before the mmu interval notifier is updated so this invalidate
// may be for a range past the va_block end address.
start = range->start;
end = (range->end == ULONG_MAX) ? range->end : range->end - 1;
if (start < va_block->start)
start = va_block->start;
if (end > va_block->end)
end = va_block->end;
if (start > end)
goto unlock;
if (range->event == MMU_NOTIFY_UNMAP)
uvm_va_policy_clear(va_block, start, end);
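/*
 * Illustrative sketch (not from the driver sources): the clamping above
 * intersects the notifier range with the va_block bounds and bails out when
 * the intersection is empty (which the comment notes can happen mid-split).
 * The same logic on plain numbers, including the ULONG_MAX end handling:
 */
#include <limits.h>
#include <stdio.h>

int main(void)
{
    unsigned long range_start = 0x7000, range_end_excl = ULONG_MAX; /* unmap_vmas()-style range */
    unsigned long block_start = 0x10000, block_end = 0x1FFFF;       /* inclusive block bounds */

    /* Convert the exclusive end to inclusive, avoiding overflow at ULONG_MAX. */
    unsigned long start = range_start;
    unsigned long end = (range_end_excl == ULONG_MAX) ? range_end_excl : range_end_excl - 1;

    if (start < block_start)
        start = block_start;
    if (end > block_end)
        end = block_end;

    if (start > end)
        printf("no overlap with this block\n");
    else
        printf("invalidate [0x%lx, 0x%lx]\n", start, end);   /* [0x10000, 0x1ffff] */

    return 0;
}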
@ -266,6 +277,7 @@ static NV_STATUS hmm_va_block_find_create(uvm_va_space_t *va_space,
UVM_ASSERT(uvm_va_space_initialized(va_space) == NV_OK);
UVM_ASSERT(mm);
UVM_ASSERT(!va_block_context || va_block_context->mm == mm);
uvm_assert_mmap_lock_locked(mm);
uvm_assert_rwsem_locked(&va_space->lock);
UVM_ASSERT(PAGE_ALIGNED(addr));
@ -294,11 +306,13 @@ static NV_STATUS hmm_va_block_find_create(uvm_va_space_t *va_space,
// a maximum interval that doesn't overlap any existing UVM va_ranges.
// We know that 'addr' is not within a va_range or
// hmm_va_block_find_create() wouldn't be called.
uvm_range_tree_adjust_interval(&va_space->va_range_tree, addr, &start, &end);
status = uvm_range_tree_find_hole_in(&va_space->va_range_tree, addr, &start, &end);
UVM_ASSERT(status == NV_OK);
// Search for existing HMM va_blocks in the start/end interval and create
// a maximum interval that doesn't overlap any existing HMM va_blocks.
uvm_range_tree_adjust_interval(&va_space->hmm.blocks, addr, &start, &end);
status = uvm_range_tree_find_hole_in(&va_space->hmm.blocks, addr, &start, &end);
UVM_ASSERT(status == NV_OK);
// Create a HMM va_block with a NULL va_range pointer.
status = uvm_va_block_create(NULL, start, end, &va_block);
@ -321,10 +335,7 @@ static NV_STATUS hmm_va_block_find_create(uvm_va_space_t *va_space,
}
status = uvm_range_tree_add(&va_space->hmm.blocks, &va_block->hmm.node);
if (status != NV_OK) {
UVM_ASSERT(status != NV_ERR_UVM_ADDRESS_IN_USE);
goto err_unreg;
}
UVM_ASSERT(status == NV_OK);
done:
uvm_mutex_unlock(&va_space->hmm.blocks_lock);
@ -333,9 +344,6 @@ done:
*va_block_ptr = va_block;
return NV_OK;
err_unreg:
mmu_interval_notifier_remove(&va_block->hmm.notifier);
err_release:
uvm_va_block_release(va_block);
@ -352,10 +360,67 @@ NV_STATUS uvm_hmm_va_block_find_create(uvm_va_space_t *va_space,
return hmm_va_block_find_create(va_space, addr, false, va_block_context, va_block_ptr);
}
NV_STATUS uvm_hmm_find_vma(uvm_va_block_context_t *va_block_context, NvU64 addr)
{
struct mm_struct *mm = va_block_context->mm;
struct vm_area_struct *vma;
if (!mm)
return NV_ERR_INVALID_ADDRESS;
uvm_assert_mmap_lock_locked(mm);
vma = find_vma(mm, addr);
if (!uvm_hmm_vma_is_valid(vma, addr, false))
return NV_ERR_INVALID_ADDRESS;
va_block_context->hmm.vma = vma;
return NV_OK;
}
bool uvm_hmm_va_block_context_vma_is_valid(uvm_va_block_t *va_block,
uvm_va_block_context_t *va_block_context,
uvm_va_block_region_t region)
{
uvm_assert_mutex_locked(&va_block->lock);
if (uvm_va_block_is_hmm(va_block)) {
struct vm_area_struct *vma = va_block_context->hmm.vma;
UVM_ASSERT(vma);
UVM_ASSERT(va_block_context->mm == vma->vm_mm);
uvm_assert_mmap_lock_locked(va_block_context->mm);
UVM_ASSERT(vma->vm_start <= uvm_va_block_region_start(va_block, region));
UVM_ASSERT(vma->vm_end > uvm_va_block_region_end(va_block, region));
}
return true;
}
NV_STATUS uvm_hmm_test_va_block_inject_split_error(uvm_va_space_t *va_space, NvU64 addr)
{
uvm_va_block_test_t *block_test;
uvm_va_block_t *va_block;
NV_STATUS status;
if (!uvm_hmm_is_enabled(va_space))
return NV_ERR_INVALID_ADDRESS;
status = hmm_va_block_find_create(va_space, addr, false, NULL, &va_block);
if (status != NV_OK)
return status;
block_test = uvm_va_block_get_test(va_block);
if (block_test)
block_test->inject_split_error = true;
return NV_OK;
}
typedef struct {
struct mmu_interval_notifier notifier;
uvm_va_block_t *existing_block;
uvm_va_block_t *new_block;
} hmm_split_invalidate_data_t;
static bool hmm_split_invalidate(struct mmu_interval_notifier *mni,
@ -363,14 +428,9 @@ static bool hmm_split_invalidate(struct mmu_interval_notifier *mni,
unsigned long cur_seq)
{
hmm_split_invalidate_data_t *split_data = container_of(mni, hmm_split_invalidate_data_t, notifier);
uvm_va_block_t *existing_block = split_data->existing_block;
uvm_va_block_t *new_block = split_data->new_block;
if (uvm_ranges_overlap(existing_block->start, existing_block->end, range->start, range->end - 1))
hmm_invalidate(existing_block, range, cur_seq);
if (uvm_ranges_overlap(new_block->start, new_block->end, range->start, range->end - 1))
hmm_invalidate(new_block, range, cur_seq);
uvm_tools_test_hmm_split_invalidate(split_data->existing_block->hmm.va_space);
hmm_invalidate(split_data->existing_block, range, cur_seq);
return true;
}
@ -404,6 +464,7 @@ static NV_STATUS hmm_split_block(uvm_va_block_t *va_block,
uvm_va_space_t *va_space = va_block->hmm.va_space;
struct mm_struct *mm = va_space->va_space_mm.mm;
hmm_split_invalidate_data_t split_data;
NvU64 delay_us;
uvm_va_block_t *new_va_block;
NV_STATUS status;
int ret;
@ -419,22 +480,23 @@ static NV_STATUS hmm_split_block(uvm_va_block_t *va_block,
return status;
// Initialize the newly created HMM va_block.
new_va_block->hmm.node.start = new_va_block->start;
new_va_block->hmm.node.end = new_va_block->end;
new_va_block->hmm.va_space = va_space;
uvm_range_tree_init(&new_va_block->hmm.va_policy_tree);
// The MMU interval notifier has to be removed in order to resize it.
// That means there would be a window of time where invalidation callbacks
// could be missed. To handle this case, we register a temporary notifier
// to cover the same address range while resizing the old notifier (it is
// OK to have multiple notifiers for the same range; we may simply try to
// invalidate twice).
split_data.existing_block = va_block;
split_data.new_block = new_va_block;
ret = mmu_interval_notifier_insert(&split_data.notifier,
ret = mmu_interval_notifier_insert(&new_va_block->hmm.notifier,
mm,
va_block->start,
new_va_block->end - va_block->start + 1,
&hmm_notifier_split_ops);
new_va_block->start,
uvm_va_block_size(new_va_block),
&uvm_hmm_notifier_ops);
// Since __mmu_notifier_register() was called when the va_space was
// initially created, we know that mm->notifier_subscriptions is valid
// and mmu_interval_notifier_insert() can't return ENOMEM.
// The only error return is for start + length overflowing but we already
// registered the same address range before so there should be no error.
UVM_ASSERT(!ret);
uvm_mutex_lock(&va_block->lock);
@ -444,40 +506,38 @@ static NV_STATUS hmm_split_block(uvm_va_block_t *va_block,
uvm_mutex_unlock(&va_block->lock);
// Since __mmu_notifier_register() was called when the va_space was
// initially created, we know that mm->notifier_subscriptions is valid
// and mmu_interval_notifier_insert() can't return ENOMEM.
// The only error return is for start + length overflowing but we already
// registered the same address range before so there should be no error.
// The MMU interval notifier has to be removed in order to resize it.
// That means there would be a window of time when invalidation callbacks
// could be missed. To handle this case, we register a temporary notifier
// to cover the address range while resizing the old notifier (it is
// OK to have multiple notifiers for the same range; we may simply try to
// invalidate twice).
split_data.existing_block = va_block;
ret = mmu_interval_notifier_insert(&split_data.notifier,
mm,
va_block->start,
new_end - va_block->start + 1,
&hmm_notifier_split_ops);
UVM_ASSERT(!ret);
mmu_interval_notifier_remove(&va_block->hmm.notifier);
// Delay to allow hmm_sanity test to trigger an mmu_notifier during the
// critical window where the split invalidate callback is active.
delay_us = atomic64_read(&va_space->test.split_invalidate_delay_us);
if (delay_us)
udelay(delay_us);
uvm_range_tree_shrink_node(&va_space->hmm.blocks, &va_block->hmm.node, va_block->start, va_block->end);
mmu_interval_notifier_remove(&va_block->hmm.notifier);
// Enable notifications on the old block with the smaller size.
ret = mmu_interval_notifier_insert(&va_block->hmm.notifier,
mm,
va_block->start,
va_block->end - va_block->start + 1,
&uvm_hmm_notifier_ops);
UVM_ASSERT(!ret);
new_va_block->hmm.node.start = new_va_block->start;
new_va_block->hmm.node.end = new_va_block->end;
ret = mmu_interval_notifier_insert(&new_va_block->hmm.notifier,
mm,
new_va_block->start,
new_va_block->end - new_va_block->start + 1,
uvm_va_block_size(va_block),
&uvm_hmm_notifier_ops);
UVM_ASSERT(!ret);
mmu_interval_notifier_remove(&split_data.notifier);
status = uvm_range_tree_add(&va_space->hmm.blocks, &new_va_block->hmm.node);
UVM_ASSERT(status == NV_OK);
if (new_block_ptr)
*new_block_ptr = new_va_block;
@ -485,7 +545,7 @@ static NV_STATUS hmm_split_block(uvm_va_block_t *va_block,
err:
uvm_mutex_unlock(&va_block->lock);
mmu_interval_notifier_remove(&split_data.notifier);
mmu_interval_notifier_remove(&new_va_block->hmm.notifier);
uvm_va_block_release(new_va_block);
return status;
}
@ -536,9 +596,9 @@ static NV_STATUS split_block_if_needed(uvm_va_block_t *va_block,
// page tables. However, it doesn't destroy the va_block because that would
// require calling mmu_interval_notifier_remove() which can't be called from
// the invalidate callback due to Linux locking constraints. If a process
// calls mmap()/munmap() for SAM and then creates a UVM managed allocation,
// calls mmap()/munmap() for SAM and then creates a managed allocation,
// the same VMA range can be picked and there would be a UVM/HMM va_block
// conflict. Creating a UVM managed allocation (or other va_range) calls this
// conflict. Creating a managed allocation (or other va_range) calls this
// function to remove stale HMM va_blocks or split the HMM va_block so there
// is no overlap.
NV_STATUS uvm_hmm_va_block_reclaim(uvm_va_space_t *va_space,
@ -585,6 +645,18 @@ NV_STATUS uvm_hmm_va_block_reclaim(uvm_va_space_t *va_space,
return NV_OK;
}
void uvm_hmm_va_block_split_tree(uvm_va_block_t *existing_va_block, uvm_va_block_t *new_block)
{
uvm_va_space_t *va_space = existing_va_block->hmm.va_space;
UVM_ASSERT(uvm_va_block_is_hmm(existing_va_block));
uvm_assert_rwsem_locked_write(&va_space->lock);
uvm_range_tree_split(&existing_va_block->hmm.va_space->hmm.blocks,
&existing_va_block->hmm.node,
&new_block->hmm.node);
}
NV_STATUS uvm_hmm_split_as_needed(uvm_va_space_t *va_space,
NvU64 addr,
uvm_va_policy_is_split_needed_t split_needed_cb,
@ -733,7 +805,7 @@ void uvm_hmm_find_policy_end(uvm_va_block_t *va_block,
{
struct vm_area_struct *vma = va_block_context->hmm.vma;
uvm_va_policy_node_t *node;
NvU64 end = *endp;
NvU64 end = va_block->end;
uvm_assert_mmap_lock_locked(vma->vm_mm);
uvm_assert_mutex_locked(&va_block->lock);
@ -747,8 +819,9 @@ void uvm_hmm_find_policy_end(uvm_va_block_t *va_block,
if (end > node->node.end)
end = node->node.end;
}
else
else {
va_block_context->policy = &uvm_va_policy_default;
}
*endp = end;
}
@ -760,7 +833,7 @@ NV_STATUS uvm_hmm_find_policy_vma_and_outer(uvm_va_block_t *va_block,
{
struct vm_area_struct *vma;
unsigned long addr;
NvU64 end = va_block->end;
NvU64 end;
uvm_page_index_t outer;
UVM_ASSERT(uvm_va_block_is_hmm(va_block));
@ -801,9 +874,9 @@ static NV_STATUS hmm_clear_thrashing_policy(uvm_va_block_t *va_block,
// before the pinned pages information is destroyed.
status = UVM_VA_BLOCK_RETRY_LOCKED(va_block,
NULL,
unmap_remote_pinned_pages_from_all_processors(va_block,
block_context,
region));
uvm_perf_thrashing_unmap_remote_pinned_pages_all(va_block,
block_context,
region));
uvm_perf_thrashing_info_destroy(va_block);
@ -839,5 +912,186 @@ NV_STATUS uvm_hmm_clear_thrashing_policy(uvm_va_space_t *va_space)
return status;
}
uvm_va_block_region_t uvm_hmm_get_prefetch_region(uvm_va_block_t *va_block,
uvm_va_block_context_t *va_block_context,
NvU64 address)
{
struct vm_area_struct *vma = va_block_context->hmm.vma;
uvm_va_policy_t *policy = va_block_context->policy;
NvU64 start, end;
UVM_ASSERT(uvm_va_block_is_hmm(va_block));
// We need to limit the prefetch region to the VMA.
start = max(va_block->start, (NvU64)vma->vm_start);
end = min(va_block->end, (NvU64)vma->vm_end - 1);
// Also, we need to limit the prefetch region to the policy range.
if (policy == &uvm_va_policy_default) {
NV_STATUS status = uvm_range_tree_find_hole_in(&va_block->hmm.va_policy_tree,
address,
&start,
&end);
// We already know the hole exists and covers the fault region.
UVM_ASSERT(status == NV_OK);
}
else {
uvm_va_policy_node_t *node = uvm_va_policy_node_from_policy(policy);
start = max(start, node->node.start);
end = min(end, node->node.end);
}
return uvm_va_block_region_from_start_end(va_block, start, end);
}
uvm_prot_t uvm_hmm_compute_logical_prot(uvm_va_block_t *va_block,
uvm_va_block_context_t *va_block_context,
NvU64 addr)
{
struct vm_area_struct *vma = va_block_context->hmm.vma;
UVM_ASSERT(uvm_va_block_is_hmm(va_block));
uvm_assert_mmap_lock_locked(va_block_context->mm);
UVM_ASSERT(vma && addr >= vma->vm_start && addr < vma->vm_end);
if (!(vma->vm_flags & VM_READ))
return UVM_PROT_NONE;
else if (!(vma->vm_flags & VM_WRITE))
return UVM_PROT_READ_ONLY;
else
return UVM_PROT_READ_WRITE_ATOMIC;
}
NV_STATUS uvm_test_split_invalidate_delay(UVM_TEST_SPLIT_INVALIDATE_DELAY_PARAMS *params, struct file *filp)
{
uvm_va_space_t *va_space = uvm_va_space_get(filp);
atomic64_set(&va_space->test.split_invalidate_delay_us, params->delay_us);
return NV_OK;
}
NV_STATUS uvm_test_hmm_init(UVM_TEST_HMM_INIT_PARAMS *params, struct file *filp)
{
uvm_va_space_t *va_space = uvm_va_space_get(filp);
struct mm_struct *mm;
NV_STATUS status;
mm = uvm_va_space_mm_or_current_retain(va_space);
if (!mm)
return NV_WARN_NOTHING_TO_DO;
uvm_down_write_mmap_lock(mm);
uvm_va_space_down_write(va_space);
if (va_space->hmm.disable)
status = uvm_hmm_va_space_initialize_test(va_space);
else
status = NV_OK;
uvm_va_space_up_write(va_space);
uvm_up_write_mmap_lock(mm);
uvm_va_space_mm_or_current_release(va_space, mm);
return status;
}
NV_STATUS uvm_hmm_va_range_info(uvm_va_space_t *va_space,
struct mm_struct *mm,
UVM_TEST_VA_RANGE_INFO_PARAMS *params)
{
uvm_range_tree_node_t *tree_node;
uvm_va_policy_node_t *node;
struct vm_area_struct *vma;
uvm_va_block_t *va_block;
if (!mm || !uvm_hmm_is_enabled(va_space))
return NV_ERR_INVALID_ADDRESS;
uvm_assert_mmap_lock_locked(mm);
uvm_assert_rwsem_locked(&va_space->lock);
params->type = UVM_TEST_VA_RANGE_TYPE_MANAGED;
params->managed.subtype = UVM_TEST_RANGE_SUBTYPE_HMM;
params->va_range_start = 0;
params->va_range_end = ULONG_MAX;
params->read_duplication = UVM_TEST_READ_DUPLICATION_UNSET;
memset(&params->preferred_location, 0, sizeof(params->preferred_location));
params->accessed_by_count = 0;
params->managed.vma_start = 0;
params->managed.vma_end = 0;
params->managed.is_zombie = NV_FALSE;
params->managed.owned_by_calling_process = (mm == current->mm ? NV_TRUE : NV_FALSE);
vma = find_vma(mm, params->lookup_address);
if (!uvm_hmm_vma_is_valid(vma, params->lookup_address, false))
return NV_ERR_INVALID_ADDRESS;
params->va_range_start = vma->vm_start;
params->va_range_end = vma->vm_end - 1;
params->managed.vma_start = vma->vm_start;
params->managed.vma_end = vma->vm_end - 1;
uvm_mutex_lock(&va_space->hmm.blocks_lock);
tree_node = uvm_range_tree_find(&va_space->hmm.blocks, params->lookup_address);
if (!tree_node) {
UVM_ASSERT(uvm_range_tree_find_hole_in(&va_space->hmm.blocks, params->lookup_address,
&params->va_range_start, &params->va_range_end) == NV_OK);
uvm_mutex_unlock(&va_space->hmm.blocks_lock);
return NV_OK;
}
uvm_mutex_unlock(&va_space->hmm.blocks_lock);
va_block = hmm_va_block_from_node(tree_node);
uvm_mutex_lock(&va_block->lock);
params->va_range_start = va_block->start;
params->va_range_end = va_block->end;
node = uvm_va_policy_node_find(va_block, params->lookup_address);
if (node) {
uvm_processor_id_t processor_id;
if (params->va_range_start < node->node.start)
params->va_range_start = node->node.start;
if (params->va_range_end > node->node.end)
params->va_range_end = node->node.end;
params->read_duplication = node->policy.read_duplication;
if (!UVM_ID_IS_INVALID(node->policy.preferred_location))
uvm_va_space_processor_uuid(va_space, &params->preferred_location, node->policy.preferred_location);
for_each_id_in_mask(processor_id, &node->policy.accessed_by)
uvm_va_space_processor_uuid(va_space, &params->accessed_by[params->accessed_by_count++], processor_id);
}
else {
uvm_range_tree_find_hole_in(&va_block->hmm.va_policy_tree, params->lookup_address,
&params->va_range_start, &params->va_range_end);
}
uvm_mutex_unlock(&va_block->lock);
return NV_OK;
}
// TODO: Bug 3660968: Remove this hack as soon as HMM migration is implemented
// for VMAs other than anonymous private memory.
bool uvm_hmm_must_use_sysmem(uvm_va_block_t *va_block,
uvm_va_block_context_t *va_block_context)
{
struct vm_area_struct *vma = va_block_context->hmm.vma;
uvm_assert_mutex_locked(&va_block->lock);
if (!uvm_va_block_is_hmm(va_block))
return false;
UVM_ASSERT(vma);
UVM_ASSERT(va_block_context->mm == vma->vm_mm);
uvm_assert_mmap_lock_locked(va_block_context->mm);
return !vma_is_anonymous(vma);
}
#endif // UVM_IS_CONFIG_HMM()

View File

@ -65,6 +65,8 @@ typedef struct
// Initialize HMM for the given va_space for testing.
// Bug 1750144: UVM: Add HMM (Heterogeneous Memory Management) support to
// the UVM driver. Remove this when enough HMM functionality is implemented.
// Locking: the va_space->va_space_mm.mm mmap_lock must be write locked
// and the va_space lock must be held in write mode.
NV_STATUS uvm_hmm_va_space_initialize_test(uvm_va_space_t *va_space);
// Destroy any HMM state for the given va_space.
@ -87,6 +89,10 @@ typedef struct
//
// Return NV_ERR_INVALID_ADDRESS if there is no VMA associated with the
// address 'addr' or the VMA does not have at least PROT_READ permission.
// The caller is also responsible for checking that there is no UVM
// va_range covering the given address before calling this function.
// If va_block_context is not NULL, the VMA is cached in
// va_block_context->hmm.vma.
// Locking: This function must be called with mm retained and locked for
// at least read and the va_space lock at least for read.
NV_STATUS uvm_hmm_va_block_find_create(uvm_va_space_t *va_space,
@ -94,23 +100,53 @@ typedef struct
uvm_va_block_context_t *va_block_context,
uvm_va_block_t **va_block_ptr);
// Find the VMA for the given address and set va_block_context->hmm.vma.
// Return NV_ERR_INVALID_ADDRESS if va_block_context->mm is NULL or there
// is no VMA associated with the address 'addr' or the VMA does not have at
// least PROT_READ permission.
// Locking: This function must be called with mm retained and locked for
// at least read or mm equal to NULL.
NV_STATUS uvm_hmm_find_vma(uvm_va_block_context_t *va_block_context, NvU64 addr);
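//
// Illustrative usage sketch (assumed caller code; the variable names are
// placeholders). It presumes the caller has already retained mm, stored it
// in va_block_context->mm, and follows the locking rule above:
//
//     uvm_down_read_mmap_lock(mm);
//     status = uvm_hmm_find_vma(va_block_context, fault_addr);
//     if (status == NV_OK)
//         vma = va_block_context->hmm.vma;
//     uvm_up_read_mmap_lock(mm);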
// If va_block is a HMM va_block, check that va_block_context->hmm.vma is
// not NULL and covers the given region. This always returns true and is
// intended to only be used with UVM_ASSERT().
// Locking: This function must be called with the va_block lock held and if
// va_block is a HMM block, va_block_context->mm must be retained and
// locked for at least read.
bool uvm_hmm_va_block_context_vma_is_valid(uvm_va_block_t *va_block,
uvm_va_block_context_t *va_block_context,
uvm_va_block_region_t region);
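//
// Example (as noted above, this is only meaningful inside an assertion):
//
//     UVM_ASSERT(uvm_hmm_va_block_context_vma_is_valid(va_block, va_block_context, region));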
// Find or create a HMM va_block and mark it so the next va_block split
// will fail for testing purposes.
// Locking: This function must be called with mm retained and locked for
// at least read and the va_space lock at least for read.
NV_STATUS uvm_hmm_test_va_block_inject_split_error(uvm_va_space_t *va_space, NvU64 addr);
// Reclaim any HMM va_blocks that overlap the given range.
// Note that 'end' is inclusive.
// A HMM va_block can be reclaimed if it doesn't contain any "valid" VMAs.
// See uvm_hmm_vma_is_valid() for details.
// Note that 'end' is inclusive. If mm is NULL, any HMM va_block in the
// range will be reclaimed which assumes that the mm is being torn down
// and was not retained.
// Return values:
// NV_ERR_NO_MEMORY: Reclaim required a block split, which failed.
// NV_OK: There were no HMM blocks in the range, or all HMM
// blocks in the range were successfully reclaimed.
// Locking: If mm is not NULL, it must equal va_space_mm.mm, the caller
// must hold a reference on it, and it must be locked for at least read
// mode. Also, the va_space lock must be held in write mode.
// must retain it with uvm_va_space_mm_or_current_retain() or be sure that
// mm->mm_users is not zero, and it must be locked for at least read mode.
// Also, the va_space lock must be held in write mode.
// TODO: Bug 3372166: add asynchronous va_block reclaim.
NV_STATUS uvm_hmm_va_block_reclaim(uvm_va_space_t *va_space,
struct mm_struct *mm,
NvU64 start,
NvU64 end);
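//
// Illustrative caller sketch (assumed helper name; mm is presumed already
// retained by the caller, per the locking rules above):
//
//     static NV_STATUS example_reclaim(uvm_va_space_t *va_space,
//                                      struct mm_struct *mm,
//                                      NvU64 start,
//                                      NvU64 end)
//     {
//         NV_STATUS status;
//
//         uvm_down_read_mmap_lock(mm);
//         uvm_va_space_down_write(va_space);
//         status = uvm_hmm_va_block_reclaim(va_space, mm, start, end);
//         uvm_va_space_up_write(va_space);
//         uvm_up_read_mmap_lock(mm);
//         return status;
//     }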
// This is called to update the va_space tree of HMM va_blocks after an
// existing va_block is split.
// Locking: the va_space lock must be held in write mode.
void uvm_hmm_va_block_split_tree(uvm_va_block_t *existing_va_block, uvm_va_block_t *new_block);
// Find a HMM policy range that needs to be split. The callback function
// 'split_needed_cb' returns true if the policy range needs to be split.
// If a policy range is split, the existing range is truncated to
@ -148,7 +184,7 @@ typedef struct
// Note that 'last_address' is inclusive.
// Locking: the va_space->va_space_mm.mm mmap_lock must be write locked
// and the va_space lock must be held in write mode.
// TODO: Bug 2046423: need to implement read duplication support in Linux.
// TODO: Bug 3660922: need to implement HMM read duplication support.
static NV_STATUS uvm_hmm_set_read_duplication(uvm_va_space_t *va_space,
uvm_read_duplication_policy_t new_policy,
NvU64 base,
@ -159,10 +195,11 @@ typedef struct
return NV_OK;
}
// Set va_block_context->policy to the policy covering the given address
// 'addr' and update the ending address '*endp' to the minimum of *endp,
// va_block_context->hmm.vma->vm_end - 1, and the ending address of the
// policy range.
// This function assigns va_block_context->policy to the policy covering
// the given address 'addr' and assigns the ending address '*endp' to the
// minimum of va_block->end, va_block_context->hmm.vma->vm_end - 1, and the
// ending address of the policy range. Note that va_block_context->hmm.vma
// is expected to be initialized before calling this function.
// Locking: This function must be called with
// va_block_context->hmm.vma->vm_mm retained and locked for at least read and
// the va_block lock held.
@ -171,11 +208,11 @@ typedef struct
unsigned long addr,
NvU64 *endp);
// Find the VMA for the page index 'page_index',
// set va_block_context->policy to the policy covering the given address,
// and update the ending page range '*outerp' to the minimum of *outerp,
// va_block_context->hmm.vma->vm_end - 1, and the ending address of the
// policy range.
// This function finds the VMA for the page index 'page_index' and assigns
// it to va_block_context->hmm.vma, sets va_block_context->policy to the policy
// covering the given address, and sets the ending page range '*outerp'
// to the minimum of *outerp, va_block_context->hmm.vma->vm_end - 1, the
// ending address of the policy range, and va_block->end.
// Return NV_ERR_INVALID_ADDRESS if no VMA is found; otherwise, NV_OK.
// Locking: This function must be called with
// va_block_context->hmm.vma->vm_mm retained and locked for at least read and
@ -189,6 +226,48 @@ typedef struct
// Locking: va_space lock must be held in write mode.
NV_STATUS uvm_hmm_clear_thrashing_policy(uvm_va_space_t *va_space);
// Return the expanded region around 'address' limited to the intersection
// of va_block start/end, vma start/end, and policy start/end.
// va_block_context must not be NULL, va_block_context->hmm.vma must be
// valid (this is usually set by uvm_hmm_va_block_find_create()), and
// va_block_context->policy must be valid.
// Locking: the caller must hold mm->mmap_lock in at least read mode, the
// va_space lock must be held in at least read mode, and the va_block lock
// held.
uvm_va_block_region_t uvm_hmm_get_prefetch_region(uvm_va_block_t *va_block,
uvm_va_block_context_t *va_block_context,
NvU64 address);
// Return the logical protection allowed for the page at the given address
// in the HMM va_block.
// va_block_context must not be NULL and va_block_context->hmm.vma must be
// valid (this is usually set by uvm_hmm_va_block_find_create()).
// Locking: the caller must hold va_block_context->mm mmap_lock in at least
// read mode.
uvm_prot_t uvm_hmm_compute_logical_prot(uvm_va_block_t *va_block,
uvm_va_block_context_t *va_block_context,
NvU64 addr);
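//
// Illustrative usage sketch (hypothetical fault-path check; the error code
// chosen here is an assumption for the example):
//
//     if (uvm_hmm_compute_logical_prot(va_block, va_block_context, addr) == UVM_PROT_NONE)
//         return NV_ERR_INVALID_ADDRESS;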
NV_STATUS uvm_test_hmm_init(UVM_TEST_HMM_INIT_PARAMS *params, struct file *filp);
NV_STATUS uvm_test_split_invalidate_delay(UVM_TEST_SPLIT_INVALIDATE_DELAY_PARAMS *params,
struct file *filp);
NV_STATUS uvm_hmm_va_range_info(uvm_va_space_t *va_space,
struct mm_struct *mm,
UVM_TEST_VA_RANGE_INFO_PARAMS *params);
// Return true if the new residency location for a GPU fault should be
// system memory.
// va_block_context must not be NULL and va_block_context->hmm.vma must be
// valid (this is usually set by uvm_hmm_va_block_find_create()).
// TODO: Bug 3660968: Remove this hack as soon as HMM migration is
// implemented for VMAs other than anonymous memory.
// Locking: the va_block lock must be held. If the va_block is a HMM
// va_block, the va_block_context->mm must be retained and locked for at
// least read.
bool uvm_hmm_must_use_sysmem(uvm_va_block_t *va_block,
uvm_va_block_context_t *va_block_context);
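//
// Illustrative usage sketch (hypothetical helper; shows forcing the
// destination residency to the CPU for the VMA types this TODO covers):
//
//     static uvm_processor_id_t example_pick_residency(uvm_va_block_t *va_block,
//                                                      uvm_va_block_context_t *va_block_context,
//                                                      uvm_processor_id_t preferred)
//     {
//         if (uvm_hmm_must_use_sysmem(va_block, va_block_context))
//             return UVM_ID_CPU;
//         return preferred;
//     }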
#else // UVM_IS_CONFIG_HMM()
static bool uvm_hmm_is_enabled(uvm_va_space_t *va_space)
@ -230,6 +309,23 @@ typedef struct
return NV_ERR_INVALID_ADDRESS;
}
static NV_STATUS uvm_hmm_find_vma(uvm_va_block_context_t *va_block_context, NvU64 addr)
{
return NV_OK;
}
static bool uvm_hmm_va_block_context_vma_is_valid(uvm_va_block_t *va_block,
uvm_va_block_context_t *va_block_context,
uvm_va_block_region_t region)
{
return true;
}
static NV_STATUS uvm_hmm_test_va_block_inject_split_error(uvm_va_space_t *va_space, NvU64 addr)
{
return NV_ERR_INVALID_ADDRESS;
}
static NV_STATUS uvm_hmm_va_block_reclaim(uvm_va_space_t *va_space,
struct mm_struct *mm,
NvU64 start,
@ -238,6 +334,10 @@ typedef struct
return NV_OK;
}
static void uvm_hmm_va_block_split_tree(uvm_va_block_t *existing_va_block, uvm_va_block_t *new_block)
{
}
static NV_STATUS uvm_hmm_split_as_needed(uvm_va_space_t *va_space,
NvU64 addr,
uvm_va_policy_is_split_needed_t split_needed_cb,
@ -291,6 +391,44 @@ typedef struct
return NV_OK;
}
static uvm_va_block_region_t uvm_hmm_get_prefetch_region(uvm_va_block_t *va_block,
uvm_va_block_context_t *va_block_context,
NvU64 address)
{
return (uvm_va_block_region_t){};
}
static uvm_prot_t uvm_hmm_compute_logical_prot(uvm_va_block_t *va_block,
uvm_va_block_context_t *va_block_context,
NvU64 addr)
{
return UVM_PROT_NONE;
}
static NV_STATUS uvm_test_hmm_init(UVM_TEST_HMM_INIT_PARAMS *params, struct file *filp)
{
return NV_WARN_NOTHING_TO_DO;
}
static NV_STATUS uvm_test_split_invalidate_delay(UVM_TEST_SPLIT_INVALIDATE_DELAY_PARAMS *params,
struct file *filp)
{
return NV_ERR_INVALID_STATE;
}
static NV_STATUS uvm_hmm_va_range_info(uvm_va_space_t *va_space,
struct mm_struct *mm,
UVM_TEST_VA_RANGE_INFO_PARAMS *params)
{
return NV_ERR_INVALID_ADDRESS;
}
static bool uvm_hmm_must_use_sysmem(uvm_va_block_t *va_block,
uvm_va_block_context_t *va_block_context)
{
return false;
}
#endif // UVM_IS_CONFIG_HMM()
#endif // _UVM_HMM_H_

View File

@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2021 NVIDIA Corporation
Copyright (c) 2021-2022 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@ -35,7 +35,7 @@ NV_STATUS uvm_test_hmm_sanity(UVM_TEST_HMM_SANITY_PARAMS *params, struct file *f
uvm_va_block_t *hmm_block = NULL;
NV_STATUS status;
mm = uvm_va_space_mm_retain(va_space);
mm = uvm_va_space_mm_or_current_retain(va_space);
if (!mm)
return NV_WARN_NOTHING_TO_DO;
@ -61,7 +61,7 @@ NV_STATUS uvm_test_hmm_sanity(UVM_TEST_HMM_SANITY_PARAMS *params, struct file *f
status = uvm_hmm_va_block_find_create(va_space, 0UL, NULL, &hmm_block);
TEST_CHECK_GOTO(status == NV_ERR_INVALID_ADDRESS, done);
// Try to create an HMM va_block which overlaps a UVM managed block.
// Try to create an HMM va_block which overlaps a managed block.
// It should fail.
status = uvm_hmm_va_block_find_create(va_space, params->uvm_address, NULL, &hmm_block);
TEST_CHECK_GOTO(status == NV_ERR_INVALID_ADDRESS, done);
@ -77,14 +77,14 @@ NV_STATUS uvm_test_hmm_sanity(UVM_TEST_HMM_SANITY_PARAMS *params, struct file *f
done:
uvm_va_space_up_read(va_space);
uvm_up_read_mmap_lock(mm);
uvm_va_space_mm_release(va_space);
uvm_va_space_mm_or_current_release(va_space, mm);
return status;
out:
uvm_va_space_up_write(va_space);
uvm_up_write_mmap_lock(mm);
uvm_va_space_mm_release(va_space);
uvm_va_space_mm_or_current_release(va_space, mm);
return status;
}

View File

@ -34,31 +34,6 @@
// the (out-of-tree) UVM driver from changes to the upstream Linux kernel.
//
#if !defined(NV_ADDRESS_SPACE_INIT_ONCE_PRESENT)
void address_space_init_once(struct address_space *mapping)
{
memset(mapping, 0, sizeof(*mapping));
INIT_RADIX_TREE(&mapping->page_tree, GFP_ATOMIC);
#if defined(NV_ADDRESS_SPACE_HAS_RWLOCK_TREE_LOCK)
//
// The .tree_lock member variable was changed from type rwlock_t, to
// spinlock_t, on 25 July 2008, by mainline commit
// 19fd6231279be3c3bdd02ed99f9b0eb195978064.
//
rwlock_init(&mapping->tree_lock);
#else
spin_lock_init(&mapping->tree_lock);
#endif
spin_lock_init(&mapping->i_mmap_lock);
INIT_LIST_HEAD(&mapping->private_list);
spin_lock_init(&mapping->private_lock);
INIT_RAW_PRIO_TREE_ROOT(&mapping->i_mmap);
INIT_LIST_HEAD(&mapping->i_mmap_nonlinear);
}
#endif
#if UVM_CGROUP_ACCOUNTING_SUPPORTED()
void uvm_memcg_context_start(uvm_memcg_context_t *context, struct mm_struct *mm)
{

View File

@ -88,7 +88,7 @@
#include "nv-kthread-q.h"
#if NV_KTHREAD_Q_SUPPORTS_AFFINITY() == 1 && defined(NV_CPUMASK_OF_NODE_PRESENT)
#if defined(NV_CPUMASK_OF_NODE_PRESENT)
#define UVM_THREAD_AFFINITY_SUPPORTED() 1
#else
#define UVM_THREAD_AFFINITY_SUPPORTED() 0
@ -136,8 +136,8 @@ static inline const struct cpumask *uvm_cpumask_of_node(int node)
#endif
// See bug 1707453 for further details about setting the minimum kernel version.
#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 32)
# error This driver does not support kernels older than 2.6.32!
#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 10, 0)
# error This driver does not support kernels older than 3.10!
#endif
#if !defined(VM_RESERVED)
@ -217,10 +217,6 @@ static inline const struct cpumask *uvm_cpumask_of_node(int node)
#define NV_UVM_GFP_FLAGS (GFP_KERNEL)
#if !defined(NV_ADDRESS_SPACE_INIT_ONCE_PRESENT)
void address_space_init_once(struct address_space *mapping);
#endif
// Develop builds define DEBUG but enable optimization
#if defined(DEBUG) && !defined(NVIDIA_UVM_DEVELOP)
// Wrappers for functions not building correctly without optimizations on,
@ -352,23 +348,6 @@ static inline NvU64 NV_GETTIME(void)
(bit) = find_next_zero_bit((addr), (size), (bit) + 1))
#endif
// bitmap_clear was added in 2.6.33 via commit c1a2a962a2ad103846e7950b4591471fabecece7
#if !defined(NV_BITMAP_CLEAR_PRESENT)
static inline void bitmap_clear(unsigned long *map, unsigned int start, int len)
{
unsigned int index = start;
for_each_set_bit_from(index, map, start + len)
__clear_bit(index, map);
}
static inline void bitmap_set(unsigned long *map, unsigned int start, int len)
{
unsigned int index = start;
for_each_clear_bit_from(index, map, start + len)
__set_bit(index, map);
}
#endif
// Added in 2.6.24
#ifndef ACCESS_ONCE
#define ACCESS_ONCE(x) (*(volatile typeof(x) *)&(x))
@ -439,17 +418,6 @@ static inline NvU64 NV_GETTIME(void)
#define PAGE_ALIGNED(addr) (((addr) & (PAGE_SIZE - 1)) == 0)
#endif
// Added in 2.6.37 via commit e1ca7788dec6773b1a2bce51b7141948f2b8bccf
#if !defined(NV_VZALLOC_PRESENT)
static inline void *vzalloc(unsigned long size)
{
void *p = vmalloc(size);
if (p)
memset(p, 0, size);
return p;
}
#endif
// Changed in 3.17 via commit 743162013d40ca612b4cb53d3a200dff2d9ab26e
#if (NV_WAIT_ON_BIT_LOCK_ARGUMENT_COUNT == 3)
#define UVM_WAIT_ON_BIT_LOCK(word, bit, mode) \
@ -505,21 +473,6 @@ static bool radix_tree_empty(struct radix_tree_root *tree)
#endif
#endif
#if !defined(NV_USLEEP_RANGE_PRESENT)
static void __sched usleep_range(unsigned long min, unsigned long max)
{
unsigned min_msec = min / 1000;
unsigned max_msec = max / 1000;
if (min_msec != 0)
msleep(min_msec);
else if (max_msec != 0)
msleep(max_msec);
else
msleep(1);
}
#endif
typedef struct
{
struct mem_cgroup *new_memcg;

View File

@ -337,7 +337,9 @@
//
// - Channel lock
// Order: UVM_LOCK_ORDER_CHANNEL
// Spinlock (uvm_spinlock_t)
// Spinlock (uvm_spinlock_t) or exclusive lock (mutex)
//
// Lock protecting the state of all the channels in a channel pool.
//
// - Tools global VA space list lock (g_tools_va_space_list_lock)
// Order: UVM_LOCK_ORDER_TOOLS_VA_SPACE_LIST

View File

@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2016-2021 NVIDIA Corporation
Copyright (c) 2016-2022 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@ -605,7 +605,7 @@ static NV_STATUS uvm_create_external_range(uvm_va_space_t *va_space, UVM_CREATE_
return NV_ERR_INVALID_ADDRESS;
// The mm needs to be locked in order to remove stale HMM va_blocks.
mm = uvm_va_space_mm_retain_lock(va_space);
mm = uvm_va_space_mm_or_current_retain_lock(va_space);
uvm_va_space_down_write(va_space);
// Create the new external VA range.
@ -619,7 +619,7 @@ static NV_STATUS uvm_create_external_range(uvm_va_space_t *va_space, UVM_CREATE_
}
uvm_va_space_up_write(va_space);
uvm_va_space_mm_release_unlock(va_space, mm);
uvm_va_space_mm_or_current_release_unlock(va_space, mm);
return status;
}
@ -636,6 +636,11 @@ static NV_STATUS set_ext_gpu_map_location(uvm_ext_gpu_map_t *ext_gpu_map,
{
uvm_gpu_t *owning_gpu;
if (!mem_info->deviceDescendant && !mem_info->sysmem) {
ext_gpu_map->owning_gpu = NULL;
ext_gpu_map->is_sysmem = false;
return NV_OK;
}
// This is a local or peer allocation, so the owning GPU must have been
// registered.
owning_gpu = uvm_va_space_get_gpu_by_uuid(va_space, &mem_info->uuid);

View File

@ -523,7 +523,7 @@ static NV_STATUS mem_alloc_sysmem_chunks(uvm_mem_t *mem, struct mm_struct *mm, g
// In case of failure, the caller is required to handle cleanup by calling
// uvm_mem_free
static NV_STATUS mem_alloc_vidmem_chunks(uvm_mem_t *mem, bool zero)
static NV_STATUS mem_alloc_vidmem_chunks(uvm_mem_t *mem, bool zero, bool is_protected)
{
NV_STATUS status;
@ -559,7 +559,7 @@ static NV_STATUS mem_alloc_vidmem_chunks(uvm_mem_t *mem, bool zero)
return NV_OK;
}
static NV_STATUS mem_alloc_chunks(uvm_mem_t *mem, struct mm_struct *mm, bool zero)
static NV_STATUS mem_alloc_chunks(uvm_mem_t *mem, struct mm_struct *mm, bool zero, bool is_protected)
{
if (uvm_mem_is_sysmem(mem)) {
gfp_t gfp_flags;
@ -581,7 +581,7 @@ static NV_STATUS mem_alloc_chunks(uvm_mem_t *mem, struct mm_struct *mm, bool zer
return status;
}
return mem_alloc_vidmem_chunks(mem, zero);
return mem_alloc_vidmem_chunks(mem, zero, is_protected);
}
static const char *mem_physical_source(uvm_mem_t *mem)
@ -618,6 +618,7 @@ NV_STATUS uvm_mem_alloc(const uvm_mem_alloc_params_t *params, uvm_mem_t **mem_ou
{
NV_STATUS status;
uvm_mem_t *mem = NULL;
bool is_protected = false;
UVM_ASSERT(params->size > 0);
@ -639,7 +640,7 @@ NV_STATUS uvm_mem_alloc(const uvm_mem_alloc_params_t *params, uvm_mem_t **mem_ou
mem->physical_allocation_size = UVM_ALIGN_UP(mem->size, mem->chunk_size);
mem->chunks_count = mem->physical_allocation_size / mem->chunk_size;
status = mem_alloc_chunks(mem, params->mm, params->zero);
status = mem_alloc_chunks(mem, params->mm, params->zero, is_protected);
if (status != NV_OK)
goto error;
@ -893,7 +894,7 @@ static void sysmem_unmap_gpu_phys(uvm_mem_t *mem, uvm_gpu_t *gpu)
// partial map_gpu_sysmem_iommu() operation.
break;
}
uvm_gpu_unmap_cpu_pages(gpu, dma_addrs[i], mem->chunk_size);
uvm_gpu_unmap_cpu_pages(gpu->parent, dma_addrs[i], mem->chunk_size);
dma_addrs[i] = 0;
}
@ -914,7 +915,7 @@ static NV_STATUS sysmem_map_gpu_phys(uvm_mem_t *mem, uvm_gpu_t *gpu)
return status;
for (i = 0; i < mem->chunks_count; ++i) {
status = uvm_gpu_map_cpu_pages(gpu,
status = uvm_gpu_map_cpu_pages(gpu->parent,
mem->sysmem.pages[i],
mem->chunk_size,
&mem->sysmem.dma_addrs[uvm_global_id_gpu_index(gpu->global_id)][i]);

View File

@ -179,6 +179,8 @@ struct uvm_mem_struct
//
// There is no equivalent mask for vidmem, because only the backing
// GPU can physically access the memory
//
// TODO: Bug 3723779: Share DMA mappings within a single parent GPU
uvm_global_processor_mask_t mapped_on_phys;
struct page **pages;

View File

@ -207,6 +207,8 @@ NV_STATUS uvm_va_block_migrate_locked(uvm_va_block_t *va_block,
uvm_assert_mutex_locked(&va_block->lock);
va_block_context->policy = uvm_va_range_get_policy(va_block->va_range);
if (uvm_va_policy_is_read_duplicate(va_block_context->policy, va_space)) {
status = uvm_va_block_make_resident_read_duplicate(va_block,
va_block_retry,
@ -466,6 +468,8 @@ static NV_STATUS uvm_va_range_migrate(uvm_va_range_t *va_range,
{
NvU64 preunmap_range_start = start;
UVM_ASSERT(va_block_context->policy == uvm_va_range_get_policy(va_range));
should_do_cpu_preunmap = should_do_cpu_preunmap && va_range_should_do_cpu_preunmap(va_block_context->policy,
va_range->va_space);
@ -942,10 +946,8 @@ done:
// benchmarks to see if a two-pass approach would be faster (first
// pass pushes all GPU work asynchronously, second pass updates CPU
// mappings synchronously).
if (mm) {
if (mm)
uvm_up_read_mmap_lock_out_of_order(mm);
uvm_va_space_mm_or_current_release(va_space, mm);
}
if (tracker_ptr) {
// If requested, release semaphore
@ -973,6 +975,7 @@ done:
}
uvm_va_space_up_read(va_space);
uvm_va_space_mm_or_current_release(va_space, mm);
// If the migration is known to be complete, eagerly dispatch the migration
// events, instead of processing them on a later event flush. Note that an
@ -1043,13 +1046,12 @@ done:
// benchmarks to see if a two-pass approach would be faster (first
// pass pushes all GPU work asynchronously, second pass updates CPU
// mappings synchronously).
if (mm) {
if (mm)
uvm_up_read_mmap_lock_out_of_order(mm);
uvm_va_space_mm_or_current_release(va_space, mm);
}
tracker_status = uvm_tracker_wait_deinit(&local_tracker);
uvm_va_space_up_read(va_space);
uvm_va_space_mm_or_current_release(va_space, mm);
// This API is synchronous, so wait for migrations to finish
uvm_tools_flush_events();

View File

@ -74,7 +74,7 @@ static NV_STATUS migrate_vma_page_copy_address(struct page *page,
}
else {
// Sysmem/Indirect Peer
NV_STATUS status = uvm_gpu_map_cpu_page(copying_gpu, page, &state->dma.addrs[page_index]);
NV_STATUS status = uvm_gpu_map_cpu_page(copying_gpu->parent, page, &state->dma.addrs[page_index]);
if (status != NV_OK)
return status;
@ -628,7 +628,7 @@ void uvm_migrate_vma_finalize_and_map(struct migrate_vma *args, migrate_vma_stat
if (state->dma.num_pages > 0) {
for_each_set_bit(i, state->dma.page_mask, state->num_pages)
uvm_gpu_unmap_cpu_page(state->dma.addrs_gpus[i], state->dma.addrs[i]);
uvm_gpu_unmap_cpu_page(state->dma.addrs_gpus[i]->parent, state->dma.addrs[i]);
}
UVM_ASSERT(!bitmap_intersects(state->populate_pages_mask, state->allocation_failed_mask, state->num_pages));

View File

@ -34,8 +34,8 @@ typedef struct
{
uvm_va_space_t *va_space;
struct mm_struct *mm;
const unsigned long start;
const unsigned long length;
unsigned long start;
unsigned long length;
uvm_processor_id_t dst_id;
// dst_node_id may be clobbered by uvm_migrate_pageable().

View File

@ -132,7 +132,7 @@ static NV_STATUS phys_mem_allocate_sysmem(uvm_page_tree_t *tree, NvLength size,
// Check for fake GPUs from the unit test
if (tree->gpu->parent->pci_dev)
status = uvm_gpu_map_cpu_pages(tree->gpu, out->handle.page, UVM_PAGE_ALIGN_UP(size), &dma_addr);
status = uvm_gpu_map_cpu_pages(tree->gpu->parent, out->handle.page, UVM_PAGE_ALIGN_UP(size), &dma_addr);
else
dma_addr = page_to_phys(out->handle.page);
@ -217,7 +217,7 @@ static void phys_mem_deallocate_sysmem(uvm_page_tree_t *tree, uvm_mmu_page_table
UVM_ASSERT(ptr->addr.aperture == UVM_APERTURE_SYS);
if (tree->gpu->parent->pci_dev)
uvm_gpu_unmap_cpu_pages(tree->gpu, ptr->addr.address, UVM_PAGE_ALIGN_UP(ptr->size));
uvm_gpu_unmap_cpu_pages(tree->gpu->parent, ptr->addr.address, UVM_PAGE_ALIGN_UP(ptr->size));
__free_pages(ptr->handle.page, get_order(ptr->size));
}

View File

@ -50,7 +50,6 @@ NV_STATUS uvm_perf_heuristics_init()
void uvm_perf_heuristics_exit()
{
uvm_perf_access_counters_exit();
uvm_perf_prefetch_exit();
uvm_perf_thrashing_exit();
}
@ -73,9 +72,6 @@ NV_STATUS uvm_perf_heuristics_load(uvm_va_space_t *va_space)
NV_STATUS status;
status = uvm_perf_thrashing_load(va_space);
if (status != NV_OK)
return status;
status = uvm_perf_prefetch_load(va_space);
if (status != NV_OK)
return status;
status = uvm_perf_access_counters_load(va_space);
@ -105,6 +101,5 @@ void uvm_perf_heuristics_unload(uvm_va_space_t *va_space)
uvm_assert_rwsem_locked_write(&va_space->lock);
uvm_perf_access_counters_unload(va_space);
uvm_perf_prefetch_unload(va_space);
uvm_perf_thrashing_unload(va_space);
}

View File

@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2016 NVIDIA Corporation
Copyright (c) 2016-2022 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@ -45,7 +45,6 @@
//
// - UVM_PERF_MODULE_TYPE_THRASHING: detects memory thrashing scenarios and
// provides thrashing prevention mechanisms
// - UVM_PERF_MODULE_TYPE_PREFETCH: detects memory prefetching opportunities
// - UVM_PERF_MODULE_TYPE_ACCESS_COUNTERS: migrates memory using access counter
// notifications
typedef enum
@ -54,7 +53,6 @@ typedef enum
UVM_PERF_MODULE_TYPE_TEST = UVM_PERF_MODULE_FIRST_TYPE,
UVM_PERF_MODULE_TYPE_THRASHING,
UVM_PERF_MODULE_TYPE_PREFETCH,
UVM_PERF_MODULE_TYPE_ACCESS_COUNTERS,
UVM_PERF_MODULE_TYPE_COUNT,

View File

@ -30,31 +30,6 @@
#include "uvm_va_range.h"
#include "uvm_test.h"
// Global cache to allocate the per-VA block prefetch detection structures
static struct kmem_cache *g_prefetch_info_cache __read_mostly;
// Per-VA block prefetch detection structure
typedef struct
{
uvm_page_mask_t prefetch_pages;
uvm_page_mask_t migrate_pages;
uvm_va_block_bitmap_tree_t bitmap_tree;
uvm_processor_id_t last_migration_proc_id;
uvm_va_block_region_t region;
size_t big_page_size;
uvm_va_block_region_t big_pages_region;
NvU16 pending_prefetch_pages;
NvU16 fault_migrations_to_last_proc;
} block_prefetch_info_t;
//
// Tunables for prefetch detection/prevention (configurable via module parameters)
//
@ -88,19 +63,54 @@ static bool g_uvm_perf_prefetch_enable;
static unsigned g_uvm_perf_prefetch_threshold;
static unsigned g_uvm_perf_prefetch_min_faults;
// Callback declaration for the performance heuristics events
static void prefetch_block_destroy_cb(uvm_perf_event_t event_id, uvm_perf_event_data_t *event_data);
void uvm_perf_prefetch_bitmap_tree_iter_init(const uvm_perf_prefetch_bitmap_tree_t *bitmap_tree,
uvm_page_index_t page_index,
uvm_perf_prefetch_bitmap_tree_iter_t *iter)
{
UVM_ASSERT(bitmap_tree->level_count > 0);
UVM_ASSERT_MSG(page_index < bitmap_tree->leaf_count,
"%zd vs %zd",
(size_t)page_index,
(size_t)bitmap_tree->leaf_count);
static uvm_va_block_region_t compute_prefetch_region(uvm_page_index_t page_index, block_prefetch_info_t *prefetch_info)
iter->level_idx = bitmap_tree->level_count - 1;
iter->node_idx = page_index;
}
uvm_va_block_region_t uvm_perf_prefetch_bitmap_tree_iter_get_range(const uvm_perf_prefetch_bitmap_tree_t *bitmap_tree,
const uvm_perf_prefetch_bitmap_tree_iter_t *iter)
{
NvU16 range_leaves = uvm_perf_tree_iter_leaf_range(bitmap_tree, iter);
NvU16 range_start = uvm_perf_tree_iter_leaf_range_start(bitmap_tree, iter);
uvm_va_block_region_t subregion = uvm_va_block_region(range_start, range_start + range_leaves);
UVM_ASSERT(iter->level_idx >= 0);
UVM_ASSERT(iter->level_idx < bitmap_tree->level_count);
return subregion;
}
NvU16 uvm_perf_prefetch_bitmap_tree_iter_get_count(const uvm_perf_prefetch_bitmap_tree_t *bitmap_tree,
const uvm_perf_prefetch_bitmap_tree_iter_t *iter)
{
uvm_va_block_region_t subregion = uvm_perf_prefetch_bitmap_tree_iter_get_range(bitmap_tree, iter);
return uvm_page_mask_region_weight(&bitmap_tree->pages, subregion);
}
static uvm_va_block_region_t compute_prefetch_region(uvm_page_index_t page_index,
uvm_perf_prefetch_bitmap_tree_t *bitmap_tree,
uvm_va_block_region_t max_prefetch_region)
{
NvU16 counter;
uvm_va_block_bitmap_tree_iter_t iter;
uvm_va_block_bitmap_tree_t *bitmap_tree = &prefetch_info->bitmap_tree;
uvm_va_block_region_t prefetch_region = uvm_va_block_region(bitmap_tree->leaf_count,
bitmap_tree->leaf_count + 1);
uvm_perf_prefetch_bitmap_tree_iter_t iter;
uvm_va_block_region_t prefetch_region = uvm_va_block_region(0, 0);
uvm_va_block_bitmap_tree_traverse_counters(counter, bitmap_tree, page_index, &iter) {
uvm_va_block_region_t subregion = uvm_va_block_bitmap_tree_iter_get_range(bitmap_tree, &iter);
uvm_perf_prefetch_bitmap_tree_traverse_counters(counter,
bitmap_tree,
page_index - max_prefetch_region.first + bitmap_tree->offset,
&iter) {
uvm_va_block_region_t subregion = uvm_perf_prefetch_bitmap_tree_iter_get_range(bitmap_tree, &iter);
NvU16 subregion_pages = uvm_va_block_region_num_pages(subregion);
UVM_ASSERT(counter <= subregion_pages);
@ -109,289 +119,287 @@ static uvm_va_block_region_t compute_prefetch_region(uvm_page_index_t page_index
}
// Clamp prefetch region to actual pages
if (prefetch_region.first < bitmap_tree->leaf_count) {
if (prefetch_region.first < prefetch_info->region.first)
prefetch_region.first = prefetch_info->region.first;
if (prefetch_region.outer) {
prefetch_region.first += max_prefetch_region.first;
if (prefetch_region.first < bitmap_tree->offset) {
prefetch_region.first = bitmap_tree->offset;
}
else {
prefetch_region.first -= bitmap_tree->offset;
if (prefetch_region.first < max_prefetch_region.first)
prefetch_region.first = max_prefetch_region.first;
}
if (prefetch_region.outer > prefetch_info->region.outer)
prefetch_region.outer = prefetch_info->region.outer;
prefetch_region.outer += max_prefetch_region.first;
if (prefetch_region.outer < bitmap_tree->offset) {
prefetch_region.outer = bitmap_tree->offset;
}
else {
prefetch_region.outer -= bitmap_tree->offset;
if (prefetch_region.outer > max_prefetch_region.outer)
prefetch_region.outer = max_prefetch_region.outer;
}
}
return prefetch_region;
}
// Performance heuristics module for prefetch
static uvm_perf_module_t g_module_prefetch;
static uvm_perf_module_event_callback_desc_t g_callbacks_prefetch[] = {
{ UVM_PERF_EVENT_BLOCK_DESTROY, prefetch_block_destroy_cb },
{ UVM_PERF_EVENT_MODULE_UNLOAD, prefetch_block_destroy_cb },
{ UVM_PERF_EVENT_BLOCK_SHRINK, prefetch_block_destroy_cb }
};
// Get the prefetch detection struct for the given block
static block_prefetch_info_t *prefetch_info_get(uvm_va_block_t *va_block)
{
return uvm_perf_module_type_data(va_block->perf_modules_data, UVM_PERF_MODULE_TYPE_PREFETCH);
}
static void prefetch_info_destroy(uvm_va_block_t *va_block)
{
block_prefetch_info_t *prefetch_info = prefetch_info_get(va_block);
if (prefetch_info) {
kmem_cache_free(g_prefetch_info_cache, prefetch_info);
uvm_perf_module_type_unset_data(va_block->perf_modules_data, UVM_PERF_MODULE_TYPE_PREFETCH);
}
}
// Get the prefetch detection struct for the given block or create it if it
// does not exist
static block_prefetch_info_t *prefetch_info_get_create(uvm_va_block_t *va_block)
{
block_prefetch_info_t *prefetch_info = prefetch_info_get(va_block);
if (!prefetch_info) {
// Create some ghost leaves so we can align the tree to big page boundary. We use the
// largest page size to handle the worst-case scenario
size_t big_page_size = UVM_PAGE_SIZE_128K;
uvm_va_block_region_t big_pages_region = uvm_va_block_big_page_region_all(va_block, big_page_size);
size_t num_leaves = uvm_va_block_num_cpu_pages(va_block);
// If the va block is not big enough to fit 128KB pages, it may still fit 64KB pages
if (big_pages_region.outer == 0) {
big_page_size = UVM_PAGE_SIZE_64K;
big_pages_region = uvm_va_block_big_page_region_all(va_block, big_page_size);
}
if (big_pages_region.first > 0)
num_leaves += (big_page_size / PAGE_SIZE - big_pages_region.first);
UVM_ASSERT(num_leaves <= PAGES_PER_UVM_VA_BLOCK);
prefetch_info = nv_kmem_cache_zalloc(g_prefetch_info_cache, NV_UVM_GFP_FLAGS);
if (!prefetch_info)
goto fail;
prefetch_info->last_migration_proc_id = UVM_ID_INVALID;
uvm_va_block_bitmap_tree_init_from_page_count(&prefetch_info->bitmap_tree, num_leaves);
uvm_perf_module_type_set_data(va_block->perf_modules_data, prefetch_info, UVM_PERF_MODULE_TYPE_PREFETCH);
}
return prefetch_info;
fail:
prefetch_info_destroy(va_block);
return NULL;
}
static void grow_fault_granularity_if_no_thrashing(block_prefetch_info_t *prefetch_info,
static void grow_fault_granularity_if_no_thrashing(uvm_perf_prefetch_bitmap_tree_t *bitmap_tree,
uvm_va_block_region_t region,
uvm_page_index_t first,
const uvm_page_mask_t *faulted_pages,
const uvm_page_mask_t *thrashing_pages)
{
if (!uvm_page_mask_region_empty(faulted_pages, region) &&
(!thrashing_pages || uvm_page_mask_region_empty(thrashing_pages, region))) {
region.first += prefetch_info->region.first;
region.outer += prefetch_info->region.first;
uvm_page_mask_region_fill(&prefetch_info->bitmap_tree.pages, region);
UVM_ASSERT(region.first >= first);
region.first = region.first - first + bitmap_tree->offset;
region.outer = region.outer - first + bitmap_tree->offset;
UVM_ASSERT(region.outer <= bitmap_tree->leaf_count);
uvm_page_mask_region_fill(&bitmap_tree->pages, region);
}
}
static void grow_fault_granularity(uvm_va_block_t *va_block,
block_prefetch_info_t *prefetch_info,
static void grow_fault_granularity(uvm_perf_prefetch_bitmap_tree_t *bitmap_tree,
NvU32 big_page_size,
uvm_va_block_region_t big_pages_region,
uvm_va_block_region_t max_prefetch_region,
const uvm_page_mask_t *faulted_pages,
const uvm_page_mask_t *thrashing_pages)
{
size_t num_big_pages;
size_t big_page_index;
uvm_va_block_region_t block_region = uvm_va_block_region_from_block(va_block);
uvm_page_index_t pages_per_big_page = big_page_size / PAGE_SIZE;
uvm_page_index_t page_index;
// Migrate whole block if no big pages and no page in it is thrashing
if (!big_pages_region.outer) {
grow_fault_granularity_if_no_thrashing(bitmap_tree,
max_prefetch_region,
max_prefetch_region.first,
faulted_pages,
thrashing_pages);
return;
}
// Migrate whole "prefix" if no page in it is thrashing
if (prefetch_info->big_pages_region.first > 0) {
uvm_va_block_region_t prefix_region = uvm_va_block_region(0, prefetch_info->big_pages_region.first);
if (big_pages_region.first > max_prefetch_region.first) {
uvm_va_block_region_t prefix_region = uvm_va_block_region(max_prefetch_region.first, big_pages_region.first);
grow_fault_granularity_if_no_thrashing(prefetch_info, prefix_region, faulted_pages, thrashing_pages);
grow_fault_granularity_if_no_thrashing(bitmap_tree,
prefix_region,
max_prefetch_region.first,
faulted_pages,
thrashing_pages);
}
// Migrate whole big pages if they are not thrashing
num_big_pages = uvm_va_block_num_big_pages(va_block, prefetch_info->big_page_size);
for (big_page_index = 0; big_page_index < num_big_pages; ++big_page_index) {
uvm_va_block_region_t big_region = uvm_va_block_big_page_region(va_block,
big_page_index,
prefetch_info->big_page_size);
for (page_index = big_pages_region.first;
page_index < big_pages_region.outer;
page_index += pages_per_big_page) {
uvm_va_block_region_t big_region = uvm_va_block_region(page_index,
page_index + pages_per_big_page);
grow_fault_granularity_if_no_thrashing(prefetch_info, big_region, faulted_pages, thrashing_pages);
grow_fault_granularity_if_no_thrashing(bitmap_tree,
big_region,
max_prefetch_region.first,
faulted_pages,
thrashing_pages);
}
// Migrate whole "suffix" if no page in it is thrashing
if (prefetch_info->big_pages_region.outer < block_region.outer) {
uvm_va_block_region_t suffix_region = uvm_va_block_region(prefetch_info->big_pages_region.outer,
block_region.outer);
if (big_pages_region.outer < max_prefetch_region.outer) {
uvm_va_block_region_t suffix_region = uvm_va_block_region(big_pages_region.outer,
max_prefetch_region.outer);
grow_fault_granularity_if_no_thrashing(prefetch_info, suffix_region, faulted_pages, thrashing_pages);
grow_fault_granularity_if_no_thrashing(bitmap_tree,
suffix_region,
max_prefetch_region.first,
faulted_pages,
thrashing_pages);
}
}
// Within a block we only allow prefetching to a single processor. Therefore, if two processors
// are accessing non-overlapping regions within the same block they won't benefit from
// prefetching.
// Within a block we only allow prefetching to a single processor. Therefore,
// if two processors are accessing non-overlapping regions within the same
// block they won't benefit from prefetching.
//
// TODO: Bug 1778034: [uvm] Explore prefetching to different processors within a VA block
void uvm_perf_prefetch_prenotify_fault_migrations(uvm_va_block_t *va_block,
uvm_va_block_context_t *va_block_context,
uvm_processor_id_t new_residency,
const uvm_page_mask_t *faulted_pages,
uvm_va_block_region_t region)
// TODO: Bug 1778034: [uvm] Explore prefetching to different processors within
// a VA block.
static NvU32 uvm_perf_prefetch_prenotify_fault_migrations(uvm_va_block_t *va_block,
uvm_va_block_context_t *va_block_context,
uvm_processor_id_t new_residency,
const uvm_page_mask_t *faulted_pages,
uvm_va_block_region_t faulted_region,
uvm_page_mask_t *prefetch_pages,
uvm_perf_prefetch_bitmap_tree_t *bitmap_tree)
{
uvm_page_index_t page_index;
block_prefetch_info_t *prefetch_info;
const uvm_page_mask_t *resident_mask = NULL;
const uvm_page_mask_t *thrashing_pages = NULL;
uvm_va_space_t *va_space = uvm_va_block_get_va_space(va_block);
uvm_va_policy_t *policy = va_block_context->policy;
uvm_va_block_region_t max_prefetch_region;
NvU32 big_page_size;
uvm_va_block_region_t big_pages_region;
uvm_assert_rwsem_locked(&va_space->lock);
if (!g_uvm_perf_prefetch_enable)
return;
prefetch_info = prefetch_info_get_create(va_block);
if (!prefetch_info)
return;
if (!uvm_id_equal(prefetch_info->last_migration_proc_id, new_residency)) {
prefetch_info->last_migration_proc_id = new_residency;
prefetch_info->fault_migrations_to_last_proc = 0;
if (!uvm_id_equal(va_block->prefetch_info.last_migration_proc_id, new_residency)) {
va_block->prefetch_info.last_migration_proc_id = new_residency;
va_block->prefetch_info.fault_migrations_to_last_proc = 0;
}
prefetch_info->pending_prefetch_pages = 0;
// Compute the expanded region that prefetching is allowed from.
if (uvm_va_block_is_hmm(va_block)) {
max_prefetch_region = uvm_hmm_get_prefetch_region(va_block,
va_block_context,
uvm_va_block_region_start(va_block, faulted_region));
}
else {
max_prefetch_region = uvm_va_block_region_from_block(va_block);
}
uvm_page_mask_zero(prefetch_pages);
if (UVM_ID_IS_CPU(new_residency) || va_block->gpus[uvm_id_gpu_index(new_residency)] != NULL)
resident_mask = uvm_va_block_resident_mask_get(va_block, new_residency);
// If this is a first-touch fault and the destination processor is the
// preferred location, populate the whole VA block
// preferred location, populate the whole max_prefetch_region.
if (uvm_processor_mask_empty(&va_block->resident) &&
uvm_id_equal(new_residency, policy->preferred_location)) {
uvm_page_mask_region_fill(&prefetch_info->prefetch_pages, uvm_va_block_region_from_block(va_block));
uvm_page_mask_region_fill(prefetch_pages, max_prefetch_region);
goto done;
}
if (resident_mask)
uvm_page_mask_or(&prefetch_info->bitmap_tree.pages, resident_mask, faulted_pages);
uvm_page_mask_or(&bitmap_tree->pages, resident_mask, faulted_pages);
else
uvm_page_mask_copy(&prefetch_info->bitmap_tree.pages, faulted_pages);
uvm_page_mask_copy(&bitmap_tree->pages, faulted_pages);
// Get the big page size for the new residency
// If we are using a subregion of the va_block, align bitmap_tree
uvm_page_mask_shift_right(&bitmap_tree->pages, &bitmap_tree->pages, max_prefetch_region.first);
// Get the big page size for the new residency.
// Assume 64K size if the new residency is the CPU or no GPU va space is
// registered in the current process for this GPU.
if (UVM_ID_IS_GPU(new_residency) &&
uvm_processor_mask_test(&va_space->registered_gpu_va_spaces, new_residency)) {
uvm_gpu_t *gpu = uvm_va_space_get_gpu(va_space, new_residency);
prefetch_info->big_page_size = uvm_va_block_gpu_big_page_size(va_block, gpu);
big_page_size = uvm_va_block_gpu_big_page_size(va_block, gpu);
}
else {
prefetch_info->big_page_size = UVM_PAGE_SIZE_64K;
big_page_size = UVM_PAGE_SIZE_64K;
}
big_pages_region = uvm_va_block_big_page_region_subset(va_block, max_prefetch_region, big_page_size);
// Adjust the prefetch tree to big page granularity to make sure that we
// get big page-friendly prefetching hints
prefetch_info->big_pages_region = uvm_va_block_big_page_region_all(va_block, prefetch_info->big_page_size);
if (prefetch_info->big_pages_region.first > 0) {
prefetch_info->region.first = prefetch_info->big_page_size / PAGE_SIZE - prefetch_info->big_pages_region.first;
if (big_pages_region.first - max_prefetch_region.first > 0) {
bitmap_tree->offset = big_page_size / PAGE_SIZE - (big_pages_region.first - max_prefetch_region.first);
bitmap_tree->leaf_count = uvm_va_block_region_num_pages(max_prefetch_region) + bitmap_tree->offset;
uvm_page_mask_shift_left(&prefetch_info->bitmap_tree.pages,
&prefetch_info->bitmap_tree.pages,
prefetch_info->region.first);
UVM_ASSERT(bitmap_tree->offset < big_page_size / PAGE_SIZE);
UVM_ASSERT(bitmap_tree->leaf_count <= PAGES_PER_UVM_VA_BLOCK);
uvm_page_mask_shift_left(&bitmap_tree->pages, &bitmap_tree->pages, bitmap_tree->offset);
}
else {
prefetch_info->region.first = 0;
bitmap_tree->offset = 0;
bitmap_tree->leaf_count = uvm_va_block_region_num_pages(max_prefetch_region);
}
prefetch_info->region.outer = prefetch_info->region.first + uvm_va_block_num_cpu_pages(va_block);
bitmap_tree->level_count = ilog2(roundup_pow_of_two(bitmap_tree->leaf_count)) + 1;
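// Worked example with assumed numbers (illustration only): with a 64K big
// page size there are 16 4K pages per big page. If max_prefetch_region
// starts at page 3 and big_pages_region starts at page 8, then
// offset = 16 - (8 - 3) = 11. For a 53-page max_prefetch_region this gives
// leaf_count = 53 + 11 = 64 and level_count = ilog2(64) + 1 = 7.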
thrashing_pages = uvm_perf_thrashing_get_thrashing_pages(va_block);
// Assume big pages by default. Prefetch the rest of 4KB subregions within the big page
// region unless there is thrashing.
grow_fault_granularity(va_block, prefetch_info, faulted_pages, thrashing_pages);
// Assume big pages by default. Prefetch the rest of 4KB subregions within
// the big page region unless there is thrashing.
grow_fault_granularity(bitmap_tree,
big_page_size,
big_pages_region,
max_prefetch_region,
faulted_pages,
thrashing_pages);
// Do not compute prefetch regions with faults on pages that are thrashing
if (thrashing_pages)
uvm_page_mask_andnot(&prefetch_info->migrate_pages, faulted_pages, thrashing_pages);
uvm_page_mask_andnot(&va_block_context->scratch_page_mask, faulted_pages, thrashing_pages);
else
uvm_page_mask_copy(&prefetch_info->migrate_pages, faulted_pages);
uvm_page_mask_copy(&va_block_context->scratch_page_mask, faulted_pages);
// Update the tree using the migration mask to compute the pages to prefetch
uvm_page_mask_zero(&prefetch_info->prefetch_pages);
for_each_va_block_page_in_region_mask(page_index, &prefetch_info->migrate_pages, region) {
uvm_va_block_region_t prefetch_region = compute_prefetch_region(page_index + prefetch_info->region.first,
prefetch_info);
uvm_page_mask_region_fill(&prefetch_info->prefetch_pages, prefetch_region);
// Update the tree using the scratch mask to compute the pages to prefetch
for_each_va_block_page_in_region_mask(page_index, &va_block_context->scratch_page_mask, faulted_region) {
uvm_va_block_region_t region = compute_prefetch_region(page_index, bitmap_tree, max_prefetch_region);
uvm_page_mask_region_fill(prefetch_pages, region);
// Early out if we have already prefetched until the end of the VA block
if (prefetch_region.outer == prefetch_info->region.outer)
if (region.outer == max_prefetch_region.outer)
break;
}
// Adjust prefetching page mask
if (prefetch_info->region.first > 0) {
uvm_page_mask_shift_right(&prefetch_info->prefetch_pages,
&prefetch_info->prefetch_pages,
prefetch_info->region.first);
}
done:
// Do not prefetch pages that are going to be migrated/populated due to a
// fault
uvm_page_mask_andnot(&prefetch_info->prefetch_pages,
&prefetch_info->prefetch_pages,
faulted_pages);
uvm_page_mask_andnot(prefetch_pages, prefetch_pages, faulted_pages);
// TODO: Bug 1765432: prefetching pages that are already mapped on the CPU
// would trigger a remap, which may cause a large overhead. Therefore,
// exclude them from the mask.
if (UVM_ID_IS_CPU(new_residency)) {
// For HMM, we don't know what pages are mapped by the CPU unless we try to
// migrate them. Prefetch pages will only be opportunistically migrated.
if (UVM_ID_IS_CPU(new_residency) && !uvm_va_block_is_hmm(va_block)) {
uvm_page_mask_and(&va_block_context->scratch_page_mask,
resident_mask,
&va_block->cpu.pte_bits[UVM_PTE_BITS_CPU_READ]);
uvm_page_mask_andnot(&prefetch_info->prefetch_pages,
&prefetch_info->prefetch_pages,
&va_block_context->scratch_page_mask);
uvm_page_mask_andnot(prefetch_pages, prefetch_pages, &va_block_context->scratch_page_mask);
}
// Avoid prefetching pages that are thrashing
if (thrashing_pages) {
uvm_page_mask_andnot(&prefetch_info->prefetch_pages,
&prefetch_info->prefetch_pages,
thrashing_pages);
}
if (thrashing_pages)
uvm_page_mask_andnot(prefetch_pages, prefetch_pages, thrashing_pages);
prefetch_info->fault_migrations_to_last_proc += uvm_page_mask_region_weight(faulted_pages, region);
prefetch_info->pending_prefetch_pages = uvm_page_mask_weight(&prefetch_info->prefetch_pages);
va_block->prefetch_info.fault_migrations_to_last_proc += uvm_page_mask_region_weight(faulted_pages, faulted_region);
return uvm_page_mask_weight(prefetch_pages);
}
uvm_perf_prefetch_hint_t uvm_perf_prefetch_get_hint(uvm_va_block_t *va_block,
const uvm_page_mask_t *new_residency_mask)
void uvm_perf_prefetch_get_hint(uvm_va_block_t *va_block,
uvm_va_block_context_t *va_block_context,
uvm_processor_id_t new_residency,
const uvm_page_mask_t *faulted_pages,
uvm_va_block_region_t faulted_region,
uvm_perf_prefetch_bitmap_tree_t *bitmap_tree,
uvm_perf_prefetch_hint_t *out_hint)
{
uvm_perf_prefetch_hint_t ret = UVM_PERF_PREFETCH_HINT_NONE();
block_prefetch_info_t *prefetch_info;
uvm_va_policy_t *policy = va_block_context->policy;
uvm_va_space_t *va_space = uvm_va_block_get_va_space(va_block);
uvm_page_mask_t *prefetch_pages = &out_hint->prefetch_pages_mask;
NvU32 pending_prefetch_pages;
uvm_assert_rwsem_locked(&va_space->lock);
uvm_assert_mutex_locked(&va_block->lock);
UVM_ASSERT(uvm_va_block_check_policy_is_valid(va_block, policy, faulted_region));
UVM_ASSERT(uvm_hmm_va_block_context_vma_is_valid(va_block, va_block_context, faulted_region));
out_hint->residency = UVM_ID_INVALID;
if (!g_uvm_perf_prefetch_enable)
return ret;
return;
if (!va_space->test.page_prefetch_enabled)
return ret;
return;
prefetch_info = prefetch_info_get(va_block);
if (!prefetch_info)
return ret;
pending_prefetch_pages = uvm_perf_prefetch_prenotify_fault_migrations(va_block,
va_block_context,
new_residency,
faulted_pages,
faulted_region,
prefetch_pages,
bitmap_tree);
if (prefetch_info->fault_migrations_to_last_proc >= g_uvm_perf_prefetch_min_faults &&
prefetch_info->pending_prefetch_pages > 0) {
if (va_block->prefetch_info.fault_migrations_to_last_proc >= g_uvm_perf_prefetch_min_faults &&
pending_prefetch_pages > 0) {
bool changed = false;
uvm_range_group_range_t *rgr;
@ -402,62 +410,19 @@ uvm_perf_prefetch_hint_t uvm_perf_prefetch_get_hint(uvm_va_block_t *va_block,
max(rgr->node.start, va_block->start),
min(rgr->node.end, va_block->end));
if (uvm_page_mask_region_empty(new_residency_mask, region) &&
!uvm_page_mask_region_empty(&prefetch_info->prefetch_pages, region)) {
uvm_page_mask_region_clear(&prefetch_info->prefetch_pages, region);
if (uvm_page_mask_region_empty(faulted_pages, region) &&
!uvm_page_mask_region_empty(prefetch_pages, region)) {
uvm_page_mask_region_clear(prefetch_pages, region);
changed = true;
}
}
if (changed)
prefetch_info->pending_prefetch_pages = uvm_page_mask_weight(&prefetch_info->prefetch_pages);
pending_prefetch_pages = uvm_page_mask_weight(prefetch_pages);
if (prefetch_info->pending_prefetch_pages > 0) {
ret.residency = prefetch_info->last_migration_proc_id;
ret.prefetch_pages_mask = &prefetch_info->prefetch_pages;
}
if (pending_prefetch_pages > 0)
out_hint->residency = va_block->prefetch_info.last_migration_proc_id;
}
return ret;
}
void prefetch_block_destroy_cb(uvm_perf_event_t event_id, uvm_perf_event_data_t *event_data)
{
uvm_va_block_t *va_block;
UVM_ASSERT(g_uvm_perf_prefetch_enable);
UVM_ASSERT(event_id == UVM_PERF_EVENT_BLOCK_DESTROY ||
event_id == UVM_PERF_EVENT_MODULE_UNLOAD ||
event_id == UVM_PERF_EVENT_BLOCK_SHRINK);
if (event_id == UVM_PERF_EVENT_BLOCK_DESTROY)
va_block = event_data->block_destroy.block;
else if (event_id == UVM_PERF_EVENT_BLOCK_SHRINK)
va_block = event_data->block_shrink.block;
else
va_block = event_data->module_unload.block;
if (!va_block)
return;
prefetch_info_destroy(va_block);
}
NV_STATUS uvm_perf_prefetch_load(uvm_va_space_t *va_space)
{
if (!g_uvm_perf_prefetch_enable)
return NV_OK;
return uvm_perf_module_load(&g_module_prefetch, va_space);
}
void uvm_perf_prefetch_unload(uvm_va_space_t *va_space)
{
if (!g_uvm_perf_prefetch_enable)
return;
uvm_perf_module_unload(&g_module_prefetch, va_space);
}
NV_STATUS uvm_perf_prefetch_init()
@ -467,13 +432,6 @@ NV_STATUS uvm_perf_prefetch_init()
if (!g_uvm_perf_prefetch_enable)
return NV_OK;
uvm_perf_module_init("perf_prefetch", UVM_PERF_MODULE_TYPE_PREFETCH, g_callbacks_prefetch,
ARRAY_SIZE(g_callbacks_prefetch), &g_module_prefetch);
g_prefetch_info_cache = NV_KMEM_CACHE_CREATE("block_prefetch_info_t", block_prefetch_info_t);
if (!g_prefetch_info_cache)
return NV_ERR_NO_MEMORY;
if (uvm_perf_prefetch_threshold <= 100) {
g_uvm_perf_prefetch_threshold = uvm_perf_prefetch_threshold;
}
@ -498,14 +456,6 @@ NV_STATUS uvm_perf_prefetch_init()
return NV_OK;
}
void uvm_perf_prefetch_exit()
{
if (!g_uvm_perf_prefetch_enable)
return;
kmem_cache_destroy_safe(&g_prefetch_info_cache);
}
NV_STATUS uvm_test_set_page_prefetch_policy(UVM_TEST_SET_PAGE_PREFETCH_POLICY_PARAMS *params, struct file *filp)
{
uvm_va_space_t *va_space = uvm_va_space_get(filp);

View File

@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2016-2019 NVIDIA Corporation
Copyright (c) 2016-2022 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@ -30,32 +30,66 @@
typedef struct
{
const uvm_page_mask_t *prefetch_pages_mask;
uvm_page_mask_t prefetch_pages_mask;
uvm_processor_id_t residency;
} uvm_perf_prefetch_hint_t;
// Global initialization/cleanup functions
// Encapsulates a counter tree built on top of a page mask bitmap in which each
// leaf represents a page in the block. It carries leaf_count and level_count
// so that the generic perf tree macros can operate on it.
typedef struct
{
uvm_page_mask_t pages;
uvm_page_index_t offset;
NvU16 leaf_count;
NvU8 level_count;
} uvm_perf_prefetch_bitmap_tree_t;
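// Illustrative note (editorial): for a block with 9 leaf pages, as in the
// bitmap tree traversal test elsewhere in this change, level_count works out
// to ilog2(roundup_pow_of_two(9)) + 1 = ilog2(16) + 1 = 5.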
// Iterator for the bitmap tree. It carries level_idx and node_idx so that the
// generic perf tree macros can operate on it.
typedef struct
{
s8 level_idx;
uvm_page_index_t node_idx;
} uvm_perf_prefetch_bitmap_tree_iter_t;
// Global initialization function (no clean up needed).
NV_STATUS uvm_perf_prefetch_init(void);
void uvm_perf_prefetch_exit(void);
// VA space Initialization/cleanup functions
NV_STATUS uvm_perf_prefetch_load(uvm_va_space_t *va_space);
void uvm_perf_prefetch_unload(uvm_va_space_t *va_space);
// Return a hint with the pages that may be prefetched in the block.
// The faulted_pages mask and faulted_region are the pages being migrated to
// the given residency.
// va_block_context must not be NULL, va_block_context->policy must be valid,
// and if the va_block is a HMM block, va_block_context->hmm.vma must be valid
// which also means the va_block_context->mm is not NULL, retained, and locked
// for at least read.
// Locking: The caller must hold the va_space lock and va_block lock.
void uvm_perf_prefetch_get_hint(uvm_va_block_t *va_block,
uvm_va_block_context_t *va_block_context,
uvm_processor_id_t new_residency,
const uvm_page_mask_t *faulted_pages,
uvm_va_block_region_t faulted_region,
uvm_perf_prefetch_bitmap_tree_t *bitmap_tree,
uvm_perf_prefetch_hint_t *out_hint);
// Obtain a hint with the pages that may be prefetched in the block
uvm_perf_prefetch_hint_t uvm_perf_prefetch_get_hint(uvm_va_block_t *va_block,
const uvm_page_mask_t *new_residency_mask);
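// Editorial caller sketch: example_apply_prefetch_hint is hypothetical and not
// part of this change. It only illustrates the calling pattern under the
// locking rules documented above (va_space lock and va_block lock held).
static void example_apply_prefetch_hint(uvm_va_block_t *va_block,
                                        uvm_va_block_context_t *va_block_context,
                                        uvm_processor_id_t new_residency,
                                        const uvm_page_mask_t *faulted_pages,
                                        uvm_va_block_region_t faulted_region,
                                        uvm_perf_prefetch_bitmap_tree_t *bitmap_tree)
{
    uvm_perf_prefetch_hint_t hint;

    uvm_perf_prefetch_get_hint(va_block,
                               va_block_context,
                               new_residency,
                               faulted_pages,
                               faulted_region,
                               bitmap_tree,
                               &hint);

    // When hint.residency is UVM_ID_INVALID there is nothing to prefetch;
    // otherwise hint.prefetch_pages_mask holds the extra pages to migrate
    // along with faulted_pages.
}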
void uvm_perf_prefetch_bitmap_tree_iter_init(const uvm_perf_prefetch_bitmap_tree_t *bitmap_tree,
uvm_page_index_t page_index,
uvm_perf_prefetch_bitmap_tree_iter_t *iter);
uvm_va_block_region_t uvm_perf_prefetch_bitmap_tree_iter_get_range(const uvm_perf_prefetch_bitmap_tree_t *bitmap_tree,
const uvm_perf_prefetch_bitmap_tree_iter_t *iter);
NvU16 uvm_perf_prefetch_bitmap_tree_iter_get_count(const uvm_perf_prefetch_bitmap_tree_t *bitmap_tree,
const uvm_perf_prefetch_bitmap_tree_iter_t *iter);
// Notify that the given mask of pages within region is going to migrate to
// the given residency. The caller must hold the va_space lock.
void uvm_perf_prefetch_prenotify_fault_migrations(uvm_va_block_t *va_block,
uvm_va_block_context_t *va_block_context,
uvm_processor_id_t new_residency,
const uvm_page_mask_t *migrate_pages,
uvm_va_block_region_t region);
#define UVM_PERF_PREFETCH_HINT_NONE() \
(uvm_perf_prefetch_hint_t){ NULL, UVM_ID_INVALID }
#define uvm_perf_prefetch_bitmap_tree_traverse_counters(counter,tree,page,iter) \
for (uvm_perf_prefetch_bitmap_tree_iter_init((tree), (page), (iter)), \
(counter) = uvm_perf_prefetch_bitmap_tree_iter_get_count((tree), (iter)); \
(iter)->level_idx >= 0; \
(counter) = --(iter)->level_idx < 0? 0: \
uvm_perf_prefetch_bitmap_tree_iter_get_count((tree), (iter)))
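// Editorial usage sketch: example_walk_counters is hypothetical and mirrors
// the bitmap tree traversal test elsewhere in this change. It assumes the
// declarations above are in scope; the page indices are arbitrary.
static void example_walk_counters(void)
{
    int value;
    uvm_perf_prefetch_bitmap_tree_t tree;
    uvm_perf_prefetch_bitmap_tree_iter_t iter;

    tree.leaf_count = 9;
    tree.level_count = ilog2(roundup_pow_of_two(tree.leaf_count)) + 1;
    uvm_page_mask_zero(&tree.pages);
    uvm_page_mask_set(&tree.pages, 5);
    uvm_page_mask_set(&tree.pages, 7);

    // Walks from the leaf level towards the root; at each level, value appears
    // to count the pages set in the subtree that covers page 6 at that level.
    uvm_perf_prefetch_bitmap_tree_traverse_counters(value, &tree, 6, &iter) {
        (void)value;
    }
}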
#endif

View File

@ -458,7 +458,7 @@ static void cpu_thrashing_stats_exit(void)
{
if (g_cpu_thrashing_stats.procfs_file) {
UVM_ASSERT(uvm_procfs_is_debug_enabled());
uvm_procfs_destroy_entry(g_cpu_thrashing_stats.procfs_file);
proc_remove(g_cpu_thrashing_stats.procfs_file);
g_cpu_thrashing_stats.procfs_file = NULL;
}
}
@ -522,7 +522,7 @@ static void gpu_thrashing_stats_destroy(uvm_gpu_t *gpu)
uvm_perf_module_type_unset_data(gpu->perf_modules_data, UVM_PERF_MODULE_TYPE_THRASHING);
if (gpu_thrashing->procfs_file)
uvm_procfs_destroy_entry(gpu_thrashing->procfs_file);
proc_remove(gpu_thrashing->procfs_file);
uvm_kvfree(gpu_thrashing);
}
@ -652,7 +652,6 @@ done:
static void thrashing_reset_pages_in_region(uvm_va_block_t *va_block, NvU64 address, NvU64 bytes);
// Destroy the thrashing detection struct for the given block
void uvm_perf_thrashing_info_destroy(uvm_va_block_t *va_block)
{
block_thrashing_info_t *block_thrashing = thrashing_info_get(va_block);
@ -1066,11 +1065,11 @@ static void thrashing_reset_pages_in_region(uvm_va_block_t *va_block, NvU64 addr
// Unmap remote mappings from the given processors on the pinned pages
// described by region and block_thrashing->pinned pages.
static NV_STATUS unmap_remote_pinned_pages_from_processors(uvm_va_block_t *va_block,
uvm_va_block_context_t *va_block_context,
block_thrashing_info_t *block_thrashing,
uvm_va_block_region_t region,
const uvm_processor_mask_t *unmap_processors)
static NV_STATUS unmap_remote_pinned_pages(uvm_va_block_t *va_block,
uvm_va_block_context_t *va_block_context,
block_thrashing_info_t *block_thrashing,
uvm_va_block_region_t region,
const uvm_processor_mask_t *unmap_processors)
{
NV_STATUS status = NV_OK;
NV_STATUS tracker_status;
@ -1116,17 +1115,16 @@ static NV_STATUS unmap_remote_pinned_pages_from_processors(uvm_va_block_t *va_bl
return status;
}
// Unmap remote mappings from all processors on the pinned pages
// described by region and block_thrashing->pinned pages.
NV_STATUS unmap_remote_pinned_pages_from_all_processors(uvm_va_block_t *va_block,
uvm_va_block_context_t *va_block_context,
uvm_va_block_region_t region)
NV_STATUS uvm_perf_thrashing_unmap_remote_pinned_pages_all(uvm_va_block_t *va_block,
uvm_va_block_context_t *va_block_context,
uvm_va_block_region_t region)
{
block_thrashing_info_t *block_thrashing;
uvm_processor_mask_t unmap_processors;
uvm_va_policy_t *policy;
uvm_va_policy_t *policy = va_block_context->policy;
uvm_assert_mutex_locked(&va_block->lock);
UVM_ASSERT(uvm_va_block_check_policy_is_valid(va_block, policy, region));
block_thrashing = thrashing_info_get(va_block);
if (!block_thrashing || !block_thrashing->pages)
@ -1137,15 +1135,9 @@ NV_STATUS unmap_remote_pinned_pages_from_all_processors(uvm_va_block_t *va_block
// Unmap all mapped processors (that are not SetAccessedBy) with
// no copy of the page
policy = uvm_va_policy_get(va_block, uvm_va_block_region_start(va_block, region));
uvm_processor_mask_andnot(&unmap_processors, &va_block->mapped, &policy->accessed_by);
return unmap_remote_pinned_pages_from_processors(va_block,
va_block_context,
block_thrashing,
region,
&unmap_processors);
return unmap_remote_pinned_pages(va_block, va_block_context, block_thrashing, region, &unmap_processors);
}
// Check that we are not migrating pages away from its pinned location and
@ -1246,7 +1238,7 @@ void thrashing_event_cb(uvm_perf_event_t event_id, uvm_perf_event_data_t *event_
if (!va_space_thrashing->params.enable)
return;
// TODO: Bug 2046423: HMM will need to look up the policy when
// TODO: Bug 3660922: HMM will need to look up the policy when
// read duplication is supported.
read_duplication = uvm_va_block_is_hmm(va_block) ?
UVM_READ_DUPLICATION_UNSET :
@ -1796,6 +1788,7 @@ static void thrashing_unpin_pages(struct work_struct *work)
struct delayed_work *dwork = to_delayed_work(work);
va_space_thrashing_info_t *va_space_thrashing = container_of(dwork, va_space_thrashing_info_t, pinned_pages.dwork);
uvm_va_space_t *va_space = va_space_thrashing->va_space;
uvm_va_block_context_t *va_block_context = &va_space_thrashing->pinned_pages.va_block_context;
UVM_ASSERT(uvm_va_space_initialized(va_space) == NV_OK);
@ -1857,12 +1850,13 @@ static void thrashing_unpin_pages(struct work_struct *work)
UVM_ASSERT(block_thrashing);
UVM_ASSERT(uvm_page_mask_test(&block_thrashing->pinned_pages.mask, page_index));
va_space_thrashing->pinned_pages.va_block_context.policy =
uvm_va_block_context_init(va_block_context, NULL);
va_block_context->policy =
uvm_va_policy_get(va_block, uvm_va_block_cpu_page_address(va_block, page_index));
unmap_remote_pinned_pages_from_all_processors(va_block,
&va_space_thrashing->pinned_pages.va_block_context,
uvm_va_block_region_for_page(page_index));
uvm_perf_thrashing_unmap_remote_pinned_pages_all(va_block,
va_block_context,
uvm_va_block_region_for_page(page_index));
thrashing_reset_page(va_space_thrashing, va_block, block_thrashing, page_index);
}
@ -2105,11 +2099,10 @@ NV_STATUS uvm_test_set_page_thrashing_policy(UVM_TEST_SET_PAGE_THRASHING_POLICY_
// Unmap may split PTEs and require a retry. Needs to be called
// before the pinned pages information is destroyed.
status = UVM_VA_BLOCK_RETRY_LOCKED(va_block,
NULL,
unmap_remote_pinned_pages_from_all_processors(va_block,
block_context,
va_block_region));
status = UVM_VA_BLOCK_RETRY_LOCKED(va_block, NULL,
uvm_perf_thrashing_unmap_remote_pinned_pages_all(va_block,
block_context,
va_block_region));
uvm_perf_thrashing_info_destroy(va_block);

View File

@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2016-2019 NVIDIA Corporation
Copyright (c) 2016-2022 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@ -108,8 +108,11 @@ void uvm_perf_thrashing_info_destroy(uvm_va_block_t *va_block);
// Unmap remote mappings from all processors on the pinned pages
// described by region and block_thrashing->pinned pages.
NV_STATUS unmap_remote_pinned_pages_from_all_processors(uvm_va_block_t *va_block,
uvm_va_block_context_t *va_block_context,
uvm_va_block_region_t region);
// va_block_context must not be NULL and va_block_context->policy must be valid.
// See the comments for uvm_va_block_check_policy_is_valid() in uvm_va_block.h.
// Locking: the va_block lock must be held.
NV_STATUS uvm_perf_thrashing_unmap_remote_pinned_pages_all(uvm_va_block_t *va_block,
uvm_va_block_context_t *va_block_context,
uvm_va_block_region_t region);
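// Editorial caller sketch: example_unmap_pinned is hypothetical; it mirrors
// the unpin work item call site in this change. The va_block lock must be
// held, and va_block_context->policy must be valid for the region.
static NV_STATUS example_unmap_pinned(uvm_va_block_t *va_block,
                                      uvm_va_block_context_t *va_block_context,
                                      uvm_page_index_t page_index)
{
    va_block_context->policy = uvm_va_policy_get(va_block,
                                                 uvm_va_block_cpu_page_address(va_block, page_index));

    return uvm_perf_thrashing_unmap_remote_pinned_pages_all(va_block,
                                                            va_block_context,
                                                            uvm_va_block_region_for_page(page_index));
}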
#endif

View File

@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2015 NVIDIA Corporation
Copyright (c) 2015-2022 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@ -23,6 +23,7 @@
#include "uvm_perf_utils.h"
#include "uvm_va_block.h"
#include "uvm_perf_prefetch.h"
#include "uvm_test.h"
static NV_STATUS test_saturating_counter_basic(void)
@ -681,10 +682,12 @@ fail:
static NV_STATUS test_bitmap_tree_traversal(void)
{
int value;
uvm_va_block_bitmap_tree_t tree;
uvm_va_block_bitmap_tree_iter_t iter;
uvm_perf_prefetch_bitmap_tree_t tree;
uvm_perf_prefetch_bitmap_tree_iter_t iter;
uvm_va_block_bitmap_tree_init_from_page_count(&tree, 9);
tree.leaf_count = 9;
tree.level_count = ilog2(roundup_pow_of_two(tree.leaf_count)) + 1;
uvm_page_mask_zero(&tree.pages);
TEST_CHECK_RET(tree.level_count == 5);
TEST_CHECK_RET(tree.leaf_count == 9);
@ -695,7 +698,7 @@ static NV_STATUS test_bitmap_tree_traversal(void)
uvm_page_mask_set(&tree.pages, 7);
uvm_page_mask_set(&tree.pages, 8);
uvm_va_block_bitmap_tree_traverse_counters(value, &tree, 6, &iter) {
uvm_perf_prefetch_bitmap_tree_traverse_counters(value, &tree, 6, &iter) {
if (iter.level_idx == 4)
TEST_CHECK_RET(value == 0);
else if (iter.level_idx == 3)

View File

@ -591,19 +591,16 @@ error:
return status;
}
NV_STATUS uvm_pmm_gpu_alloc_kernel(uvm_pmm_gpu_t *pmm,
size_t num_chunks,
uvm_chunk_size_t chunk_size,
uvm_pmm_alloc_flags_t flags,
uvm_gpu_chunk_t **chunks,
uvm_tracker_t *out_tracker)
static NV_STATUS pmm_gpu_alloc_kernel(uvm_pmm_gpu_t *pmm,
size_t num_chunks,
uvm_chunk_size_t chunk_size,
uvm_pmm_gpu_memory_type_t memory_type,
uvm_pmm_alloc_flags_t flags,
uvm_gpu_chunk_t **chunks,
uvm_tracker_t *out_tracker)
{
uvm_gpu_t *gpu = uvm_pmm_to_gpu(pmm);
NV_STATUS status;
size_t i;
uvm_pmm_gpu_memory_type_t memory_type = UVM_PMM_GPU_MEMORY_TYPE_KERNEL;
status = uvm_pmm_gpu_alloc(pmm, num_chunks, chunk_size, memory_type, flags, chunks, out_tracker);
NV_STATUS status = uvm_pmm_gpu_alloc(pmm, num_chunks, chunk_size, memory_type, flags, chunks, out_tracker);
if (status != NV_OK)
return status;
@ -618,6 +615,18 @@ NV_STATUS uvm_pmm_gpu_alloc_kernel(uvm_pmm_gpu_t *pmm,
return NV_OK;
}
NV_STATUS uvm_pmm_gpu_alloc_kernel(uvm_pmm_gpu_t *pmm,
size_t num_chunks,
uvm_chunk_size_t chunk_size,
uvm_pmm_alloc_flags_t flags,
uvm_gpu_chunk_t **chunks,
uvm_tracker_t *out_tracker)
{
uvm_pmm_gpu_memory_type_t memory_type = UVM_PMM_GPU_MEMORY_TYPE_KERNEL;
return pmm_gpu_alloc_kernel(pmm, num_chunks, chunk_size, memory_type, flags, chunks, out_tracker);
}
static void chunk_update_lists_locked(uvm_pmm_gpu_t *pmm, uvm_gpu_chunk_t *chunk)
{
uvm_gpu_root_chunk_t *root_chunk = root_chunk_from_chunk(pmm, chunk);
@ -1174,7 +1183,7 @@ static void root_chunk_unmap_indirect_peer(uvm_pmm_gpu_t *pmm, uvm_gpu_root_chun
if (status != NV_OK)
UVM_ASSERT(uvm_global_get_status() != NV_OK);
uvm_gpu_unmap_cpu_pages(other_gpu, indirect_peer->dma_addrs[index], UVM_CHUNK_SIZE_MAX);
uvm_gpu_unmap_cpu_pages(other_gpu->parent, indirect_peer->dma_addrs[index], UVM_CHUNK_SIZE_MAX);
uvm_processor_mask_clear(&root_chunk->indirect_peers_mapped, other_gpu->id);
new_count = atomic64_dec_return(&indirect_peer->map_count);
UVM_ASSERT(new_count >= 0);
@ -1304,7 +1313,7 @@ NV_STATUS uvm_pmm_gpu_indirect_peer_map(uvm_pmm_gpu_t *pmm, uvm_gpu_chunk_t *chu
root_chunk_lock(pmm, root_chunk);
if (!uvm_processor_mask_test(&root_chunk->indirect_peers_mapped, accessing_gpu->id)) {
status = uvm_gpu_map_cpu_pages(accessing_gpu,
status = uvm_gpu_map_cpu_pages(accessing_gpu->parent,
uvm_gpu_chunk_to_page(pmm, &root_chunk->chunk),
UVM_CHUNK_SIZE_MAX,
&indirect_peer->dma_addrs[index]);
@ -2705,7 +2714,8 @@ static NV_STATUS uvm_pmm_gpu_pma_evict_pages(void *void_pmm,
NvU64 *pages,
NvU32 num_pages_to_evict,
NvU64 phys_start,
NvU64 phys_end)
NvU64 phys_end,
UVM_PMA_GPU_MEMORY_TYPE mem_type)
{
NV_STATUS status;
uvm_pmm_gpu_t *pmm = (uvm_pmm_gpu_t *)void_pmm;
@ -2804,14 +2814,15 @@ static NV_STATUS uvm_pmm_gpu_pma_evict_pages_wrapper(void *void_pmm,
NvU64 *pages,
NvU32 num_pages_to_evict,
NvU64 phys_start,
NvU64 phys_end)
NvU64 phys_end,
UVM_PMA_GPU_MEMORY_TYPE mem_type)
{
NV_STATUS status;
// RM invokes the eviction callbacks with its API lock held, but not its GPU
// lock.
uvm_record_lock_rm_api();
status = uvm_pmm_gpu_pma_evict_pages(void_pmm, page_size, pages, num_pages_to_evict, phys_start, phys_end);
status = uvm_pmm_gpu_pma_evict_pages(void_pmm, page_size, pages, num_pages_to_evict, phys_start, phys_end, mem_type);
uvm_record_unlock_rm_api();
return status;
}
@ -2821,19 +2832,24 @@ static NV_STATUS uvm_pmm_gpu_pma_evict_pages_wrapper_entry(void *void_pmm,
NvU64 *pages,
NvU32 num_pages_to_evict,
NvU64 phys_start,
NvU64 phys_end)
NvU64 phys_end,
UVM_PMA_GPU_MEMORY_TYPE mem_type)
{
UVM_ENTRY_RET(uvm_pmm_gpu_pma_evict_pages_wrapper(void_pmm,
page_size,
pages,
num_pages_to_evict,
phys_start,
phys_end));
phys_end,
mem_type));
}
// See the documentation of pmaEvictRangeCb_t in pma.h for details of the
// expected semantics.
static NV_STATUS uvm_pmm_gpu_pma_evict_range(void *void_pmm, NvU64 phys_begin, NvU64 phys_end)
static NV_STATUS uvm_pmm_gpu_pma_evict_range(void *void_pmm,
NvU64 phys_begin,
NvU64 phys_end,
UVM_PMA_GPU_MEMORY_TYPE mem_type)
{
NV_STATUS status;
uvm_pmm_gpu_t *pmm = (uvm_pmm_gpu_t *)void_pmm;
@ -2922,21 +2938,27 @@ static NV_STATUS uvm_pmm_gpu_pma_evict_range(void *void_pmm, NvU64 phys_begin, N
return NV_OK;
}
static NV_STATUS uvm_pmm_gpu_pma_evict_range_wrapper(void *void_pmm, NvU64 phys_begin, NvU64 phys_end)
static NV_STATUS uvm_pmm_gpu_pma_evict_range_wrapper(void *void_pmm,
NvU64 phys_begin,
NvU64 phys_end,
UVM_PMA_GPU_MEMORY_TYPE mem_type)
{
NV_STATUS status;
// RM invokes the eviction callbacks with its API lock held, but not its GPU
// lock.
uvm_record_lock_rm_api();
status = uvm_pmm_gpu_pma_evict_range(void_pmm, phys_begin, phys_end);
status = uvm_pmm_gpu_pma_evict_range(void_pmm, phys_begin, phys_end, mem_type);
uvm_record_unlock_rm_api();
return status;
}
static NV_STATUS uvm_pmm_gpu_pma_evict_range_wrapper_entry(void *void_pmm, NvU64 phys_begin, NvU64 phys_end)
static NV_STATUS uvm_pmm_gpu_pma_evict_range_wrapper_entry(void *void_pmm,
NvU64 phys_begin,
NvU64 phys_end,
UVM_PMA_GPU_MEMORY_TYPE mem_type)
{
UVM_ENTRY_RET(uvm_pmm_gpu_pma_evict_range_wrapper(void_pmm, phys_begin, phys_end));
UVM_ENTRY_RET(uvm_pmm_gpu_pma_evict_range_wrapper(void_pmm, phys_begin, phys_end, mem_type));
}
static void deinit_chunk_split_cache(uvm_pmm_gpu_t *pmm)
@ -3420,12 +3442,13 @@ NV_STATUS uvm_test_evict_chunk(UVM_TEST_EVICT_CHUNK_PARAMS *params, struct file
params->evicted_physical_address = 0;
params->chunk_size_backing_virtual = 0;
mm = uvm_va_space_mm_retain_lock(va_space);
mm = uvm_va_space_mm_or_current_retain_lock(va_space);
uvm_va_space_down_read(va_space);
gpu = uvm_va_space_get_gpu_by_uuid(va_space, &params->gpu_uuid);
if (!gpu || !uvm_gpu_supports_eviction(gpu)) {
uvm_va_space_up_read(va_space);
uvm_va_space_mm_or_current_release_unlock(va_space, mm);
return NV_ERR_INVALID_DEVICE;
}
pmm = &gpu->pmm;
@ -3436,13 +3459,24 @@ NV_STATUS uvm_test_evict_chunk(UVM_TEST_EVICT_CHUNK_PARAMS *params, struct file
// For virtual mode, look up and retain the block first so that eviction can
// be started without the VA space lock held.
if (params->eviction_mode == UvmTestEvictModeVirtual) {
status = uvm_va_block_find_create(va_space, mm, params->address, NULL, &block);
if (status != NV_OK) {
uvm_va_block_context_t *block_context;
block_context = uvm_va_block_context_alloc(mm);
if (!block_context) {
status = NV_ERR_NO_MEMORY;
uvm_va_space_up_read(va_space);
uvm_va_space_mm_release_unlock(va_space, mm);
goto out;
}
status = uvm_va_block_find_create(va_space, params->address, block_context, &block);
uvm_va_block_context_free(block_context);
if (status != NV_OK) {
uvm_va_space_up_read(va_space);
uvm_va_space_mm_or_current_release_unlock(va_space, mm);
goto out;
}
// Retain the block before unlocking the VA space lock so that we can
// safely access it later.
uvm_va_block_retain(block);
@ -3451,7 +3485,7 @@ NV_STATUS uvm_test_evict_chunk(UVM_TEST_EVICT_CHUNK_PARAMS *params, struct file
// Unlock the VA space to emulate real eviction better where a VA space lock
// may not be held or may be held for a different VA space.
uvm_va_space_up_read(va_space);
uvm_va_space_mm_release_unlock(va_space, mm);
uvm_va_space_mm_or_current_release_unlock(va_space, mm);
if (params->eviction_mode == UvmTestEvictModeVirtual) {
UVM_ASSERT(block);

View File

@ -428,10 +428,10 @@ uvm_chunk_sizes_mask_t uvm_cpu_chunk_get_allocation_sizes(void)
return uvm_cpu_chunk_allocation_sizes & UVM_CPU_CHUNK_SIZES;
}
static void uvm_cpu_chunk_set_phys_size(uvm_cpu_chunk_t *chunk, uvm_chunk_size_t size)
static void uvm_cpu_chunk_set_size(uvm_cpu_chunk_t *chunk, uvm_chunk_size_t size)
{
#if !UVM_CPU_CHUNK_SIZE_IS_PAGE_SIZE()
chunk->log2_phys_size = ilog2(size);
chunk->log2_size = ilog2(size);
#endif
}
@ -440,13 +440,7 @@ uvm_chunk_size_t uvm_cpu_chunk_get_size(uvm_cpu_chunk_t *chunk)
#if UVM_CPU_CHUNK_SIZE_IS_PAGE_SIZE()
return PAGE_SIZE;
#else
uvm_chunk_size_t chunk_size;
UVM_ASSERT(chunk);
UVM_ASSERT(uvm_cpu_chunk_get_phys_size(chunk));
chunk_size = uvm_va_block_region_size(chunk->region);
UVM_ASSERT(uvm_cpu_chunk_get_phys_size(chunk) >= chunk_size);
return chunk_size;
return ((uvm_chunk_size_t)1) << chunk->log2_size;
#endif
}
@ -1036,8 +1030,7 @@ void uvm_cpu_chunk_remove_from_block(uvm_va_block_t *va_block, uvm_cpu_chunk_t *
return;
};
uvm_page_mask_region_clear(&va_block->cpu.allocated,
uvm_va_block_region(page_index, page_index + uvm_cpu_chunk_num_pages(chunk)));
uvm_page_mask_region_clear(&va_block->cpu.allocated, chunk->region);
if (uvm_page_mask_empty(&va_block->cpu.allocated)) {
if (UVM_CPU_STORAGE_GET_TYPE(va_block) != UVM_CPU_CHUNK_STORAGE_CHUNK)
@ -1191,7 +1184,7 @@ NV_STATUS uvm_cpu_chunk_alloc(uvm_va_block_t *va_block,
}
chunk->page = page;
uvm_cpu_chunk_set_phys_size(chunk, alloc_size);
uvm_cpu_chunk_set_size(chunk, alloc_size);
chunk->region = region;
nv_kref_init(&chunk->refcount);
uvm_spin_lock_init(&chunk->lock, UVM_LOCK_ORDER_LEAF);
@ -1224,13 +1217,15 @@ error:
return status;
}
NV_STATUS uvm_cpu_chunk_split(uvm_va_block_t *va_block, uvm_cpu_chunk_t *chunk, uvm_chunk_size_t new_size)
NV_STATUS uvm_cpu_chunk_split(uvm_va_block_t *va_block,
uvm_cpu_chunk_t *chunk,
uvm_chunk_size_t new_size,
uvm_page_index_t page_index,
uvm_cpu_chunk_t **new_chunks)
{
NV_STATUS status = NV_OK;
NV_STATUS insert_status;
uvm_cpu_chunk_t *new_chunk;
uvm_page_index_t running_page_index = chunk->region.first;
uvm_page_index_t next_page_index;
uvm_page_index_t running_page_index = page_index;
size_t num_new_chunks;
size_t num_subchunk_pages;
size_t i;
@ -1238,21 +1233,13 @@ NV_STATUS uvm_cpu_chunk_split(uvm_va_block_t *va_block, uvm_cpu_chunk_t *chunk,
UVM_ASSERT(chunk);
UVM_ASSERT(is_power_of_2(new_size));
UVM_ASSERT(new_size < uvm_cpu_chunk_get_size(chunk));
UVM_ASSERT(new_chunks);
// We subtract 1 from the computed number of subchunks because we always
// keep the original chunk as the first in the block's list. This is so we
// don't lose the physical chunk.
// All new subchunks will point to the original chunk as their parent.
num_new_chunks = (uvm_cpu_chunk_get_size(chunk) / new_size) - 1;
num_new_chunks = uvm_cpu_chunk_get_size(chunk) / new_size;
num_subchunk_pages = new_size / PAGE_SIZE;
running_page_index += num_subchunk_pages;
// Remove the existing chunk from the block first. We re-insert it after
// the split.
uvm_cpu_chunk_remove_from_block(va_block, chunk, chunk->region.first);
for (i = 0; i < num_new_chunks; i++) {
uvm_page_index_t relative_page_index = running_page_index - chunk->region.first;
uvm_page_index_t relative_page_index = running_page_index - page_index;
uvm_gpu_id_t id;
new_chunk = uvm_kvmalloc_zero(sizeof(*new_chunk));
@ -1264,10 +1251,10 @@ NV_STATUS uvm_cpu_chunk_split(uvm_va_block_t *va_block, uvm_cpu_chunk_t *chunk,
new_chunk->page = chunk->page + relative_page_index;
new_chunk->offset = chunk->offset + relative_page_index;
new_chunk->region = uvm_va_block_region(running_page_index, running_page_index + num_subchunk_pages);
uvm_cpu_chunk_set_phys_size(new_chunk, new_size);
uvm_cpu_chunk_set_size(new_chunk, new_size);
nv_kref_init(&new_chunk->refcount);
// This lock is unused for logical blocks but initialize it for
// This lock is unused for logical chunks but initialize it for
// consistency.
uvm_spin_lock_init(&new_chunk->lock, UVM_LOCK_ORDER_LEAF);
new_chunk->parent = chunk;
@ -1286,109 +1273,64 @@ NV_STATUS uvm_cpu_chunk_split(uvm_va_block_t *va_block, uvm_cpu_chunk_t *chunk,
parent_dma_addr + (relative_page_index * PAGE_SIZE));
}
status = uvm_cpu_chunk_insert_in_block(va_block, new_chunk, new_chunk->region.first);
if (status != NV_OK) {
uvm_cpu_chunk_put(new_chunk);
goto error;
}
new_chunks[i] = new_chunk;
running_page_index += num_subchunk_pages;
}
chunk->region = uvm_va_block_region(chunk->region.first, chunk->region.first + num_subchunk_pages);
// Drop the original reference count on the parent (from its creation). This
// is done so the parent's reference count goes to 0 when all the children
// are released.
uvm_cpu_chunk_put(chunk);
error:
// Re-insert the split chunk. This is done unconditionally in both the
// success and error paths. The difference is that on the success path,
// the chunk's region has been updated.
// This operation should never fail with NV_ERR_NO_MEMORY since all
// state memory should already be allocated. Failing with other errors
// is a programmer error.
insert_status = uvm_cpu_chunk_insert_in_block(va_block, chunk, chunk->region.first);
UVM_ASSERT(insert_status != NV_ERR_INVALID_ARGUMENT && insert_status != NV_ERR_INVALID_STATE);
if (status != NV_OK) {
for_each_cpu_chunk_in_block_region_safe(new_chunk,
running_page_index,
next_page_index,
va_block,
chunk->region) {
uvm_cpu_chunk_remove_from_block(va_block, new_chunk, new_chunk->region.first);
while (i--)
uvm_cpu_chunk_put(new_chunk);
}
}
return status;
}
uvm_cpu_chunk_t *uvm_cpu_chunk_merge(uvm_va_block_t *va_block, uvm_cpu_chunk_t *chunk)
NV_STATUS uvm_cpu_chunk_merge(uvm_va_block_t *va_block,
uvm_cpu_chunk_t **chunks,
size_t num_merge_chunks,
uvm_chunk_size_t merge_size,
uvm_cpu_chunk_t **merged_chunk)
{
uvm_cpu_chunk_t *parent;
uvm_cpu_chunk_t *subchunk;
uvm_chunk_sizes_mask_t merge_sizes = uvm_cpu_chunk_get_allocation_sizes();
uvm_chunk_size_t merge_chunk_size;
uvm_chunk_size_t parent_phys_size;
uvm_chunk_size_t chunk_size;
uvm_va_block_region_t subchunk_region;
uvm_page_index_t page_index;
uvm_page_index_t next_page_index;
NV_STATUS insert_status;
size_t i;
UVM_ASSERT(chunk);
parent = chunk->parent;
UVM_ASSERT(chunks);
UVM_ASSERT(num_merge_chunks > 0);
UVM_ASSERT(merged_chunk);
// If the chunk does not have a parent, a merge cannot be done.
parent = chunks[0]->parent;
if (!parent)
return NULL;
return NV_WARN_NOTHING_TO_DO;
chunk_size = uvm_cpu_chunk_get_size(chunk);
parent_phys_size = uvm_cpu_chunk_get_phys_size(parent);
chunk_size = uvm_cpu_chunk_get_size(chunks[0]);
// Remove all sizes above the parent's physical size.
merge_sizes &= parent_phys_size | (parent_phys_size - 1);
UVM_ASSERT(uvm_cpu_chunk_get_size(parent) == merge_size);
UVM_ASSERT(merge_size > chunk_size);
// Remove all sizes including and below the chunk's current size.
merge_sizes &= ~(chunk_size | (chunk_size - 1));
for (i = 1; i < num_merge_chunks; i++) {
if (chunks[i]->parent != parent || uvm_cpu_chunk_get_size(chunks[i]) != chunk_size)
return NV_ERR_INVALID_ARGUMENT;
// Find the largest size that is fully contained within the VA block.
for_each_chunk_size_rev(merge_chunk_size, merge_sizes) {
NvU64 parent_start = uvm_cpu_chunk_get_virt_addr(va_block, parent);
NvU64 parent_end = parent_start + parent_phys_size - 1;
if (uvm_va_block_contains_address(va_block, parent_start) &&
uvm_va_block_contains_address(va_block, parent_start + merge_chunk_size - 1) &&
IS_ALIGNED(parent_start, merge_chunk_size) &&
IS_ALIGNED(parent_end + 1, merge_chunk_size))
break;
UVM_ASSERT(nv_kref_read(&chunks[i]->refcount) == 1);
}
if (merge_chunk_size == UVM_CHUNK_SIZE_INVALID)
return NULL;
// Take a reference on the parent chunk so it doesn't get released when all
// of the children are released below.
uvm_cpu_chunk_get(parent);
if (uvm_cpu_chunk_get_size(parent) == merge_chunk_size)
return NULL;
for (i = 0; i < num_merge_chunks; i++)
uvm_cpu_chunk_put(chunks[i]);
UVM_ASSERT(chunk_size == uvm_cpu_chunk_get_size(parent));
UVM_ASSERT(IS_ALIGNED(merge_chunk_size, chunk_size));
*merged_chunk = parent;
subchunk_region = uvm_va_block_region(parent->region.first + uvm_cpu_chunk_num_pages(parent),
parent->region.first + (merge_chunk_size / PAGE_SIZE));
// Remove the first (parent) subchunk. It will be re-inserted later with an
// updated region.
uvm_cpu_chunk_remove_from_block(va_block, parent, parent->region.first);
for_each_cpu_chunk_in_block_region_safe(subchunk, page_index, next_page_index, va_block, subchunk_region) {
UVM_ASSERT(subchunk);
uvm_cpu_chunk_remove_from_block(va_block, subchunk, subchunk->region.first);
uvm_cpu_chunk_put(subchunk);
}
parent->region = uvm_va_block_region(parent->region.first, parent->region.first + (merge_chunk_size / PAGE_SIZE));
insert_status = uvm_cpu_chunk_insert_in_block(va_block, parent, parent->region.first);
UVM_ASSERT(insert_status != NV_ERR_INVALID_ARGUMENT && insert_status != NV_ERR_INVALID_STATE);
return parent;
return NV_OK;
}
static uvm_cpu_chunk_t *get_parent_cpu_chunk(uvm_cpu_chunk_t *chunk)
@ -1414,7 +1356,7 @@ static void check_cpu_dirty_flag(uvm_cpu_chunk_t *chunk, uvm_page_index_t page_i
// compound pages.
page = chunk->page + page_index;
if (PageDirty(page)) {
bitmap_fill(chunk->dirty_bitmap, uvm_cpu_chunk_get_phys_size(chunk) / PAGE_SIZE);
bitmap_fill(chunk->dirty_bitmap, uvm_cpu_chunk_get_size(chunk) / PAGE_SIZE);
ClearPageDirty(page);
}
}
@ -1432,7 +1374,7 @@ static uvm_cpu_chunk_t *get_parent_and_page_index(uvm_cpu_chunk_t *chunk, uvm_pa
page_index = chunk->offset + (page_index - chunk->region.first);
parent = get_parent_cpu_chunk(chunk);
UVM_ASSERT(page_index < uvm_cpu_chunk_get_phys_size(parent) / PAGE_SIZE);
UVM_ASSERT(page_index < uvm_cpu_chunk_get_size(parent) / PAGE_SIZE);
*out_page_index = page_index;
return parent;
}
@ -1442,7 +1384,7 @@ void uvm_cpu_chunk_mark_dirty(uvm_cpu_chunk_t *chunk, uvm_page_index_t page_inde
uvm_cpu_chunk_t *parent;
parent = get_parent_and_page_index(chunk, &page_index);
if (uvm_cpu_chunk_get_phys_size(parent) == PAGE_SIZE) {
if (uvm_cpu_chunk_get_size(parent) == PAGE_SIZE) {
SetPageDirty(parent->page);
return;
}
@ -1457,7 +1399,7 @@ void uvm_cpu_chunk_mark_clean(uvm_cpu_chunk_t *chunk, uvm_page_index_t page_inde
uvm_cpu_chunk_t *parent;
parent = get_parent_and_page_index(chunk, &page_index);
if (uvm_cpu_chunk_get_phys_size(parent) == PAGE_SIZE) {
if (uvm_cpu_chunk_get_size(parent) == PAGE_SIZE) {
ClearPageDirty(parent->page);
return;
}
@ -1474,7 +1416,7 @@ bool uvm_cpu_chunk_is_dirty(uvm_cpu_chunk_t *chunk, uvm_page_index_t page_index)
bool dirty;
parent = get_parent_and_page_index(chunk, &page_index);
if (uvm_cpu_chunk_get_phys_size(parent) == PAGE_SIZE)
if (uvm_cpu_chunk_get_size(parent) == PAGE_SIZE)
return PageDirty(parent->page);
uvm_spin_lock(&parent->lock);

View File

@ -181,6 +181,9 @@ size_t uvm_pmm_sysmem_mappings_dma_to_virt(uvm_pmm_sysmem_mappings_t *sysmem_map
#if UVM_CPU_CHUNK_SIZES == PAGE_SIZE
#define UVM_CPU_CHUNK_SIZE_IS_PAGE_SIZE() 1
typedef struct page uvm_cpu_chunk_t;
#define UVM_CPU_CHUNK_PAGE_INDEX(chunk, page_index) (page_index)
#else
#define UVM_CPU_CHUNK_SIZE_IS_PAGE_SIZE() 0
typedef struct uvm_cpu_chunk_struct uvm_cpu_chunk_t;
@ -224,13 +227,10 @@ struct uvm_cpu_chunk_struct
// parent.
nv_kref_t refcount;
// Size of the chunk at the time of its creation.
// For chunks, which are the result of a split, this
// value will be the size of the chunk prior to the
// split.
// For chunks resulting from page allocations (physical),
// Size of the chunk.
// For chunks resulting from page allocations (physical chunks),
// this value is the size of the physical allocation.
size_t log2_phys_size : order_base_2(UVM_CHUNK_SIZE_MASK_SIZE);
size_t log2_size : order_base_2(UVM_CHUNK_SIZE_MASK_SIZE);
struct {
// Per-GPU array of DMA mapping addresses for the chunk.
@ -252,6 +252,8 @@ struct uvm_cpu_chunk_struct
// for logical chunks this will be NULL;
unsigned long *dirty_bitmap;
};
#define UVM_CPU_CHUNK_PAGE_INDEX(chunk, page_index) (chunk->region.first)
#endif // UVM_CPU_CHUNK_SIZES == PAGE_SIZE
// Return the set of allowed CPU chunk allocation sizes.
@ -302,22 +304,6 @@ void uvm_cpu_chunk_remove_from_block(uvm_va_block_t *va_block, uvm_cpu_chunk_t *
// NULL is returned.
uvm_cpu_chunk_t *uvm_cpu_chunk_get_chunk_for_page(uvm_va_block_t *block, uvm_page_index_t page_index);
// Return the physical size of the CPU chunk.
// The physical size of the CPU chunk is the size of the physical CPU
// memory backing the CPU chunk. It is set at CPU chunk allocation time
static uvm_chunk_size_t uvm_cpu_chunk_get_phys_size(uvm_cpu_chunk_t *chunk)
{
#if UVM_CPU_CHUNK_SIZE_IS_PAGE_SIZE()
return (uvm_chunk_size_t)PAGE_SIZE;
#else
return ((uvm_chunk_size_t)1) << chunk->log2_phys_size;
#endif
}
// Return the size of the CPU chunk. While the physical size of the CPU
// chunk reflects the size of the physical memory backing the chunk, this
// size is the effective size of the chunk and changes as result of CPU
// chunk splits.
uvm_chunk_size_t uvm_cpu_chunk_get_size(uvm_cpu_chunk_t *chunk);
// Return the number of base system pages covered by the CPU chunk.
@ -370,35 +356,27 @@ NvU64 uvm_cpu_chunk_get_gpu_mapping_addr(uvm_va_block_t *block,
// new_size has to be one of the supported CPU chunk allocation sizes and has to
// be smaller than the current size of chunk.
//
// On success, NV_OK is returned. All new chunks will have chunk as parent and
// chunk's size will have been updated to new_size.
//
// Note that due to the way CPU chunks are managed and split, the number of
// newly created chunks will be (size_of(chunk) / new_size) - 1.
//
// On failure NV_ERR_NO_MEMORY will be returned. chunk's size will not be
// modified.
NV_STATUS uvm_cpu_chunk_split(uvm_va_block_t *va_block, uvm_cpu_chunk_t *chunk, uvm_chunk_size_t new_size);
// On success, NV_OK is returned. On failure NV_ERR_NO_MEMORY will be returned.
NV_STATUS uvm_cpu_chunk_split(uvm_va_block_t *va_block,
uvm_cpu_chunk_t *chunk,
uvm_chunk_size_t new_size,
uvm_page_index_t page_index,
uvm_cpu_chunk_t **new_chunks);
// Merge chunk's parent to the highest possible CPU chunk size fully contained
// within the parent's owning VA block.
// Merge chunks to merge_size.
//
// The size to which chunks are merged is determined by finding the largest
// size from the set of allowed CPU chunk sizes that satisfies both criteria
// below:
// * The VA range of the parent chunk resulting from the merge has to be
// fully contained within the VA block.
// * The start and end VA addresses of the parent based on its physical
// size have to be aligned to the merge size.
// All input chunks must have the same parent and size. If not,
// NV_ERR_INVALID_ARGUMENT is returned.
//
// It is possible that a merge cannot be done if chunk does not have a parent
// (it is a physical chunk), chunk's owning VA block is not the same as
// its parent's owning VA block, or there is no chunk size that satisfied both
// the above criteria.
// If a merge cannot be done, NV_WARN_NOTHING_TO_DO is returned.
//
// Return a pointer to the merged chunk. If a merge could not be done, return
// NULL.
uvm_cpu_chunk_t *uvm_cpu_chunk_merge(uvm_va_block_t *va_block, uvm_cpu_chunk_t *chunk);
// On success, NV_OK is returned and merged_chunk is set to point to the
// merged chunk.
NV_STATUS uvm_cpu_chunk_merge(uvm_va_block_t *va_block,
uvm_cpu_chunk_t **chunks,
size_t num_merge_chunks,
uvm_chunk_size_t merge_size,
uvm_cpu_chunk_t **merged_chunk);
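// Editorial round-trip sketch: example_split_then_merge is hypothetical and
// assumes the declarations above are in scope. It splits a chunk into
// new_size pieces starting at page_index and then merges the siblings back
// into their parent. Block-level bookkeeping for the subchunks is omitted.
static NV_STATUS example_split_then_merge(uvm_va_block_t *va_block,
                                          uvm_cpu_chunk_t *chunk,
                                          uvm_chunk_size_t new_size,
                                          uvm_page_index_t page_index)
{
    uvm_chunk_size_t merge_size = uvm_cpu_chunk_get_size(chunk);
    size_t num_subchunks = merge_size / new_size;
    uvm_cpu_chunk_t **subchunks;
    uvm_cpu_chunk_t *merged_chunk;
    NV_STATUS status;

    subchunks = uvm_kvmalloc_zero(num_subchunks * sizeof(*subchunks));
    if (!subchunks)
        return NV_ERR_NO_MEMORY;

    status = uvm_cpu_chunk_split(va_block, chunk, new_size, page_index, subchunks);
    if (status != NV_OK)
        goto out;

    // All subchunks share the same parent and size, so merging back to the
    // parent's size should succeed and return the parent in merged_chunk.
    status = uvm_cpu_chunk_merge(va_block, subchunks, num_subchunks, merge_size, &merged_chunk);

out:
    uvm_kvfree(subchunks);
    return status;
}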
// Mark the CPU sub-page page_index in the CPU chunk as dirty.
// page_index has to be a page within the chunk's region.
@ -414,14 +392,22 @@ bool uvm_cpu_chunk_is_dirty(uvm_cpu_chunk_t *chunk, uvm_page_index_t page_index)
#else // UVM_CPU_CHUNK_SIZE_IS_PAGE_SIZE()
static NV_STATUS uvm_cpu_chunk_split(uvm_va_block_t *va_block, uvm_cpu_chunk_t *chunk, uvm_chunk_size_t new_size)
static NV_STATUS uvm_cpu_chunk_split(uvm_va_block_t *va_block,
uvm_cpu_chunk_t *chunk,
uvm_chunk_size_t new_size,
uvm_page_index_t page_index,
uvm_cpu_chunk_t **new_chunks)
{
return NV_OK;
}
static uvm_cpu_chunk_t *uvm_cpu_chunk_merge(uvm_va_block_t *va_block, uvm_cpu_chunk_t *chunk)
static NV_STATUS uvm_cpu_chunk_merge(uvm_va_block_t *va_block,
uvm_cpu_chunk_t **chunk,
size_t num_merge_chunks,
uvm_chunk_size_t merge_size,
uvm_cpu_chunk_t **merged_chunk)
{
return NULL;
return NV_WARN_NOTHING_TO_DO;
}
static void uvm_cpu_chunk_mark_dirty(uvm_cpu_chunk_t *chunk, uvm_page_index_t page_index)

View File

@ -101,7 +101,7 @@ static NV_STATUS split_as_needed(uvm_va_space_t *va_space,
UVM_ASSERT(PAGE_ALIGNED(addr));
// Look for UVM managed allocations first, then look for HMM policies.
// Look for managed allocations first, then look for HMM policies.
va_range = uvm_va_range_find(va_space, addr);
if (!va_range)
return uvm_hmm_split_as_needed(va_space, addr, split_needed_cb, data);
@ -203,6 +203,10 @@ NV_STATUS uvm_va_block_set_preferred_location_locked(uvm_va_block_t *va_block,
uvm_va_block_context_t *va_block_context)
{
uvm_assert_mutex_locked(&va_block->lock);
// TODO: Bug 1750144: remove this restriction when HMM handles setting
// the preferred location semantics instead of just recording the policy.
UVM_ASSERT(!uvm_va_block_is_hmm(va_block));
UVM_ASSERT(va_block_context->policy == uvm_va_range_get_policy(va_block->va_range));
uvm_va_block_mark_cpu_dirty(va_block);
@ -432,10 +436,9 @@ NV_STATUS uvm_va_block_set_accessed_by(uvm_va_block_t *va_block,
uvm_tracker_t local_tracker = UVM_TRACKER_INIT();
UVM_ASSERT(!uvm_va_block_is_hmm(va_block));
UVM_ASSERT(va_block_context->policy == uvm_va_range_get_policy(va_block->va_range));
va_block_context->policy = uvm_va_range_get_policy(va_block->va_range);
// Read duplication takes precedence over SetAccesedBy. Do not add mappings
// Read duplication takes precedence over SetAccessedBy. Do not add mappings
// if read duplication is enabled.
if (uvm_va_policy_is_read_duplicate(va_block_context->policy, va_space))
return NV_OK;
@ -617,6 +620,10 @@ NV_STATUS uvm_va_block_set_read_duplication(uvm_va_block_t *va_block,
NV_STATUS status;
uvm_va_block_retry_t va_block_retry;
// TODO: Bug 3660922: need to implement HMM read duplication support.
UVM_ASSERT(!uvm_va_block_is_hmm(va_block));
UVM_ASSERT(va_block_context->policy == uvm_va_range_get_policy(va_block->va_range));
status = UVM_VA_BLOCK_LOCK_RETRY(va_block, &va_block_retry,
va_block_set_read_duplication_locked(va_block,
&va_block_retry,
@ -714,6 +721,9 @@ NV_STATUS uvm_va_block_unset_read_duplication(uvm_va_block_t *va_block,
NV_STATUS status = NV_OK;
uvm_tracker_t local_tracker = UVM_TRACKER_INIT();
UVM_ASSERT(!uvm_va_block_is_hmm(va_block));
UVM_ASSERT(va_block_context->policy == uvm_va_range_get_policy(va_block->va_range));
// Restore all SetAccessedBy mappings
status = UVM_VA_BLOCK_LOCK_RETRY(va_block, &va_block_retry,
va_block_unset_read_duplication_locked(va_block,

View File

@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2018-2021 NVIDIA Corporation
Copyright (c) 2018-2022 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@ -54,7 +54,7 @@ NV_STATUS uvm_populate_pageable_vma(struct vm_area_struct *vma,
{
unsigned long vma_num_pages;
unsigned long outer = start + length;
const bool is_writable = is_write_populate(vma, populate_permissions);
unsigned int gup_flags = is_write_populate(vma, populate_permissions) ? FOLL_WRITE : 0;
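// FOLL_WRITE asks get_user_pages/pin_user_pages to fault the pages in with
// write access; with no flags set the pages only need to be readable.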
struct mm_struct *mm = vma->vm_mm;
unsigned long vm_flags = vma->vm_flags;
bool uvm_managed_vma;
@ -97,7 +97,10 @@ NV_STATUS uvm_populate_pageable_vma(struct vm_area_struct *vma,
if (uvm_managed_vma)
uvm_record_unlock_mmap_lock_read(mm);
ret = NV_GET_USER_PAGES_REMOTE(NULL, mm, start, vma_num_pages, is_writable, 0, pages, NULL);
if (touch)
ret = NV_PIN_USER_PAGES_REMOTE(mm, start, vma_num_pages, gup_flags, pages, NULL, NULL);
else
ret = NV_GET_USER_PAGES_REMOTE(mm, start, vma_num_pages, gup_flags, pages, NULL, NULL);
if (uvm_managed_vma)
uvm_record_lock_mmap_lock_read(mm);
@ -114,7 +117,7 @@ NV_STATUS uvm_populate_pageable_vma(struct vm_area_struct *vma,
for (i = 0; i < ret; i++) {
UVM_ASSERT(pages[i]);
put_page(pages[i]);
NV_UNPIN_USER_PAGE(pages[i]);
}
}
@ -127,7 +130,7 @@ NV_STATUS uvm_populate_pageable_vma(struct vm_area_struct *vma,
for (i = 0; i < vma_num_pages; i++) {
uvm_touch_page(pages[i]);
put_page(pages[i]);
NV_UNPIN_USER_PAGE(pages[i]);
}
}

View File

@ -68,31 +68,7 @@ NV_STATUS uvm_procfs_init()
void uvm_procfs_exit()
{
uvm_procfs_destroy_entry(uvm_proc_dir);
}
// TODO: Bug 1767237: Copied from nv-procfs.c. Refactor it out to
// nv-procfs-common.c.
static void procfs_destroy_entry_with_root(struct proc_dir_entry *entry, struct proc_dir_entry *delimiter)
{
#if defined(NV_PROC_REMOVE_PRESENT)
proc_remove(entry);
#else
while (entry) {
struct proc_dir_entry *next = entry->next;
if (entry->subdir)
procfs_destroy_entry_with_root(entry->subdir, delimiter);
remove_proc_entry(entry->name, entry->parent);
if (entry == delimiter)
break;
entry = next;
}
#endif
}
void uvm_procfs_destroy_entry(struct proc_dir_entry *entry)
{
procfs_destroy_entry_with_root(entry, entry);
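// proc_remove() removes the entry and, recursively, its whole subtree, so
// the hand-rolled teardown helper removed above is no longer needed.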
proc_remove(uvm_proc_dir);
}
struct proc_dir_entry *uvm_procfs_get_gpu_base_dir()

View File

@ -53,8 +53,6 @@ static bool uvm_procfs_is_debug_enabled(void)
struct proc_dir_entry *uvm_procfs_get_gpu_base_dir(void);
struct proc_dir_entry *uvm_procfs_get_cpu_base_dir(void);
void uvm_procfs_destroy_entry(struct proc_dir_entry *entry);
int uvm_procfs_open_callback(void);
void uvm_procfs_close_callback(void);

View File

@ -121,7 +121,7 @@ NV_STATUS uvm_pushbuffer_create(uvm_channel_manager_t *channel_manager, uvm_push
goto error;
// Verify the GPU can access the pushbuffer.
UVM_ASSERT(uvm_pushbuffer_get_gpu_va_base(pushbuffer) + UVM_PUSHBUFFER_SIZE < gpu->parent->max_host_va);
UVM_ASSERT((uvm_pushbuffer_get_gpu_va_base(pushbuffer) + UVM_PUSHBUFFER_SIZE - 1) < gpu->parent->max_host_va);
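// Note: the pushbuffer occupies [base, base + UVM_PUSHBUFFER_SIZE - 1], so the
// last byte is what must fall below max_host_va; the previous check compared
// base + UVM_PUSHBUFFER_SIZE itself and was off by one.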
bitmap_fill(pushbuffer->idle_chunks, UVM_PUSHBUFFER_CHUNKS);
bitmap_fill(pushbuffer->available_chunks, UVM_PUSHBUFFER_CHUNKS);
@ -372,7 +372,7 @@ void uvm_pushbuffer_destroy(uvm_pushbuffer_t *pushbuffer)
if (pushbuffer == NULL)
return;
uvm_procfs_destroy_entry(pushbuffer->procfs.info_file);
proc_remove(pushbuffer->procfs.info_file);
uvm_rm_mem_free(pushbuffer->memory);
uvm_kvfree(pushbuffer);
@ -448,7 +448,7 @@ void uvm_pushbuffer_end_push(uvm_pushbuffer_t *pushbuffer, uvm_push_t *push, uvm
{
uvm_pushbuffer_chunk_t *chunk = gpfifo_to_chunk(pushbuffer, gpfifo);
uvm_assert_spinlock_locked(&push->channel->pool->lock);
uvm_channel_pool_assert_locked(push->channel->pool);
uvm_spin_lock(&pushbuffer->lock);

View File

@ -166,30 +166,6 @@ void uvm_range_tree_shrink_node(uvm_range_tree_t *tree, uvm_range_tree_node_t *n
node->end = new_end;
}
void uvm_range_tree_adjust_interval(uvm_range_tree_t *tree,
NvU64 addr,
NvU64 *startp,
NvU64 *endp)
{
uvm_range_tree_node_t *node;
NvU64 start = *startp;
NvU64 end = *endp;
uvm_range_tree_for_each_in(node, tree, start, end) {
if (node->start > addr) {
end = node->start - 1;
break;
}
else if (node->end < addr)
start = node->end + 1;
else
UVM_ASSERT_MSG(0, "Found node at address 0x%llx\n", addr);
}
*startp = start;
*endp = end;
}
void uvm_range_tree_split(uvm_range_tree_t *tree,
uvm_range_tree_node_t *existing,
uvm_range_tree_node_t *new)
@ -261,3 +237,55 @@ uvm_range_tree_node_t *uvm_range_tree_iter_first(uvm_range_tree_t *tree, NvU64 s
return NULL;
}
NV_STATUS uvm_range_tree_find_hole(uvm_range_tree_t *tree, NvU64 addr, NvU64 *start, NvU64 *end)
{
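    // Illustrative example (editorial): with nodes [0x1000, 0x1fff] and
    // [0x3000, 0x3fff] in the tree, an addr of 0x2800 yields the hole
    // [0x2000, 0x2fff] and NV_OK; an addr inside either node returns
    // NV_ERR_UVM_ADDRESS_IN_USE.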
uvm_range_tree_node_t *node;
// Find the first node on or after addr, if any
node = uvm_range_tree_iter_first(tree, addr, ULLONG_MAX);
if (node) {
if (node->start <= addr)
return NV_ERR_UVM_ADDRESS_IN_USE;
// node->start can't be 0, otherwise it would contain addr
if (end)
*end = node->start - 1;
node = uvm_range_tree_prev(tree, node);
}
else {
// All nodes in the tree must come before addr, if any exist
node = uvm_range_tree_last(tree);
if (end)
*end = ULLONG_MAX;
}
if (start) {
if (node)
*start = node->end + 1;
else
*start = 0;
}
return NV_OK;
}
NV_STATUS uvm_range_tree_find_hole_in(uvm_range_tree_t *tree, NvU64 addr, NvU64 *start, NvU64 *end)
{
NvU64 temp_start, temp_end;
NV_STATUS status;
UVM_ASSERT(start);
UVM_ASSERT(end);
UVM_ASSERT(*start <= addr);
UVM_ASSERT(*end >= addr);
status = uvm_range_tree_find_hole(tree, addr, &temp_start, &temp_end);
if (status == NV_OK) {
*start = max(temp_start, *start);
*end = min(temp_end, *end);
}
return status;
}

View File

@ -73,11 +73,6 @@ static void uvm_range_tree_remove(uvm_range_tree_t *tree, uvm_range_tree_node_t
// lesser or equal to node->end.
void uvm_range_tree_shrink_node(uvm_range_tree_t *tree, uvm_range_tree_node_t *node, NvU64 new_start, NvU64 new_end);
// Adjust start and end to be the largest contiguous interval surrounding addr
// between *startp and *endp and without overlapping an existing tree node.
// This function assumes there is no node that includes addr.
void uvm_range_tree_adjust_interval(uvm_range_tree_t *tree, NvU64 addr, NvU64 *startp, NvU64 *endp);
// Splits an existing node into two pieces, with the new node always after the
// existing node. The caller must set new->start before calling this function.
// existing should not be modified by the caller. On return, existing will
@ -100,6 +95,16 @@ uvm_range_tree_node_t *uvm_range_tree_merge_next(uvm_range_tree_t *tree, uvm_ran
// Returns the node containing addr, if any
uvm_range_tree_node_t *uvm_range_tree_find(uvm_range_tree_t *tree, NvU64 addr);
// Find the largest hole containing addr but not containing any nodes. If addr
// is contained by a node, NV_ERR_UVM_ADDRESS_IN_USE is returned.
//
// start and end may be NULL.
NV_STATUS uvm_range_tree_find_hole(uvm_range_tree_t *tree, NvU64 addr, NvU64 *start, NvU64 *end);
// Like uvm_range_tree_find_hole, but start and end are in/out parameters that
// clamp the range.
NV_STATUS uvm_range_tree_find_hole_in(uvm_range_tree_t *tree, NvU64 addr, NvU64 *start, NvU64 *end);
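// Editorial usage sketch: example_hole_around is hypothetical. It clamps the
// hole containing addr to [clamp_start, clamp_end]; note that find_hole_in
// requires *start <= addr <= *end on entry.
static NV_STATUS example_hole_around(uvm_range_tree_t *tree,
                                     NvU64 addr,
                                     NvU64 clamp_start,
                                     NvU64 clamp_end,
                                     NvU64 *hole_start,
                                     NvU64 *hole_end)
{
    *hole_start = clamp_start;
    *hole_end = clamp_end;

    // Returns NV_ERR_UVM_ADDRESS_IN_USE if some node already contains addr.
    return uvm_range_tree_find_hole_in(tree, addr, hole_start, hole_end);
}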
// Returns the prev/next node in address order, or NULL if none exists
static uvm_range_tree_node_t *uvm_range_tree_prev(uvm_range_tree_t *tree, uvm_range_tree_node_t *node)
{
@ -118,17 +123,6 @@ static uvm_range_tree_node_t *uvm_range_tree_next(uvm_range_tree_t *tree, uvm_ra
// Returns the first node in the range [start, end], if any
uvm_range_tree_node_t *uvm_range_tree_iter_first(uvm_range_tree_t *tree, NvU64 start, NvU64 end);
// Return true if the range tree is empty.
static bool uvm_range_tree_empty(uvm_range_tree_t *tree)
{
return list_empty(&tree->head);
}
static NvU64 uvm_range_tree_node_size(uvm_range_tree_node_t *node)
{
return node->end - node->start + 1;
}
// Returns the node following the provided node in address order, if that node's
// start <= the provided end.
static uvm_range_tree_node_t *uvm_range_tree_iter_next(uvm_range_tree_t *tree, uvm_range_tree_node_t *node, NvU64 end)
@ -139,6 +133,25 @@ static uvm_range_tree_node_t *uvm_range_tree_iter_next(uvm_range_tree_t *tree, u
return NULL;
}
// Return true if the range tree is empty.
static bool uvm_range_tree_empty(uvm_range_tree_t *tree)
{
return list_empty(&tree->head);
}
// Return the last node in the tree, or NULL if none exists
static uvm_range_tree_node_t *uvm_range_tree_last(uvm_range_tree_t *tree)
{
if (list_empty(&tree->head))
return NULL;
return list_last_entry(&tree->head, uvm_range_tree_node_t, list);
}
static NvU64 uvm_range_tree_node_size(uvm_range_tree_node_t *node)
{
return node->end - node->start + 1;
}
#define uvm_range_tree_for_each(node, tree) list_for_each_entry((node), &(tree)->head, list)
#define uvm_range_tree_for_each_safe(node, next, tree) \

View File

@ -303,10 +303,93 @@ error:
return status;
}
static NV_STATUS rtt_check_between(rtt_state_t *state, uvm_range_tree_node_t *lower, uvm_range_tree_node_t *upper)
{
bool hole_exists = true;
NvU64 hole_start = 0, hole_end = ULLONG_MAX;
NvU64 test_start, test_end;
if (lower) {
if (lower->end == ULLONG_MAX) {
UVM_ASSERT(!upper);
hole_exists = false;
}
else {
hole_start = lower->end + 1;
}
}
if (upper) {
if (upper->start == 0) {
UVM_ASSERT(!lower);
hole_exists = false;
}
else {
hole_end = upper->start - 1;
}
}
if (hole_start > hole_end)
hole_exists = false;
if (hole_exists) {
size_t i;
NvU64 hole_mid = hole_start + ((hole_end - hole_start) / 2);
NvU64 inputs[] = {hole_start, hole_mid, hole_end};
for (i = 0; i < ARRAY_SIZE(inputs); i++) {
TEST_CHECK_RET(uvm_range_tree_find(&state->tree, inputs[i]) == NULL);
TEST_NV_CHECK_RET(uvm_range_tree_find_hole(&state->tree, inputs[i], &test_start, &test_end));
TEST_CHECK_RET(test_start == hole_start);
TEST_CHECK_RET(test_end == hole_end);
test_start = 0;
test_end = ULLONG_MAX;
TEST_NV_CHECK_RET(uvm_range_tree_find_hole_in(&state->tree, inputs[i], &test_start, &test_end));
TEST_CHECK_RET(test_start == hole_start);
TEST_CHECK_RET(test_end == hole_end);
test_start = hole_start;
test_end = inputs[i];
TEST_NV_CHECK_RET(uvm_range_tree_find_hole_in(&state->tree, inputs[i], &test_start, &test_end));
TEST_CHECK_RET(test_start == hole_start);
TEST_CHECK_RET(test_end == inputs[i]);
test_start = inputs[i];
test_end = hole_end;
TEST_NV_CHECK_RET(uvm_range_tree_find_hole_in(&state->tree, inputs[i], &test_start, &test_end));
TEST_CHECK_RET(test_start == inputs[i]);
TEST_CHECK_RET(test_end == hole_end);
}
}
else {
test_start = 0;
test_end = ULLONG_MAX;
if (lower) {
MEM_NV_CHECK_RET(uvm_range_tree_find_hole(&state->tree, lower->end, NULL, NULL),
NV_ERR_UVM_ADDRESS_IN_USE);
MEM_NV_CHECK_RET(uvm_range_tree_find_hole_in(&state->tree, lower->end, &test_start, &test_end),
NV_ERR_UVM_ADDRESS_IN_USE);
}
if (upper) {
MEM_NV_CHECK_RET(uvm_range_tree_find_hole(&state->tree, upper->start, NULL, NULL),
NV_ERR_UVM_ADDRESS_IN_USE);
MEM_NV_CHECK_RET(uvm_range_tree_find_hole_in(&state->tree, upper->start, &test_start, &test_end),
NV_ERR_UVM_ADDRESS_IN_USE);
}
}
return NV_OK;
}
static NV_STATUS rtt_check_node(rtt_state_t *state, uvm_range_tree_node_t *node)
{
uvm_range_tree_node_t *temp, *prev, *next;
NvU64 start, mid, end;
NvU64 hole_start = 0, hole_end = ULLONG_MAX;
start = node->start;
end = node->end;
@ -320,6 +403,18 @@ static NV_STATUS rtt_check_node(rtt_state_t *state, uvm_range_tree_node_t *node)
TEST_CHECK_RET(uvm_range_tree_find(&state->tree, start) == node);
TEST_CHECK_RET(uvm_range_tree_find(&state->tree, mid) == node);
TEST_CHECK_RET(uvm_range_tree_find(&state->tree, end) == node);
MEM_NV_CHECK_RET(uvm_range_tree_find_hole(&state->tree, start, NULL, NULL), NV_ERR_UVM_ADDRESS_IN_USE);
MEM_NV_CHECK_RET(uvm_range_tree_find_hole(&state->tree, mid, NULL, NULL), NV_ERR_UVM_ADDRESS_IN_USE);
MEM_NV_CHECK_RET(uvm_range_tree_find_hole(&state->tree, end, NULL, NULL), NV_ERR_UVM_ADDRESS_IN_USE);
MEM_NV_CHECK_RET(uvm_range_tree_find_hole_in(&state->tree, start, &hole_start, &hole_end),
NV_ERR_UVM_ADDRESS_IN_USE);
MEM_NV_CHECK_RET(uvm_range_tree_find_hole_in(&state->tree, mid, &hole_start, &hole_end),
NV_ERR_UVM_ADDRESS_IN_USE);
MEM_NV_CHECK_RET(uvm_range_tree_find_hole_in(&state->tree, end, &hole_start, &hole_end),
NV_ERR_UVM_ADDRESS_IN_USE);
TEST_CHECK_RET(uvm_range_tree_node_size(node) == end - start + 1);
if (end < ULLONG_MAX)
@ -327,6 +422,8 @@ static NV_STATUS rtt_check_node(rtt_state_t *state, uvm_range_tree_node_t *node)
uvm_range_tree_for_each_in(temp, &state->tree, start, end)
TEST_CHECK_RET(temp == node);
uvm_range_tree_for_each_in_safe(temp, next, &state->tree, start, end)
TEST_CHECK_RET(temp == node);
prev = uvm_range_tree_prev(&state->tree, node);
if (prev) {
@ -341,11 +438,16 @@ static NV_STATUS rtt_check_node(rtt_state_t *state, uvm_range_tree_node_t *node)
if (next) {
TEST_CHECK_RET(node->end < next->start);
TEST_CHECK_RET(uvm_range_tree_prev(&state->tree, next) == node);
TEST_CHECK_RET(uvm_range_tree_last(&state->tree) != node);
}
else {
TEST_CHECK_RET(uvm_range_tree_iter_next(&state->tree, node, ULLONG_MAX) == NULL);
TEST_CHECK_RET(uvm_range_tree_last(&state->tree) == node);
}
TEST_NV_CHECK_RET(rtt_check_between(state, prev, node));
TEST_NV_CHECK_RET(rtt_check_between(state, node, next));
return NV_OK;
}
@ -362,13 +464,17 @@ static NV_STATUS rtt_check_iterator_all(rtt_state_t *state)
TEST_CHECK_RET(prev->end < node->start);
TEST_CHECK_RET(uvm_range_tree_prev(&state->tree, node) == prev);
TEST_NV_CHECK_RET(rtt_check_between(state, prev, node));
++iter_count;
prev = node;
expected = uvm_range_tree_next(&state->tree, node);
}
TEST_CHECK_RET(expected == NULL);
TEST_CHECK_RET(uvm_range_tree_last(&state->tree) == prev);
TEST_CHECK_RET(iter_count == state->count);
TEST_NV_CHECK_RET(rtt_check_between(state, prev, NULL));
iter_count = 0;
expected = NULL;
@ -381,13 +487,17 @@ static NV_STATUS rtt_check_iterator_all(rtt_state_t *state)
TEST_CHECK_RET(prev->end < node->start);
TEST_CHECK_RET(uvm_range_tree_prev(&state->tree, node) == prev);
// Skip rtt_check_between since it was done in the loop above
++iter_count;
prev = node;
expected = uvm_range_tree_next(&state->tree, node);
}
TEST_CHECK_RET(expected == NULL);
TEST_CHECK_RET(uvm_range_tree_last(&state->tree) == prev);
TEST_CHECK_RET(iter_count == state->count);
return NV_OK;
}
@ -424,20 +534,32 @@ static NV_STATUS rtt_range_add_check(rtt_state_t *state, rtt_range_t *range)
}
}
status = rtt_range_add(state, range, &node);
// Verify tree state
if (overlap) {
// Verify failure
MEM_NV_CHECK_RET(status, NV_ERR_UVM_ADDRESS_IN_USE);
// The tree said there's already a range there. Check whether its
// internal state is consistent.
node = uvm_range_tree_iter_first(&state->tree, range->start, range->end);
TEST_CHECK_RET(node);
TEST_CHECK_RET(rtt_range_overlaps_node(node, range));
}
else {
// Verify success
NvU64 hole_start, hole_end;
TEST_NV_CHECK_RET(uvm_range_tree_find_hole(&state->tree, range->start, &hole_start, &hole_end));
TEST_CHECK_RET(hole_start <= range->start);
TEST_CHECK_RET(hole_end >= range->end);
hole_start = range->start;
hole_end = range->end;
TEST_NV_CHECK_RET(uvm_range_tree_find_hole_in(&state->tree, range->start, &hole_start, &hole_end));
TEST_CHECK_RET(hole_start == range->start);
TEST_CHECK_RET(hole_end == range->end);
}
status = rtt_range_add(state, range, &node);
if (overlap) {
MEM_NV_CHECK_RET(status, NV_ERR_UVM_ADDRESS_IN_USE);
}
else {
MEM_NV_CHECK_RET(status, NV_OK);
status = rtt_check_node(state, node);
}
@ -450,6 +572,7 @@ static NV_STATUS rtt_index_remove_check(rtt_state_t *state, size_t index)
{
uvm_range_tree_node_t *node, *prev, *next;
NvU64 start, end;
NvU64 hole_start, hole_end;
NV_STATUS status;
TEST_CHECK_RET(index < state->count);
@ -472,12 +595,35 @@ static NV_STATUS rtt_index_remove_check(rtt_state_t *state, size_t index)
TEST_CHECK_RET(uvm_range_tree_find(&state->tree, start) == NULL);
TEST_CHECK_RET(uvm_range_tree_find(&state->tree, end) == NULL);
TEST_CHECK_RET(uvm_range_tree_iter_first(&state->tree, start, end) == NULL);
if (prev)
hole_start = start;
hole_end = end;
TEST_NV_CHECK_RET(uvm_range_tree_find_hole_in(&state->tree, start, &hole_start, &hole_end));
TEST_CHECK_RET(hole_start == start);
TEST_CHECK_RET(hole_end == end);
TEST_NV_CHECK_RET(uvm_range_tree_find_hole(&state->tree, start, &hole_start, &hole_end));
TEST_CHECK_RET(hole_start <= start);
TEST_CHECK_RET(hole_end >= end);
if (prev) {
TEST_CHECK_RET(uvm_range_tree_next(&state->tree, prev) == next);
if (next)
TEST_CHECK_RET(hole_start == prev->end + 1);
}
if (next) {
TEST_CHECK_RET(uvm_range_tree_prev(&state->tree, next) == prev);
TEST_CHECK_RET(hole_end == next->start - 1);
}
else {
TEST_CHECK_RET(uvm_range_tree_last(&state->tree) == prev);
}
if (!prev && !next) {
TEST_CHECK_RET(uvm_range_tree_empty(&state->tree));
TEST_CHECK_RET(uvm_range_tree_last(&state->tree) == NULL);
TEST_CHECK_RET(hole_start == 0);
TEST_CHECK_RET(hole_end == ULLONG_MAX);
TEST_CHECK_RET(state->count == 0);
}
else {
@ -749,10 +895,11 @@ static NV_STATUS rtt_index_merge_check_next_val(rtt_state_t *state, NvU64 addr)
static NV_STATUS rtt_directed(rtt_state_t *state)
{
uvm_range_tree_node_t *node;
uvm_range_tree_node_t *node, *next;
// Empty tree
TEST_CHECK_RET(uvm_range_tree_empty(&state->tree));
TEST_CHECK_RET(uvm_range_tree_last(&state->tree) == NULL);
TEST_CHECK_RET(uvm_range_tree_find(&state->tree, 0) == NULL);
TEST_CHECK_RET(uvm_range_tree_find(&state->tree, ULLONG_MAX) == NULL);
uvm_range_tree_for_each(node, &state->tree)
@ -763,6 +910,13 @@ static NV_STATUS rtt_directed(rtt_state_t *state)
TEST_CHECK_RET(0);
uvm_range_tree_for_each_in(node, &state->tree, ULLONG_MAX, ULLONG_MAX)
TEST_CHECK_RET(0);
uvm_range_tree_for_each_in_safe(node, next, &state->tree, 0, 0)
TEST_CHECK_RET(0);
uvm_range_tree_for_each_in_safe(node, next, &state->tree, 0, ULLONG_MAX)
TEST_CHECK_RET(0);
uvm_range_tree_for_each_in_safe(node, next, &state->tree, ULLONG_MAX, ULLONG_MAX)
TEST_CHECK_RET(0);
TEST_NV_CHECK_RET(rtt_check_between(state, NULL, NULL));
// Consume entire range
MEM_NV_CHECK_RET(rtt_range_add_check_val(state, 0, ULLONG_MAX), NV_OK);
@ -1038,8 +1192,8 @@ static NV_STATUS rtt_batch_remove(rtt_state_t *state, UVM_TEST_RANGE_TREE_RANDOM
return NV_OK;
}
// Attempts to shrink a randomly-selected range in the tree. On selecting a range
// of size 1, the attempt is repeated with another range up to the
// Attempts to shrink a randomly-selected range in the tree. On selecting a
// range of size 1, the attempt is repeated with another range up to the
// params->max_attempts threshold.
static NV_STATUS rtt_rand_shrink(rtt_state_t *state, UVM_TEST_RANGE_TREE_RANDOM_PARAMS *params)
{
@ -1151,11 +1305,12 @@ static NV_STATUS rtt_rand_split(rtt_state_t *state, UVM_TEST_RANGE_TREE_RANDOM_P
return NV_OK;
}
// Attempts to merge a randomly-selected range in the tree in a randomly-selected
// direction (next or prev). On selecting a range with a non-adjacent neighbor,
// the attempt is repeated with another range up to the params->max_attempts
// threshold. On reaching the attempt threshold the RNG probabilities are
// adjusted to prefer split operations and NV_ERR_BUSY_RETRY is returned.
// Attempts to merge a randomly-selected range in the tree in a randomly-
// selected direction (next or prev). On selecting a range with a non-adjacent
// neighbor, the attempt is repeated with another range up to the
// params->max_attempts threshold. On reaching the attempt threshold the RNG
// probabilities are adjusted to prefer split operations and NV_ERR_BUSY_RETRY
// is returned.
static NV_STATUS rtt_rand_merge(rtt_state_t *state, UVM_TEST_RANGE_TREE_RANDOM_PARAMS *params)
{
uvm_range_tree_node_t *node;
@ -1236,20 +1391,113 @@ static NV_STATUS rtt_rand_collision_check(rtt_state_t *state, NvU64 max_end)
// in that range in order.
static NV_STATUS rtt_rand_iterator_check(rtt_state_t *state, NvU64 max_end)
{
uvm_range_tree_node_t *node, *prev = NULL;
uvm_range_tree_node_t *node;
uvm_range_tree_node_t *prev = NULL, *first = NULL, *last = NULL, *next = NULL;
size_t i, target_count = 0, iter_count = 0;
NvU64 hole_start, hole_end, test_start, test_end;
rtt_range_t range;
// Generate the range to check
rtt_get_rand_range(&state->rng, max_end, &range);
// Phase 1: Iterate through the unordered list, counting how many nodes we
// ought to see from the tree iterator.
for (i = 0; i < state->count; i++)
target_count += rtt_range_overlaps_node(state->nodes[i], &range);
// ought to see from the tree iterator and finding the boundary nodes.
for (i = 0; i < state->count; i++) {
node = state->nodes[i];
if (rtt_range_overlaps_node(node, &range)) {
++target_count;
// first is the lowest node with any overlap
if (!first || first->start > node->start)
first = node;
// last is the highest node with any overlap
if (!last || last->end < node->end)
last = node;
}
else {
// prev is the highest node with end < range.start
if (node->end < range.start && (!prev || node->end > prev->end))
prev = node;
// next is the lowest node with start > range.end
if (node->start > range.end && (!next || node->start < next->start))
next = node;
}
}
// Phase 2: Check the holes at the edges of the range.
// The holes between the nodes will be checked within the iterator loop
// below. Here we check the holes at the start and end of the range, if any.
if (first) {
if (range.start < first->start) {
// Check hole at range.start
hole_start = prev ? prev->end + 1 : 0;
hole_end = first->start - 1;
TEST_NV_CHECK_RET(uvm_range_tree_find_hole(&state->tree, range.start, &test_start, &test_end));
TEST_CHECK_RET(test_start == hole_start);
TEST_CHECK_RET(test_end == hole_end);
test_start = range.start;
test_end = ULLONG_MAX;
TEST_NV_CHECK_RET(uvm_range_tree_find_hole_in(&state->tree, range.start, &test_start, &test_end));
TEST_CHECK_RET(test_start == range.start);
TEST_CHECK_RET(test_end == hole_end);
}
// Else, no hole at start
}
else {
// No nodes intersect the range
UVM_ASSERT(target_count == 0);
UVM_ASSERT(!last);
hole_start = prev ? prev->end + 1 : 0;
hole_end = next ? next->start - 1 : ULLONG_MAX;
TEST_NV_CHECK_RET(uvm_range_tree_find_hole(&state->tree, range.start, &test_start, &test_end));
TEST_CHECK_RET(test_start == hole_start);
TEST_CHECK_RET(test_end == hole_end);
test_start = range.start;
test_end = range.end;
TEST_NV_CHECK_RET(uvm_range_tree_find_hole_in(&state->tree, range.start, &test_start, &test_end));
TEST_CHECK_RET(test_start == range.start);
TEST_CHECK_RET(test_end == range.end);
}
if (last && range.end > last->end) {
// Check hole at range.end
hole_start = last->end + 1;
hole_end = next ? next->start - 1 : ULLONG_MAX;
TEST_NV_CHECK_RET(uvm_range_tree_find_hole(&state->tree, range.end, &test_start, &test_end));
TEST_CHECK_RET(test_start == hole_start);
TEST_CHECK_RET(test_end == hole_end);
test_start = 0;
test_end = range.end;
TEST_NV_CHECK_RET(uvm_range_tree_find_hole_in(&state->tree, range.end, &test_start, &test_end));
TEST_CHECK_RET(test_start == hole_start);
TEST_CHECK_RET(test_end == range.end);
}
// Phase 3: Use the tree iterators
uvm_range_tree_for_each_in(node, &state->tree, range.start, range.end) {
TEST_CHECK_RET(rtt_range_overlaps_node(node, &range));
if (prev) {
TEST_CHECK_RET(prev->end < node->start);
TEST_NV_CHECK_RET(rtt_check_between(state, prev, node));
}
++iter_count;
prev = node;
}
TEST_CHECK_RET(iter_count == target_count);
prev = NULL;
iter_count = 0;
uvm_range_tree_for_each_in_safe(node, next, &state->tree, range.start, range.end) {
TEST_CHECK_RET(rtt_range_overlaps_node(node, &range));
if (prev)
TEST_CHECK_RET(prev->end < node->start);
@ -1277,9 +1525,9 @@ static rtt_op_t rtt_get_rand_op(rtt_state_t *state, UVM_TEST_RANGE_TREE_RANDOM_P
if (state->count == 1 && state->count == params->max_ranges)
return RTT_OP_REMOVE;
// r_group selects between the two groups of operations, either {add/remove/shrink}
// or {merge/split}. r_sub selects the sub operation within that group based
// on the current probability settings.
// r_group selects between the two groups of operations, either {add/remove/
// shrink} or {merge/split}. r_sub selects the sub operation within that
// group based on the current probability settings.
r_group = uvm_test_rng_range_32(&state->rng, 1, 100);
r_sub = uvm_test_rng_range_32(&state->rng, 1, 100);
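// Illustrative sketch (not part of the driver): a self-contained model of the
// two-level selection described above. One roll picks the operation group, a
// second roll picks the operation within that group. Plain rand() and the
// fixed 50% split/merge threshold are stand-ins; the real code draws from
// uvm_test_rng and adjusts the probabilities dynamically.
#include <stdlib.h>

typedef enum { MODEL_OP_ADD, MODEL_OP_REMOVE, MODEL_OP_SHRINK,
               MODEL_OP_SPLIT, MODEL_OP_MERGE } model_op_t;

static int model_roll_1_to_100(void)
{
    return rand() % 100 + 1;
}

static model_op_t model_pick_op(int add_remove_shrink_group_pct,
                                int shrink_pct,
                                int add_pct)
{
    if (model_roll_1_to_100() <= add_remove_shrink_group_pct) {
        if (model_roll_1_to_100() <= shrink_pct)
            return MODEL_OP_SHRINK;

        // Mirror the code above: give shrink its chance, then re-roll for
        // add vs. remove.
        return model_roll_1_to_100() <= add_pct ? MODEL_OP_ADD : MODEL_OP_REMOVE;
    }

    // Second group: split vs. merge (fixed threshold here for illustration).
    return model_roll_1_to_100() <= 50 ? MODEL_OP_SPLIT : MODEL_OP_MERGE;
}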
@ -1287,7 +1535,9 @@ static rtt_op_t rtt_get_rand_op(rtt_state_t *state, UVM_TEST_RANGE_TREE_RANDOM_P
if (r_group <= params->add_remove_shrink_group_probability) {
if (r_sub <= state->shrink_probability)
return RTT_OP_SHRINK;
// After giving shrink a chance, redo the randomization for add/remove.
// After giving shrink a chance, redo the randomization for add/
// remove.
r_sub = uvm_test_rng_range_32(&state->rng, 1, 100);
if (r_sub <= state->add_chance)

View File

@ -60,10 +60,22 @@ static NV_STATUS map_cpu(uvm_rm_mem_t *rm_mem)
return NV_OK;
}
static NV_STATUS check_alignment(uvm_rm_mem_t *rm_mem, uvm_gpu_t *gpu, NvU64 alignment)
{
// Alignment requirements only apply to mappings in the UVM-owned VA space
if (alignment != 0) {
bool is_proxy_va_space = false;
NvU64 gpu_va = uvm_rm_mem_get_gpu_va(rm_mem, gpu, is_proxy_va_space);
TEST_CHECK_RET(IS_ALIGNED(gpu_va, alignment));
}
return NV_OK;
}
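// Illustrative sketch (not part of the driver): for power-of-two alignments,
// the IS_ALIGNED() check used above reduces to masking against alignment - 1.
#include <stdbool.h>
#include <stdint.h>

static bool model_is_aligned(uint64_t addr, uint64_t alignment)
{
    // Assumes alignment is a non-zero power of two, as GPU VA alignments are.
    return (addr & (alignment - 1)) == 0;
}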
static NV_STATUS map_gpu_owner(uvm_rm_mem_t *rm_mem, NvU64 alignment)
{
uvm_gpu_t *gpu = rm_mem->gpu_owner;
NvU64 gpu_va;
// The memory should have been automatically mapped in the GPU owner
TEST_CHECK_RET(uvm_rm_mem_mapped_on_gpu(rm_mem, gpu));
@ -73,9 +85,7 @@ static NV_STATUS map_gpu_owner(uvm_rm_mem_t *rm_mem, NvU64 alignment)
// located in vidmem.
TEST_CHECK_RET(uvm_rm_mem_mapped_on_gpu_proxy(rm_mem, gpu) == uvm_gpu_uses_proxy_channel_pool(gpu));
gpu_va = uvm_rm_mem_get_gpu_va(rm_mem, gpu, uvm_rm_mem_mapped_on_gpu_proxy(rm_mem, gpu));
if (alignment)
TEST_CHECK_RET(IS_ALIGNED(gpu_va, alignment));
TEST_NV_CHECK_RET(check_alignment(rm_mem, gpu, alignment));
// Explicitly mapping or unmapping to the GPU that owns the allocation is
// not allowed, so the testing related to GPU owners is simpler than that of
@ -87,7 +97,6 @@ static NV_STATUS map_other_gpus(uvm_rm_mem_t *rm_mem, uvm_va_space_t *va_space,
{
uvm_gpu_t *gpu_owner = rm_mem->gpu_owner;
uvm_gpu_t *gpu;
NvU64 gpu_va;
for_each_va_space_gpu(gpu, va_space) {
if (gpu == gpu_owner)
@ -119,9 +128,7 @@ static NV_STATUS map_other_gpus(uvm_rm_mem_t *rm_mem, uvm_va_space_t *va_space,
TEST_CHECK_RET(uvm_rm_mem_mapped_on_gpu_proxy(rm_mem, gpu) == uvm_gpu_uses_proxy_channel_pool(gpu));
gpu_va = uvm_rm_mem_get_gpu_va(rm_mem, gpu, uvm_rm_mem_mapped_on_gpu_proxy(rm_mem, gpu));
if (alignment)
TEST_CHECK_RET(IS_ALIGNED(gpu_va, alignment));
TEST_NV_CHECK_RET(check_alignment(rm_mem, gpu, alignment));
}
return NV_OK;

View File

@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2015-2021 NVIDIA Corporation
Copyright (c) 2015-2022 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@ -247,6 +247,7 @@ long uvm_test_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
UVM_ROUTE_CMD_STACK_INIT_CHECK(UVM_TEST_CHANNEL_STRESS, uvm_test_channel_stress);
UVM_ROUTE_CMD_STACK_INIT_CHECK(UVM_TEST_CE_SANITY, uvm_test_ce_sanity);
UVM_ROUTE_CMD_STACK_INIT_CHECK(UVM_TEST_HOST_SANITY, uvm_test_host_sanity);
UVM_ROUTE_CMD_STACK_INIT_CHECK(UVM_TEST_VA_SPACE_MM_OR_CURRENT_RETAIN, uvm_test_va_space_mm_or_current_retain);
UVM_ROUTE_CMD_STACK_INIT_CHECK(UVM_TEST_VA_BLOCK_INFO, uvm_test_va_block_info);
UVM_ROUTE_CMD_STACK_INIT_CHECK(UVM_TEST_LOCK_SANITY, uvm_test_lock_sanity);
UVM_ROUTE_CMD_STACK_INIT_CHECK(UVM_TEST_PERF_UTILS_SANITY, uvm_test_perf_utils_sanity);
@ -328,6 +329,8 @@ long uvm_test_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
uvm_test_va_range_inject_add_gpu_va_space_error);
UVM_ROUTE_CMD_STACK_INIT_CHECK(UVM_TEST_DESTROY_GPU_VA_SPACE_DELAY, uvm_test_destroy_gpu_va_space_delay);
UVM_ROUTE_CMD_STACK_NO_INIT_CHECK(UVM_TEST_CGROUP_ACCOUNTING_SUPPORTED, uvm_test_cgroup_accounting_supported);
UVM_ROUTE_CMD_STACK_INIT_CHECK(UVM_TEST_HMM_INIT, uvm_test_hmm_init);
UVM_ROUTE_CMD_STACK_INIT_CHECK(UVM_TEST_SPLIT_INVALIDATE_DELAY, uvm_test_split_invalidate_delay);
}
return -EINVAL;

View File

@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2015-2021 NVidia Corporation
Copyright (c) 2015-2022 NVidia Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@ -23,9 +23,7 @@
#ifndef __UVM_TEST_IOCTL_H__
#define __UVM_TEST_IOCTL_H__
#ifndef __KERNEL__
#endif
#include "uvm_types.h"
#include "uvm_ioctl.h"
#include "nv_uvm_types.h"
@ -151,6 +149,14 @@ typedef enum
UVM_TEST_VA_RANGE_TYPE_MAX
} UVM_TEST_VA_RANGE_TYPE;
typedef enum
{
UVM_TEST_RANGE_SUBTYPE_INVALID = 0,
UVM_TEST_RANGE_SUBTYPE_UVM,
UVM_TEST_RANGE_SUBTYPE_HMM,
UVM_TEST_RANGE_SUBTYPE_MAX
} UVM_TEST_RANGE_SUBTYPE;
// Keep this in sync with uvm_read_duplication_t in uvm_va_range.h
typedef enum
{
@ -169,6 +175,7 @@ typedef struct
NvBool is_zombie; // Out
// Note: if this is a zombie, this field is meaningless.
NvBool owned_by_calling_process; // Out
NvU32 subtype; // Out (UVM_TEST_RANGE_SUBTYPE)
} UVM_TEST_VA_RANGE_INFO_MANAGED;
#define UVM_TEST_VA_RANGE_INFO UVM_TEST_IOCTL_BASE(4)
@ -176,6 +183,10 @@ typedef struct
{
NvU64 lookup_address NV_ALIGN_BYTES(8); // In
// For HMM ranges, va_range_start/end will contain the lookup address but not
// necessarily the maximal range over which the returned policy applies.
// For example there could be adjacent ranges with the same policy, implying
// the returned range could be as small as a page in the worst case for HMM.
NvU64 va_range_start NV_ALIGN_BYTES(8); // Out
NvU64 va_range_end NV_ALIGN_BYTES(8); // Out, inclusive
NvU32 read_duplication; // Out (UVM_TEST_READ_DUPLICATION_POLICY)
@ -536,12 +547,14 @@ typedef struct
// If user_pages_allocation_retry_force_count is non-0 then the next count user
// memory allocations under the VA block will be forced to do allocation-retry.
//
// If cpu_pages_allocation_error_count is not zero, the subsequent operations
// that need to allocate CPU pages will fail with NV_ERR_NO_MEMORY for
// cpu_pages_allocation_error_count times. If cpu_pages_allocation_error_count
// is equal to ~0U, the count is infinite.
//
// If eviction_error is NV_TRUE, the next eviction attempt from the VA block
// will fail with NV_ERR_NO_MEMORY.
//
// If cpu_pages_allocation_error is NV_TRUE, the subsequent operations that
// need to allocate CPU pages will fail with NV_ERR_NO_MEMORY.
//
// If populate_error is NV_TRUE, a retry error will be injected after the next
// successful user memory allocation under the VA block but before that
// allocation is used by the block. This is similar to
@ -558,8 +571,8 @@ typedef struct
NvU32 page_table_allocation_retry_force_count; // In
NvU32 user_pages_allocation_retry_force_count; // In
NvU32 cpu_chunk_allocation_size_mask; // In
NvU32 cpu_pages_allocation_error_count; // In
NvBool eviction_error; // In
NvBool cpu_pages_allocation_error; // In
NvBool populate_error; // In
NV_STATUS rmStatus; // Out
} UVM_TEST_VA_BLOCK_INJECT_ERROR_PARAMS;
@ -1111,10 +1124,14 @@ typedef struct
//
// If migrate_vma_allocation_fail_nth is greater than 0, the nth page
// allocation within migrate_vma will fail.
//
// If va_block_allocation_fail_nth is greater than 0, the nth call to
// uvm_va_block_find_create() will fail with NV_ERR_NO_MEMORY.
#define UVM_TEST_VA_SPACE_INJECT_ERROR UVM_TEST_IOCTL_BASE(72)
typedef struct
{
NvU32 migrate_vma_allocation_fail_nth; // In
NvU32 va_block_allocation_fail_nth; // In
NV_STATUS rmStatus; // Out
} UVM_TEST_VA_SPACE_INJECT_ERROR_PARAMS;
@ -1341,6 +1358,28 @@ typedef struct
NV_STATUS rmStatus; // Out
} UVM_TEST_HOST_SANITY_PARAMS;
// Calls uvm_va_space_mm_or_current_retain() on a VA space,
// then releases the va_space_mm and returns.
#define UVM_TEST_VA_SPACE_MM_OR_CURRENT_RETAIN UVM_TEST_IOCTL_BASE(89)
typedef struct
{
// User address of a flag that acts as a semaphore. If non-NULL, the flag at
// this address is set to 1 after a successful retain but before the sleep.
NvU64 retain_done_ptr NV_ALIGN_BYTES(8); // In
// Approximate duration, in microseconds, for which to sleep with the
// va_space_mm retained.
NvU64 sleep_us NV_ALIGN_BYTES(8); // In
// On success, this contains the value of mm->mm_users before mmput() is
// called.
NvU64 mm_users NV_ALIGN_BYTES(8); // Out
// NV_ERR_PAGE_TABLE_NOT_AVAIL Could not retain va_space_mm
// (uvm_va_space_mm_or_current_retain returned
// NULL)
NV_STATUS rmStatus; // Out
} UVM_TEST_VA_SPACE_MM_OR_CURRENT_RETAIN_PARAMS;
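// Illustrative user-space sketch (not part of this header): one way a test
// might drive the retain_done_ptr handshake described above. It assumes the
// usual UVM test convention of passing the params struct directly to ioctl()
// on an already-initialized /dev/nvidia-uvm file descriptor, and it polls an
// NvU64 flag defensively since the width of the kernel's flag write is not
// spelled out here.
//
//   #include <stdint.h>
//   #include <stdio.h>
//   #include <sys/ioctl.h>
//   #include "uvm_test_ioctl.h"
//
//   static volatile NvU64 retain_done;
//
//   // Run on its own thread; the ioctl blocks for roughly sleep_us.
//   static void *retain_thread(void *arg)
//   {
//       int uvm_fd = *(int *)arg;
//       UVM_TEST_VA_SPACE_MM_OR_CURRENT_RETAIN_PARAMS params = {0};
//
//       params.retain_done_ptr = (NvU64)(uintptr_t)&retain_done;
//       params.sleep_us        = 100 * 1000; // hold the va_space_mm ~100 ms
//
//       if (ioctl(uvm_fd, UVM_TEST_VA_SPACE_MM_OR_CURRENT_RETAIN, &params) == 0 &&
//           params.rmStatus == NV_OK)
//           printf("mm_users while retained: %llu\n",
//                  (unsigned long long)params.mm_users);
//       return NULL;
//   }
//
//   // Another thread spins on retain_done; once it reads non-zero, the
//   // va_space_mm is known to be retained for roughly sleep_us more.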
#define UVM_TEST_GET_USER_SPACE_END_ADDRESS UVM_TEST_IOCTL_BASE(90)
typedef struct
{
@ -1396,6 +1435,19 @@ typedef struct
NV_STATUS rmStatus; // Out
} UVM_TEST_CGROUP_ACCOUNTING_SUPPORTED_PARAMS;
#define UVM_TEST_HMM_INIT UVM_TEST_IOCTL_BASE(97)
typedef struct
{
NV_STATUS rmStatus; // Out
} UVM_TEST_HMM_INIT_PARAMS;
#define UVM_TEST_SPLIT_INVALIDATE_DELAY UVM_TEST_IOCTL_BASE(98)
typedef struct
{
NvU64 delay_us; // In
NV_STATUS rmStatus; // Out
} UVM_TEST_SPLIT_INVALIDATE_DELAY_PARAMS;
#ifdef __cplusplus
}
#endif

View File

@ -430,10 +430,12 @@ static bool thread_context_non_interrupt_add(uvm_thread_context_t *thread_contex
if (thread_context->array_index == UVM_THREAD_CONTEXT_ARRAY_SIZE) {
NvU64 old = atomic64_cmpxchg(&array_entry->task, 0, task);
// Task already added a different thread context. There is nothing
// to undo because the current thread context has not been inserted.
if (old == task)
// Task already added a different thread context. The current thread
// context has not been inserted but needs to be freed.
if (old == task) {
thread_context_non_interrupt_deinit(thread_context);
return false;
}
// Speculatively add the current thread context.
if (old == 0)
@ -444,6 +446,7 @@ static bool thread_context_non_interrupt_add(uvm_thread_context_t *thread_contex
// Task already added a different thread context to the array, so
// undo the speculative insertion
atomic64_set(&table_entry->array[thread_context->array_index].task, 0);
thread_context_non_interrupt_deinit(thread_context);
return false;
}
@ -474,6 +477,9 @@ static bool thread_context_non_interrupt_add(uvm_thread_context_t *thread_contex
added = true;
}
if (!added)
thread_context_non_interrupt_deinit(thread_context);
spin_unlock_irqrestore(&table_entry->tree_lock, flags);
return added;
}

View File

@ -218,7 +218,7 @@ static void uvm_put_user_pages_dirty(struct page **pages, NvU64 page_count)
for (i = 0; i < page_count; i++) {
set_page_dirty(pages[i]);
put_page(pages[i]);
NV_UNPIN_USER_PAGE(pages[i]);
}
}
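// Note: the NV_UNPIN_USER_PAGE() above pairs with the NV_PIN_USER_PAGES()
// call in map_user_pages() below; pages taken with pin_user_pages() must be
// released with unpin_user_page() rather than put_page().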
@ -262,7 +262,7 @@ static NV_STATUS map_user_pages(NvU64 user_va, NvU64 size, void **addr, struct p
}
nv_mmap_read_lock(current->mm);
ret = NV_GET_USER_PAGES(user_va, num_pages, 1, 0, *pages, vmas);
ret = NV_PIN_USER_PAGES(user_va, num_pages, FOLL_WRITE, *pages, vmas);
nv_mmap_read_unlock(current->mm);
if (ret != num_pages) {
status = NV_ERR_INVALID_ARGUMENT;
@ -1116,6 +1116,19 @@ void uvm_tools_broadcast_access_counter(uvm_gpu_t *gpu,
uvm_tools_broadcast_event(&entry);
}
void uvm_tools_test_hmm_split_invalidate(uvm_va_space_t *va_space)
{
UvmEventEntry entry;
if (!va_space->tools.enabled)
return;
entry.testEventData.splitInvalidate.eventType = UvmEventTypeTestHmmSplitInvalidate;
uvm_down_read(&va_space->tools.lock);
uvm_tools_record_event(va_space, &entry);
uvm_up_read(&va_space->tools.lock);
}
// This function is used as a begin marker to group all migrations within a VA
// block that are performed in the same call to
// block_copy_resident_pages_between. All of these are pushed to the same
@ -2101,8 +2114,7 @@ exit:
uvm_global_mask_release(retained_global_gpus);
if (mm)
uvm_va_space_mm_or_current_release(va_space, mm);
uvm_va_space_mm_or_current_release(va_space, mm);
uvm_kvfree(global_gpus);
uvm_kvfree(retained_global_gpus);

View File

@ -115,6 +115,8 @@ void uvm_tools_broadcast_access_counter(uvm_gpu_t *gpu,
const uvm_access_counter_buffer_entry_t *buffer_entry,
bool on_managed);
void uvm_tools_test_hmm_split_invalidate(uvm_va_space_t *va_space);
// Schedules completed events and then waits for them to be dispatched
void uvm_tools_flush_events(void);

View File

@ -34,9 +34,6 @@
#include "nvstatus.h"
#include "nvCpuUuid.h"
#ifndef __KERNEL__
#endif
/*******************************************************************************
UVM stream types
@ -359,9 +356,10 @@ typedef enum
UvmEventNumTypes,
// ---- Private event types for uvm tests
UvmEventTestTypesFirst = 63,
UvmEventTestTypesFirst = 62,
UvmEventTypeTestAccessCounter = UvmEventTestTypesFirst,
UvmEventTypeTestHmmSplitInvalidate = UvmEventTestTypesFirst,
UvmEventTypeTestAccessCounter = UvmEventTestTypesFirst + 1,
UvmEventTestTypesLast = UvmEventTypeTestAccessCounter,
@ -387,6 +385,7 @@ typedef enum
#define UVM_EVENT_ENABLE_MAP_REMOTE ((NvU64)1 << UvmEventTypeMapRemote)
#define UVM_EVENT_ENABLE_EVICTION ((NvU64)1 << UvmEventTypeEviction)
#define UVM_EVENT_ENABLE_TEST_ACCESS_COUNTER ((NvU64)1 << UvmEventTypeTestAccessCounter)
#define UVM_EVENT_ENABLE_TEST_HMM_SPLIT_INVALIDATE ((NvU64)1 << UvmEventTypeTestHmmSplitInvalidate)
//------------------------------------------------------------------------------
// Information associated with a memory violation event
@ -977,6 +976,11 @@ typedef struct
NvU64 instancePtr;
} UvmEventTestAccessCounterInfo;
typedef struct
{
NvU8 eventType;
} UvmEventTestSplitInvalidateInfo;
//------------------------------------------------------------------------------
// Entry added to the event queue buffer when an enabled event occurs. For
// compatibility with all tools, ensure that this structure is 64-bit aligned.
@ -1010,6 +1014,7 @@ typedef struct
NvU8 eventType;
UvmEventTestAccessCounterInfo accessCounter;
UvmEventTestSplitInvalidateInfo splitInvalidate;
} testEventData;
};
} UvmEventEntry;
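// Illustrative sketch (not part of this header): a consumer that includes
// this header can build an enable mask for the two test event types from the
// UVM_EVENT_ENABLE_* macros above and dispatch on testEventData.eventType.
//
//   static const NvU64 test_event_mask = UVM_EVENT_ENABLE_TEST_ACCESS_COUNTER |
//                                        UVM_EVENT_ENABLE_TEST_HMM_SPLIT_INVALIDATE;
//
//   static void handle_test_event(const UvmEventEntry *entry)
//   {
//       switch (entry->testEventData.eventType) {
//           case UvmEventTypeTestAccessCounter:
//               // entry->testEventData.accessCounter carries the counter info
//               break;
//           case UvmEventTypeTestHmmSplitInvalidate:
//               // entry->testEventData.splitInvalidate carries only the type
//               break;
//           default:
//               break;
//       }
//   }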

View File

@ -618,7 +618,7 @@ static NV_STATUS uvm_register_channel(uvm_va_space_t *va_space,
uvm_va_space_up_read_rm(va_space);
// The mm needs to be locked in order to remove stale HMM va_blocks.
mm = uvm_va_space_mm_retain_lock(va_space);
mm = uvm_va_space_mm_or_current_retain_lock(va_space);
// We have the RM objects now so we know what the VA range layout should be.
// Re-take the VA space lock in write mode to create and insert them.
@ -653,10 +653,8 @@ static NV_STATUS uvm_register_channel(uvm_va_space_t *va_space,
if (status != NV_OK)
goto error_under_write;
if (mm) {
if (mm)
uvm_up_read_mmap_lock_out_of_order(mm);
uvm_va_space_mm_release(va_space);
}
// The subsequent mappings will need to call into RM, which means we must
// downgrade the VA space lock to read mode. Although we're in read mode no
@ -681,6 +679,7 @@ static NV_STATUS uvm_register_channel(uvm_va_space_t *va_space,
goto error_under_read;
uvm_va_space_up_read_rm(va_space);
uvm_va_space_mm_or_current_release(va_space, mm);
uvm_gpu_release(gpu);
return NV_OK;
@ -688,7 +687,7 @@ error_under_write:
if (user_channel->gpu_va_space)
uvm_user_channel_detach(user_channel, &deferred_free_list);
uvm_va_space_up_write(va_space);
uvm_va_space_mm_release_unlock(va_space, mm);
uvm_va_space_mm_or_current_release_unlock(va_space, mm);
uvm_deferred_free_object_list(&deferred_free_list);
uvm_gpu_release(gpu);
return status;
@ -714,10 +713,12 @@ error_under_read:
if (user_channel->gpu_va_space) {
uvm_user_channel_detach(user_channel, &deferred_free_list);
uvm_va_space_up_write(va_space);
uvm_va_space_mm_or_current_release(va_space, mm);
uvm_deferred_free_object_list(&deferred_free_list);
}
else {
uvm_va_space_up_write(va_space);
uvm_va_space_mm_or_current_release(va_space, mm);
}
uvm_user_channel_release(user_channel);

View File

@ -105,6 +105,36 @@ uvm_va_space_t *uvm_va_block_get_va_space(uvm_va_block_t *va_block)
return va_space;
}
bool uvm_va_block_check_policy_is_valid(uvm_va_block_t *va_block,
uvm_va_policy_t *policy,
uvm_va_block_region_t region)
{
uvm_assert_mutex_locked(&va_block->lock);
if (uvm_va_block_is_hmm(va_block)) {
uvm_va_policy_node_t *node;
if (policy == &uvm_va_policy_default) {
// There should only be the default policy within the region.
node = uvm_va_policy_node_iter_first(va_block,
uvm_va_block_region_start(va_block, region),
uvm_va_block_region_end(va_block, region));
UVM_ASSERT(!node);
}
else {
// The policy node should cover the region.
node = uvm_va_policy_node_from_policy(policy);
UVM_ASSERT(node->node.start <= uvm_va_block_region_start(va_block, region));
UVM_ASSERT(node->node.end >= uvm_va_block_region_end(va_block, region));
}
}
else {
UVM_ASSERT(policy == uvm_va_range_get_policy(va_block->va_range));
}
return true;
}
static NvU64 block_gpu_pte_flag_cacheable(uvm_va_block_t *block, uvm_gpu_t *gpu, uvm_processor_id_t resident_id)
{
uvm_va_space_t *va_space = uvm_va_block_get_va_space(block);
@ -589,7 +619,7 @@ NV_STATUS uvm_va_block_create(uvm_va_range_t *va_range,
UVM_ASSERT(size <= UVM_VA_BLOCK_SIZE);
if (va_range) {
// Create a UVM managed va_block.
// Create a managed va_block.
UVM_ASSERT(start >= va_range->node.start);
UVM_ASSERT(end <= va_range->node.end);
UVM_ASSERT(va_range->type == UVM_VA_RANGE_TYPE_MANAGED);
@ -617,6 +647,7 @@ NV_STATUS uvm_va_block_create(uvm_va_range_t *va_range,
block->end = end;
block->va_range = va_range;
uvm_tracker_init(&block->tracker);
block->prefetch_info.last_migration_proc_id = UVM_ID_INVALID;
nv_kthread_q_item_init(&block->eviction_mappings_q_item, block_deferred_eviction_mappings_entry, block);
@ -636,7 +667,7 @@ static void block_gpu_unmap_phys_all_cpu_pages(uvm_va_block_t *block, uvm_gpu_t
gpu_mapping_addr = uvm_cpu_chunk_get_gpu_mapping_addr(block, page_index, chunk, gpu->id);
if (gpu_mapping_addr != 0) {
uvm_pmm_sysmem_mappings_remove_gpu_mapping(&gpu->pmm_reverse_sysmem_mappings, gpu_mapping_addr);
uvm_gpu_unmap_cpu_pages(gpu, gpu_mapping_addr, uvm_cpu_chunk_get_size(chunk));
uvm_gpu_unmap_cpu_pages(gpu->parent, gpu_mapping_addr, uvm_cpu_chunk_get_size(chunk));
uvm_cpu_chunk_set_gpu_mapping_addr(block, page_index, chunk, gpu->id, 0);
}
}
@ -657,7 +688,7 @@ static NV_STATUS block_gpu_map_phys_all_cpu_pages(uvm_va_block_t *block, uvm_gpu
UVM_ASSERT_MSG(gpu_mapping_addr == 0, "GPU%u DMA address 0x%llx\n", uvm_id_value(gpu->id), gpu_mapping_addr);
status = uvm_gpu_map_cpu_pages(gpu,
status = uvm_gpu_map_cpu_pages(gpu->parent,
uvm_cpu_chunk_get_cpu_page(block, chunk, page_index),
chunk_size,
&gpu_mapping_addr);
@ -846,7 +877,7 @@ static void block_unmap_cpu_chunk_on_gpus(uvm_va_block_t *block, uvm_cpu_chunk_t
gpu = block_get_gpu(block, id);
uvm_pmm_sysmem_mappings_remove_gpu_mapping(&gpu->pmm_reverse_sysmem_mappings, gpu_mapping_addr);
uvm_gpu_unmap_cpu_pages(gpu, gpu_mapping_addr, uvm_cpu_chunk_get_size(chunk));
uvm_gpu_unmap_cpu_pages(gpu->parent, gpu_mapping_addr, uvm_cpu_chunk_get_size(chunk));
uvm_cpu_chunk_set_gpu_mapping_addr(block, page_index, chunk, id, 0);
}
}
@ -880,7 +911,7 @@ static NV_STATUS block_map_cpu_chunk_on_gpus(uvm_va_block_t *block, uvm_page_ind
UVM_ASSERT_MSG(gpu_mapping_addr == 0, "GPU%u DMA address 0x%llx\n", uvm_id_value(id), gpu_mapping_addr);
gpu = block_get_gpu(block, id);
status = uvm_gpu_map_cpu_pages(gpu,
status = uvm_gpu_map_cpu_pages(gpu->parent,
uvm_cpu_chunk_get_cpu_page(block, chunk, chunk_region.first),
chunk_size,
&gpu_mapping_addr);
@ -1014,9 +1045,14 @@ static NV_STATUS block_populate_page_cpu(uvm_va_block_t *block, uvm_page_index_t
UVM_ASSERT(!uvm_page_mask_test(&block->cpu.resident, page_index));
// Return out of memory error if the tests have requested it. As opposed to
// other error injection settings, this one is persistent.
if (block_test && block_test->inject_cpu_pages_allocation_error)
// other error injection settings, this one fails N times and then succeeds.
// TODO: Bug 3701182: This will print a warning in Linux kernels newer than
// 5.16.0-rc1+.
if (block_test && block_test->inject_cpu_pages_allocation_error_count) {
if (block_test->inject_cpu_pages_allocation_error_count != ~(NvU32)0)
block_test->inject_cpu_pages_allocation_error_count--;
return NV_ERR_NO_MEMORY;
}
status = uvm_cpu_chunk_alloc(block, page_index, mm, &chunk);
if (status != NV_OK)
@ -1178,6 +1214,26 @@ uvm_va_block_region_t uvm_va_block_big_page_region_all(uvm_va_block_t *va_block,
return range_big_page_region_all(va_block->start, va_block->end, big_page_size);
}
uvm_va_block_region_t uvm_va_block_big_page_region_subset(uvm_va_block_t *va_block,
uvm_va_block_region_t region,
NvU32 big_page_size)
{
NvU64 start = uvm_va_block_region_start(va_block, region);
NvU64 end = uvm_va_block_region_end(va_block, region);
uvm_va_block_region_t big_region;
UVM_ASSERT(start < va_block->end);
UVM_ASSERT(end <= va_block->end);
big_region = range_big_page_region_all(start, end, big_page_size);
if (big_region.outer) {
big_region.first += region.first;
big_region.outer += region.first;
}
return big_region;
}
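// Worked example (assuming a 4K PAGE_SIZE, a 64K big page size and a
// big-page-aligned block start): for a region covering block-relative pages
// [3, 40), range_big_page_region_all() computes the aligned subset [13, 29)
// relative to the region's start address, and adding region.first converts it
// back to the block-relative big-page region [16, 32).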
size_t uvm_va_block_num_big_pages(uvm_va_block_t *va_block, NvU32 big_page_size)
{
return range_num_big_pages(va_block->start, va_block->end, big_page_size);
@ -2159,13 +2215,21 @@ static uvm_gpu_address_t block_phys_page_copy_address(uvm_va_block_t *block,
return copy_addr;
}
uvm_gpu_phys_address_t uvm_va_block_gpu_phys_page_address(uvm_va_block_t *va_block,
uvm_gpu_phys_address_t uvm_va_block_res_phys_page_address(uvm_va_block_t *va_block,
uvm_page_index_t page_index,
uvm_processor_id_t residency,
uvm_gpu_t *gpu)
{
uvm_assert_mutex_locked(&va_block->lock);
return block_phys_page_address(va_block, block_phys_page(gpu->id, page_index), gpu);
return block_phys_page_address(va_block, block_phys_page(residency, page_index), gpu);
}
uvm_gpu_phys_address_t uvm_va_block_gpu_phys_page_address(uvm_va_block_t *va_block,
uvm_page_index_t page_index,
uvm_gpu_t *gpu)
{
return uvm_va_block_res_phys_page_address(va_block, page_index, gpu->id, gpu);
}
// Begin a push appropriate for copying data from src_id processor to dst_id processor.
@ -2327,12 +2391,17 @@ typedef enum
BLOCK_TRANSFER_MODE_INTERNAL_MOVE_TO_STAGE = 3,
BLOCK_TRANSFER_MODE_INTERNAL_MOVE_FROM_STAGE = 4,
BLOCK_TRANSFER_MODE_INTERNAL_COPY_TO_STAGE = 5,
BLOCK_TRANSFER_MODE_INTERNAL_COPY_FROM_STAGE = 6
BLOCK_TRANSFER_MODE_INTERNAL_COPY_FROM_STAGE = 6,
BLOCK_TRANSFER_MODE_INTERNAL_COPY_ONLY = 7
} block_transfer_mode_internal_t;
static uvm_va_block_transfer_mode_t get_block_transfer_mode_from_internal(block_transfer_mode_internal_t transfer_mode)
{
switch (transfer_mode) {
// For HMM, BLOCK_TRANSFER_MODE_INTERNAL_COPY_ONLY is just part of a
// two phase move. First the pages are copied, then after
// migrate_vma_pages() succeeds, residency and mapping are updated.
case BLOCK_TRANSFER_MODE_INTERNAL_COPY_ONLY:
case BLOCK_TRANSFER_MODE_INTERNAL_MOVE:
case BLOCK_TRANSFER_MODE_INTERNAL_MOVE_TO_STAGE:
case BLOCK_TRANSFER_MODE_INTERNAL_MOVE_FROM_STAGE:
@ -2391,6 +2460,57 @@ static uvm_va_block_region_t block_phys_contig_region(uvm_va_block_t *block,
}
}
typedef struct
{
// Location of the memory
uvm_processor_id_t id;
// Whether the whole block has a single physically-contiguous chunk of
// storage on the processor.
bool is_block_contig;
// Starting address of the physically-contiguous allocation, from the view
// of the copying GPU. Valid only if is_block_contig.
uvm_gpu_address_t address;
} block_copy_addr_t;
typedef struct
{
block_copy_addr_t src;
block_copy_addr_t dst;
} block_copy_state_t;
// Like block_phys_page_copy_address, but uses the address cached in bca when
// possible.
static uvm_gpu_address_t block_copy_get_address(uvm_va_block_t *block,
block_copy_addr_t *bca,
uvm_page_index_t page_index,
uvm_gpu_t *copying_gpu)
{
if (bca->is_block_contig) {
uvm_gpu_address_t addr = bca->address;
addr.address += page_index * PAGE_SIZE;
UVM_ASSERT(block_phys_copy_contig_check(block, page_index, &bca->address, bca->id, copying_gpu));
return addr;
}
return block_phys_page_copy_address(block, block_phys_page(bca->id, page_index), copying_gpu);
}
static void block_copy_push(uvm_va_block_t *block,
block_copy_state_t *state,
uvm_va_block_region_t region,
uvm_push_t *push)
{
uvm_gpu_t *copying_gpu = uvm_push_get_gpu(push);
uvm_push_set_flag(push, UVM_PUSH_FLAG_NEXT_MEMBAR_NONE);
copying_gpu->parent->ce_hal->memcopy(push,
block_copy_get_address(block, &state->dst, region.first, copying_gpu),
block_copy_get_address(block, &state->src, region.first, copying_gpu),
uvm_va_block_region_size(region));
}
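// Illustrative sketch (not part of the driver): a stand-alone model of the
// caching that block_copy_get_address() implements. When the whole block is
// one physically-contiguous allocation, the per-page copy address is just the
// cached base plus page_index * PAGE_SIZE; otherwise every page needs its own
// lookup (the table below stands in for block_phys_page_copy_address()).
#include <stddef.h>
#include <stdint.h>

#define MODEL_PAGE_SIZE 4096u

typedef struct {
    int is_block_contig;      // whole block backed by one contiguous allocation
    uint64_t base;            // valid only if is_block_contig
    const uint64_t *per_page; // per-page addresses otherwise
} model_copy_addr_t;

static uint64_t model_copy_get_address(const model_copy_addr_t *bca, size_t page_index)
{
    if (bca->is_block_contig)
        return bca->base + (uint64_t)page_index * MODEL_PAGE_SIZE;

    return bca->per_page[page_index];
}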
// Copies pages resident on the src_id processor to the dst_id processor
//
// The function adds the pages that were successfully copied to the output
@ -2403,7 +2523,7 @@ static NV_STATUS block_copy_resident_pages_between(uvm_va_block_t *block,
uvm_processor_id_t dst_id,
uvm_processor_id_t src_id,
uvm_va_block_region_t region,
const uvm_page_mask_t *page_mask,
uvm_page_mask_t *copy_mask,
const uvm_page_mask_t *prefetch_page_mask,
block_transfer_mode_internal_t transfer_mode,
uvm_page_mask_t *migrated_pages,
@ -2418,7 +2538,6 @@ static NV_STATUS block_copy_resident_pages_between(uvm_va_block_t *block,
uvm_page_index_t page_index;
uvm_page_index_t contig_start_index = region.outer;
uvm_page_index_t last_index = region.outer;
uvm_page_mask_t *copy_mask = &block_context->make_resident.copy_resident_pages_between_mask;
uvm_range_group_range_t *rgr = NULL;
bool rgr_has_changed = false;
uvm_make_resident_cause_t cause = block_context->make_resident.cause;
@ -2426,26 +2545,21 @@ static NV_STATUS block_copy_resident_pages_between(uvm_va_block_t *block,
const bool may_prefetch = (cause == UVM_MAKE_RESIDENT_CAUSE_REPLAYABLE_FAULT ||
cause == UVM_MAKE_RESIDENT_CAUSE_NON_REPLAYABLE_FAULT ||
cause == UVM_MAKE_RESIDENT_CAUSE_ACCESS_COUNTER) && !!prefetch_page_mask;
const bool is_src_phys_contig = is_block_phys_contig(block, src_id);
const bool is_dst_phys_contig = is_block_phys_contig(block, dst_id);
uvm_gpu_address_t contig_src_address = {0};
uvm_gpu_address_t contig_dst_address = {0};
block_copy_state_t state = {0};
uvm_va_range_t *va_range = block->va_range;
uvm_va_space_t *va_space = uvm_va_block_get_va_space(block);
const uvm_va_block_transfer_mode_t block_transfer_mode = get_block_transfer_mode_from_internal(transfer_mode);
state.src.id = src_id;
state.dst.id = dst_id;
state.src.is_block_contig = is_block_phys_contig(block, src_id);
state.dst.is_block_contig = is_block_phys_contig(block, dst_id);
*copied_pages = 0;
if (uvm_id_equal(dst_id, src_id))
return NV_OK;
uvm_page_mask_init_from_region(copy_mask, region, src_resident_mask);
if (page_mask)
uvm_page_mask_and(copy_mask, copy_mask, page_mask);
// If there are not pages to be copied, exit early
if (!uvm_page_mask_andnot(copy_mask, copy_mask, dst_resident_mask))
// If there are no pages to be copied, exit early
if (!uvm_page_mask_andnot(copy_mask, copy_mask, dst_resident_mask) ||
!uvm_page_mask_andnot(copy_mask, copy_mask, migrated_pages))
return NV_OK;
// uvm_range_group_range_iter_first should only be called when the va_space
@ -2458,6 +2572,7 @@ static NV_STATUS block_copy_resident_pages_between(uvm_va_block_t *block,
rgr_has_changed = true;
}
// TODO: Bug 3745051: This function is complicated and needs refactoring
for_each_va_block_page_in_region_mask(page_index, copy_mask, region) {
NvU64 page_start = uvm_va_block_cpu_page_address(block, page_index);
uvm_make_resident_cause_t page_cause = (may_prefetch && uvm_page_mask_test(prefetch_page_mask, page_index))?
@ -2553,29 +2668,19 @@ static NV_STATUS block_copy_resident_pages_between(uvm_va_block_t *block,
// NVLINK links. Therefore, for physically-contiguous block
// storage, we cache the start address and compute the page address
// using the page index.
if (is_src_phys_contig)
contig_src_address = block_phys_page_copy_address(block, block_phys_page(src_id, 0), copying_gpu);
if (is_dst_phys_contig)
contig_dst_address = block_phys_page_copy_address(block, block_phys_page(dst_id, 0), copying_gpu);
if (state.src.is_block_contig)
state.src.address = block_phys_page_copy_address(block, block_phys_page(src_id, 0), copying_gpu);
if (state.dst.is_block_contig)
state.dst.address = block_phys_page_copy_address(block, block_phys_page(dst_id, 0), copying_gpu);
}
else if ((page_index != last_index + 1) || contig_cause != page_cause) {
uvm_va_block_region_t contig_region = uvm_va_block_region(contig_start_index, last_index + 1);
size_t contig_region_size = uvm_va_block_region_size(contig_region);
UVM_ASSERT(uvm_va_block_region_contains_region(region, contig_region));
// If both src and dst are physically-contiguous, consolidate copies
// of contiguous pages into a single method.
if (is_src_phys_contig && is_dst_phys_contig) {
uvm_gpu_address_t src_address = contig_src_address;
uvm_gpu_address_t dst_address = contig_dst_address;
src_address.address += contig_start_index * PAGE_SIZE;
dst_address.address += contig_start_index * PAGE_SIZE;
uvm_push_set_flag(&push, UVM_PUSH_FLAG_NEXT_MEMBAR_NONE);
copying_gpu->parent->ce_hal->memcopy(&push, dst_address, src_address, contig_region_size);
}
if (state.src.is_block_contig && state.dst.is_block_contig)
block_copy_push(block, &state, contig_region, &push);
uvm_perf_event_notify_migration(&va_space->perf_events,
&push,
@ -2583,7 +2688,7 @@ static NV_STATUS block_copy_resident_pages_between(uvm_va_block_t *block,
dst_id,
src_id,
uvm_va_block_region_start(block, contig_region),
contig_region_size,
uvm_va_block_region_size(contig_region),
block_transfer_mode,
contig_cause,
&block_context->make_resident);
@ -2592,34 +2697,8 @@ static NV_STATUS block_copy_resident_pages_between(uvm_va_block_t *block,
contig_cause = page_cause;
}
if (is_src_phys_contig)
UVM_ASSERT(block_phys_copy_contig_check(block, page_index, &contig_src_address, src_id, copying_gpu));
if (is_dst_phys_contig)
UVM_ASSERT(block_phys_copy_contig_check(block, page_index, &contig_dst_address, dst_id, copying_gpu));
if (!is_src_phys_contig || !is_dst_phys_contig) {
uvm_gpu_address_t src_address;
uvm_gpu_address_t dst_address;
if (is_src_phys_contig) {
src_address = contig_src_address;
src_address.address += page_index * PAGE_SIZE;
}
else {
src_address = block_phys_page_copy_address(block, block_phys_page(src_id, page_index), copying_gpu);
}
if (is_dst_phys_contig) {
dst_address = contig_dst_address;
dst_address.address += page_index * PAGE_SIZE;
}
else {
dst_address = block_phys_page_copy_address(block, block_phys_page(dst_id, page_index), copying_gpu);
}
uvm_push_set_flag(&push, UVM_PUSH_FLAG_NEXT_MEMBAR_NONE);
copying_gpu->parent->ce_hal->memcopy(&push, dst_address, src_address, PAGE_SIZE);
}
if (!state.src.is_block_contig || !state.dst.is_block_contig)
block_copy_push(block, &state, uvm_va_block_region_for_page(page_index), &push);
last_index = page_index;
}
@ -2627,19 +2706,10 @@ static NV_STATUS block_copy_resident_pages_between(uvm_va_block_t *block,
// Copy the remaining pages
if (copying_gpu) {
uvm_va_block_region_t contig_region = uvm_va_block_region(contig_start_index, last_index + 1);
size_t contig_region_size = uvm_va_block_region_size(contig_region);
UVM_ASSERT(uvm_va_block_region_contains_region(region, contig_region));
if (is_src_phys_contig && is_dst_phys_contig) {
uvm_gpu_address_t src_address = contig_src_address;
uvm_gpu_address_t dst_address = contig_dst_address;
src_address.address += contig_start_index * PAGE_SIZE;
dst_address.address += contig_start_index * PAGE_SIZE;
uvm_push_set_flag(&push, UVM_PUSH_FLAG_NEXT_MEMBAR_NONE);
copying_gpu->parent->ce_hal->memcopy(&push, dst_address, src_address, contig_region_size);
}
if (state.src.is_block_contig && state.dst.is_block_contig)
block_copy_push(block, &state, contig_region, &push);
uvm_perf_event_notify_migration(&va_space->perf_events,
&push,
@ -2647,7 +2717,7 @@ static NV_STATUS block_copy_resident_pages_between(uvm_va_block_t *block,
dst_id,
src_id,
uvm_va_block_region_start(block, contig_region),
contig_region_size,
uvm_va_block_region_size(contig_region),
block_transfer_mode,
contig_cause,
&block_context->make_resident);
@ -2673,8 +2743,11 @@ static NV_STATUS block_copy_resident_pages_between(uvm_va_block_t *block,
if (*copied_pages) {
uvm_page_mask_or(migrated_pages, migrated_pages, copy_mask);
uvm_page_mask_or(dst_resident_mask, dst_resident_mask, copy_mask);
block_set_resident_processor(block, dst_id);
// For HMM, the residency is updated after migrate_vma_pages() succeeds.
if (transfer_mode != BLOCK_TRANSFER_MODE_INTERNAL_COPY_ONLY) {
uvm_page_mask_or(dst_resident_mask, dst_resident_mask, copy_mask);
block_set_resident_processor(block, dst_id);
}
if (transfer_mode == BLOCK_TRANSFER_MODE_INTERNAL_MOVE_FROM_STAGE) {
// Check whether there are any resident pages left on src
@ -2682,7 +2755,8 @@ static NV_STATUS block_copy_resident_pages_between(uvm_va_block_t *block,
block_clear_resident_processor(block, src_id);
}
// If we are staging the copy due to read duplication, we keep the copy there
// If we are staging the copy due to read duplication, we keep the copy
// there
if (transfer_mode == BLOCK_TRANSFER_MODE_INTERNAL_COPY ||
transfer_mode == BLOCK_TRANSFER_MODE_INTERNAL_COPY_TO_STAGE)
uvm_page_mask_or(&block->read_duplicated_pages, &block->read_duplicated_pages, copy_mask);
@ -2746,23 +2820,30 @@ static NV_STATUS block_copy_resident_pages_mask(uvm_va_block_t *block,
uvm_va_space_t *va_space = uvm_va_block_get_va_space(block);
uvm_processor_id_t src_id;
uvm_processor_mask_t search_mask;
uvm_page_mask_t *copy_mask = &block_context->make_resident.copy_resident_pages_between_mask;
uvm_processor_mask_copy(&search_mask, src_processor_mask);
*copied_pages_out = 0;
for_each_closest_id(src_id, &search_mask, dst_id, va_space) {
uvm_page_mask_t *src_resident_mask = uvm_va_block_resident_mask_get(block, src_id);
NV_STATUS status;
NvU32 copied_pages_from_src;
UVM_ASSERT(!uvm_id_equal(src_id, dst_id));
uvm_page_mask_init_from_region(copy_mask, region, src_resident_mask);
if (page_mask)
uvm_page_mask_and(copy_mask, copy_mask, page_mask);
status = block_copy_resident_pages_between(block,
block_context,
dst_id,
src_id,
region,
page_mask,
copy_mask,
prefetch_page_mask,
transfer_mode,
migrated_pages,
@ -2852,7 +2933,10 @@ static void block_copy_set_first_touch_residency(uvm_va_block_t *block,
//
// If UVM_VA_BLOCK_TRANSFER_MODE_COPY is passed, processors that already have a
// copy of the page will keep it. Conversely, if UVM_VA_BLOCK_TRANSFER_MODE_MOVE
// is passed, the page will no longer be resident in any processor other than dst_id.
// is passed, the page will no longer be resident in any processor other than
// dst_id. If UVM_VA_BLOCK_TRANSFER_MODE_COPY_ONLY is passed, the destination
// pages are copied into but the residency bits for source and destination are
// not updated.
static NV_STATUS block_copy_resident_pages(uvm_va_block_t *block,
uvm_va_block_context_t *block_context,
uvm_processor_id_t dst_id,
@ -2903,6 +2987,13 @@ static NV_STATUS block_copy_resident_pages(uvm_va_block_t *block,
uvm_processor_mask_and(&src_processor_mask, block_get_can_copy_from_mask(block, dst_id), &block->resident);
uvm_processor_mask_clear(&src_processor_mask, dst_id);
if (transfer_mode == UVM_VA_BLOCK_TRANSFER_MODE_MOVE)
transfer_mode_internal = BLOCK_TRANSFER_MODE_INTERNAL_MOVE;
else if (transfer_mode == UVM_VA_BLOCK_TRANSFER_MODE_COPY)
transfer_mode_internal = BLOCK_TRANSFER_MODE_INTERNAL_COPY;
else
transfer_mode_internal = BLOCK_TRANSFER_MODE_INTERNAL_COPY_ONLY;
status = block_copy_resident_pages_mask(block,
block_context,
dst_id,
@ -2910,9 +3001,7 @@ static NV_STATUS block_copy_resident_pages(uvm_va_block_t *block,
region,
copy_page_mask,
prefetch_page_mask,
transfer_mode == UVM_VA_BLOCK_TRANSFER_MODE_COPY?
BLOCK_TRANSFER_MODE_INTERNAL_COPY:
BLOCK_TRANSFER_MODE_INTERNAL_MOVE,
transfer_mode_internal,
missing_pages_count,
migrated_pages,
&pages_copied,
@ -2940,7 +3029,9 @@ static NV_STATUS block_copy_resident_pages(uvm_va_block_t *block,
uvm_page_mask_zero(staged_pages);
if (UVM_ID_IS_CPU(dst_id)) {
if (transfer_mode == UVM_VA_BLOCK_TRANSFER_MODE_COPY_ONLY)
transfer_mode_internal = BLOCK_TRANSFER_MODE_INTERNAL_COPY_ONLY;
else if (UVM_ID_IS_CPU(dst_id)) {
transfer_mode_internal = transfer_mode == UVM_VA_BLOCK_TRANSFER_MODE_COPY?
BLOCK_TRANSFER_MODE_INTERNAL_COPY:
BLOCK_TRANSFER_MODE_INTERNAL_MOVE;
@ -2981,6 +3072,13 @@ static NV_STATUS block_copy_resident_pages(uvm_va_block_t *block,
goto out;
uvm_tracker_clear(&local_tracker);
if (transfer_mode == UVM_VA_BLOCK_TRANSFER_MODE_MOVE)
transfer_mode_internal = BLOCK_TRANSFER_MODE_INTERNAL_MOVE_FROM_STAGE;
else if (transfer_mode == UVM_VA_BLOCK_TRANSFER_MODE_COPY)
transfer_mode_internal = BLOCK_TRANSFER_MODE_INTERNAL_COPY_FROM_STAGE;
else
transfer_mode_internal = BLOCK_TRANSFER_MODE_INTERNAL_COPY_ONLY;
// Now copy staged pages from the CPU to the destination.
status = block_copy_resident_pages_between(block,
block_context,
@ -2989,9 +3087,7 @@ static NV_STATUS block_copy_resident_pages(uvm_va_block_t *block,
region,
staged_pages,
prefetch_page_mask,
transfer_mode == UVM_VA_BLOCK_TRANSFER_MODE_COPY?
BLOCK_TRANSFER_MODE_INTERNAL_COPY_FROM_STAGE:
BLOCK_TRANSFER_MODE_INTERNAL_MOVE_FROM_STAGE,
transfer_mode_internal,
migrated_pages,
&pages_copied,
&local_tracker);
@ -3010,7 +3106,8 @@ out:
// Pages that weren't resident anywhere else were populated at the
// destination directly. Mark them as resident now. We only do it if there
// have been no errors because we cannot identify which pages failed.
if (status == NV_OK && missing_pages_count > 0)
// For HMM, don't do this until migrate_vma_pages() succeeds.
if (status == NV_OK && missing_pages_count > 0 && transfer_mode != UVM_VA_BLOCK_TRANSFER_MODE_COPY_ONLY)
block_copy_set_first_touch_residency(block, block_context, dst_id, region, page_mask);
// Break read duplication
@ -3048,14 +3145,15 @@ out:
return status == NV_OK ? tracker_status : status;
}
NV_STATUS uvm_va_block_make_resident(uvm_va_block_t *va_block,
static NV_STATUS block_make_resident(uvm_va_block_t *va_block,
uvm_va_block_retry_t *va_block_retry,
uvm_va_block_context_t *va_block_context,
uvm_processor_id_t dest_id,
uvm_va_block_region_t region,
const uvm_page_mask_t *page_mask,
const uvm_page_mask_t *prefetch_page_mask,
uvm_make_resident_cause_t cause)
uvm_make_resident_cause_t cause,
uvm_va_block_transfer_mode_t transfer_mode)
{
NV_STATUS status;
uvm_processor_mask_t unmap_processor_mask;
@ -3073,6 +3171,7 @@ NV_STATUS uvm_va_block_make_resident(uvm_va_block_t *va_block,
uvm_assert_mutex_locked(&va_block->lock);
UVM_ASSERT(uvm_va_block_is_hmm(va_block) || va_block->va_range->type == UVM_VA_RANGE_TYPE_MANAGED);
UVM_ASSERT(uvm_va_block_check_policy_is_valid(va_block, va_block_context->policy, region));
resident_mask = block_resident_mask_get_alloc(va_block, dest_id);
if (!resident_mask)
@ -3122,7 +3221,7 @@ NV_STATUS uvm_va_block_make_resident(uvm_va_block_t *va_block,
region,
page_mask,
prefetch_page_mask,
UVM_VA_BLOCK_TRANSFER_MODE_MOVE);
transfer_mode);
if (status != NV_OK)
return status;
@ -3132,12 +3231,95 @@ NV_STATUS uvm_va_block_make_resident(uvm_va_block_t *va_block,
//
// Skip this if we didn't do anything (the input region and/or page mask was
// empty).
if (uvm_processor_mask_test(&va_block->resident, dest_id))
if (transfer_mode == UVM_VA_BLOCK_TRANSFER_MODE_MOVE && uvm_processor_mask_test(&va_block->resident, dest_id))
block_mark_memory_used(va_block, dest_id);
return NV_OK;
}
NV_STATUS uvm_va_block_make_resident(uvm_va_block_t *va_block,
uvm_va_block_retry_t *va_block_retry,
uvm_va_block_context_t *va_block_context,
uvm_processor_id_t dest_id,
uvm_va_block_region_t region,
const uvm_page_mask_t *page_mask,
const uvm_page_mask_t *prefetch_page_mask,
uvm_make_resident_cause_t cause)
{
return block_make_resident(va_block,
va_block_retry,
va_block_context,
dest_id,
region,
page_mask,
prefetch_page_mask,
cause,
UVM_VA_BLOCK_TRANSFER_MODE_MOVE);
}
NV_STATUS uvm_va_block_make_resident_pre(uvm_va_block_t *va_block,
uvm_va_block_retry_t *va_block_retry,
uvm_va_block_context_t *va_block_context,
uvm_processor_id_t dest_id,
uvm_va_block_region_t region,
const uvm_page_mask_t *page_mask,
const uvm_page_mask_t *prefetch_page_mask,
uvm_make_resident_cause_t cause)
{
return block_make_resident(va_block,
va_block_retry,
va_block_context,
dest_id,
region,
page_mask,
prefetch_page_mask,
cause,
UVM_VA_BLOCK_TRANSFER_MODE_COPY_ONLY);
}
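// Note (summary drawn from the comments in this change): the _pre()/_post()
// pair splits an HMM migration into two steps. uvm_va_block_make_resident_pre()
// copies the data with UVM_VA_BLOCK_TRANSFER_MODE_COPY_ONLY and leaves the
// residency bits untouched; the caller then drives migrate_vma_pages(), and
// only on success is uvm_va_block_make_resident_post() called to mark the
// migrated and first-touch pages resident on the destination and update the
// mapping bookkeeping.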
void uvm_va_block_make_resident_post(uvm_va_block_t *va_block,
uvm_va_block_context_t *va_block_context,
uvm_va_block_region_t region,
const uvm_page_mask_t *page_mask)
{
uvm_page_mask_t *migrated_pages = &va_block_context->make_resident.pages_migrated;
uvm_processor_id_t dst_id = va_block_context->make_resident.dest_id;
uvm_page_mask_t *dst_resident_mask = uvm_va_block_resident_mask_get(va_block, dst_id);
uvm_assert_mutex_locked(&va_block->lock);
if (page_mask)
uvm_page_mask_and(migrated_pages, migrated_pages, page_mask);
if (!uvm_page_mask_empty(migrated_pages)) {
// The migrated pages are now resident on the destination.
uvm_page_mask_or(dst_resident_mask, dst_resident_mask, migrated_pages);
block_set_resident_processor(va_block, dst_id);
}
// Pages that weren't resident anywhere else were populated at the
// destination directly. Mark them as resident now. We only do it if there
// have been no errors because we cannot identify which pages failed.
// For HMM, don't do this until migrate_vma_pages() succeeds.
block_copy_set_first_touch_residency(va_block, va_block_context, dst_id, region, page_mask);
// Any move operation implies that mappings have been removed from all
// non-UVM-Lite GPUs.
uvm_page_mask_andnot(&va_block->maybe_mapped_pages, &va_block->maybe_mapped_pages, migrated_pages);
// Break read duplication and clear residency from other processors.
break_read_duplication_in_region(va_block, va_block_context, dst_id, region, page_mask);
// Update eviction heuristics, if needed. Notably this could repeat the call
// done in block_set_resident_processor(), but that doesn't do anything bad
// and it's simpler to keep it in both places.
//
// Skip this if we didn't do anything (the input region and/or page mask was
// empty).
if (uvm_processor_mask_test(&va_block->resident, dst_id))
block_mark_memory_used(va_block, dst_id);
}
// Combination function which prepares the input {region, page_mask} for
// entering read-duplication. It:
// - Unmaps all processors but revoke_id
@ -3198,6 +3380,10 @@ NV_STATUS uvm_va_block_make_resident_read_duplicate(uvm_va_block_t *va_block,
NV_STATUS status = NV_OK;
uvm_processor_id_t src_id;
// TODO: Bug 3660922: need to implement HMM read duplication support.
UVM_ASSERT(!uvm_va_block_is_hmm(va_block));
UVM_ASSERT(va_block_context->policy == uvm_va_range_get_policy(va_block->va_range));
va_block_context->make_resident.dest_id = dest_id;
va_block_context->make_resident.cause = cause;
@ -3568,7 +3754,7 @@ static bool block_check_mappings_page(uvm_va_block_t *block, uvm_page_index_t pa
// Processors with mappings must have access to the processor that
// has the valid copy
UVM_ASSERT_MSG(uvm_processor_mask_subset(&read_mappings, residency_accessible_from),
"Not all processors have access to %s\n",
"Not all processors have access to %s\n"
"Resident: 0x%lx - Mappings R: 0x%lx W: 0x%lx A: 0x%lx -"
"Access: 0x%lx - Native Atomics: 0x%lx - SWA: 0x%lx\n",
uvm_va_space_processor_name(va_space, residency),
@ -3909,9 +4095,12 @@ static void block_unmap_cpu(uvm_va_block_t *block, uvm_va_block_region_t region,
if (!block_has_valid_mapping_cpu(block, subregion))
continue;
unmap_mapping_range(&va_space->mapping,
uvm_va_block_region_start(block, subregion),
uvm_va_block_region_size(subregion), 1);
// We can't actually unmap HMM ranges from the CPU here.
// It happens as part of migrate_vma_setup().
if (!uvm_va_block_is_hmm(block))
unmap_mapping_range(&va_space->mapping,
uvm_va_block_region_start(block, subregion),
uvm_va_block_region_size(subregion), 1);
for (pte_bit = 0; pte_bit < UVM_PTE_BITS_CPU_MAX; pte_bit++)
uvm_page_mask_region_clear(&block->cpu.pte_bits[pte_bit], subregion);
@ -5406,7 +5595,7 @@ static void block_gpu_compute_new_pte_state(uvm_va_block_t *block,
uvm_page_index_t page_index;
size_t big_page_index;
DECLARE_BITMAP(big_ptes_not_covered, MAX_BIG_PAGES_PER_UVM_VA_BLOCK);
bool can_make_new_big_ptes, region_full;
bool can_make_new_big_ptes;
memset(new_pte_state, 0, sizeof(*new_pte_state));
new_pte_state->needs_4k = true;
@ -5469,14 +5658,11 @@ static void block_gpu_compute_new_pte_state(uvm_va_block_t *block,
__set_bit(big_page_index, new_pte_state->big_ptes_covered);
region_full = uvm_page_mask_region_full(page_mask_after, big_page_region);
if (region_full && UVM_ID_IS_INVALID(resident_id))
__set_bit(big_page_index, new_pte_state->big_ptes_fully_unmapped);
// When mapping sysmem, we can use big pages only if we are mapping all pages
// in the big page subregion and the CPU pages backing the subregion are
// physically contiguous.
if (can_make_new_big_ptes && region_full &&
// When mapping sysmem, we can use big pages only if we are mapping all
// pages in the big page subregion and the CPU pages backing the
// subregion are physically contiguous.
if (can_make_new_big_ptes &&
uvm_page_mask_region_full(page_mask_after, big_page_region) &&
(!UVM_ID_IS_CPU(resident_id) ||
(contig_region.first <= big_page_region.first && contig_region.outer >= big_page_region.outer))) {
__set_bit(big_page_index, new_pte_state->big_ptes);
@ -5988,6 +6174,42 @@ static NV_STATUS uvm_cpu_insert_page(struct vm_area_struct *vma,
return NV_OK;
}
static uvm_prot_t compute_logical_prot(uvm_va_block_t *va_block,
uvm_va_block_context_t *va_block_context,
uvm_page_index_t page_index)
{
struct vm_area_struct *vma;
uvm_prot_t logical_prot;
if (uvm_va_block_is_hmm(va_block)) {
NvU64 addr = uvm_va_block_cpu_page_address(va_block, page_index);
logical_prot = uvm_hmm_compute_logical_prot(va_block, va_block_context, addr);
}
else {
uvm_va_range_t *va_range = va_block->va_range;
UVM_ASSERT(va_range->type == UVM_VA_RANGE_TYPE_MANAGED);
// Zombified VA ranges no longer have a vma, so they have no permissions
if (uvm_va_range_is_managed_zombie(va_range)) {
logical_prot = UVM_PROT_NONE;
}
else {
vma = uvm_va_range_vma(va_range);
if (!(vma->vm_flags & VM_READ))
logical_prot = UVM_PROT_NONE;
else if (!(vma->vm_flags & VM_WRITE))
logical_prot = UVM_PROT_READ_ONLY;
else
logical_prot = UVM_PROT_READ_WRITE_ATOMIC;
}
}
return logical_prot;
}
// Creates or upgrades a CPU mapping for the given page, updating the block's
// mapping and pte_bits bitmaps as appropriate. Upon successful return, the page
// will be mapped with at least new_prot permissions.
@ -6008,6 +6230,7 @@ static NV_STATUS uvm_cpu_insert_page(struct vm_area_struct *vma,
// - Ensure that the block hasn't been killed (block->va_range is present)
// - Update the pte/mapping tracking state on success
static NV_STATUS block_map_cpu_page_to(uvm_va_block_t *block,
uvm_va_block_context_t *va_block_context,
uvm_processor_id_t resident_id,
uvm_page_index_t page_index,
uvm_prot_t new_prot)
@ -6041,7 +6264,7 @@ static NV_STATUS block_map_cpu_page_to(uvm_va_block_t *block,
// Check for existing VMA permissions. They could have been modified after
// the initial mmap by mprotect.
if (!uvm_va_block_is_hmm(block) && new_prot > uvm_va_range_logical_prot(va_range))
if (new_prot > compute_logical_prot(block, va_block_context, page_index))
return NV_ERR_INVALID_ACCESS_TYPE;
if (uvm_va_block_is_hmm(block)) {
@ -6155,6 +6378,7 @@ static NV_STATUS block_map_cpu_to(uvm_va_block_t *block,
for_each_va_block_page_in_region_mask(page_index, pages_to_map, region) {
status = block_map_cpu_page_to(block,
block_context,
resident_id,
page_index,
new_prot);
@ -6389,6 +6613,7 @@ NV_STATUS uvm_va_block_map(uvm_va_block_t *va_block,
UVM_ASSERT(new_prot != UVM_PROT_NONE);
UVM_ASSERT(new_prot < UVM_PROT_MAX);
uvm_assert_mutex_locked(&va_block->lock);
UVM_ASSERT(uvm_va_block_check_policy_is_valid(va_block, va_block_context->policy, region));
// Mapping is not supported on the eviction path that doesn't hold the VA
// space lock.
@ -6730,6 +6955,8 @@ NV_STATUS uvm_va_block_map_mask(uvm_va_block_t *va_block,
NV_STATUS tracker_status;
uvm_processor_id_t id;
UVM_ASSERT(uvm_va_block_check_policy_is_valid(va_block, va_block_context->policy, region));
for_each_id_in_mask(id, map_processor_mask) {
status = uvm_va_block_map(va_block,
va_block_context,
@ -7176,28 +7403,8 @@ static NV_STATUS block_evict_pages_from_gpu(uvm_va_block_t *va_block, uvm_gpu_t
uvm_va_space_t *va_space = uvm_va_block_get_va_space(va_block);
uvm_va_block_context_t *block_context = uvm_va_space_block_context(va_space, mm);
if (!uvm_va_block_is_hmm(va_block))
block_context->policy = uvm_va_range_get_policy(va_block->va_range);
// Move all subregions resident on the GPU to the CPU
for_each_va_block_subregion_in_mask(subregion, resident, region) {
// Need to set block_context->policy for HMM.
if (uvm_va_block_is_hmm(va_block)) {
uvm_va_policy_node_t *node;
node = uvm_va_policy_node_find(va_block, uvm_va_block_region_start(va_block, subregion));
if (node) {
uvm_page_index_t outer = uvm_va_block_cpu_page_index(va_block,
node->node.end) + 1;
// If the policy doesn't cover the subregion, truncate the
// subregion.
if (subregion.outer > outer)
subregion.outer = outer;
block_context->policy = &node->policy;
}
else
block_context->policy = &uvm_va_policy_default;
}
status = uvm_va_block_migrate_locked(va_block,
NULL,
block_context,
@ -7630,13 +7837,82 @@ error:
return status;
}
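// Split the CPU chunk backing page_index into new_size pieces. Any GPU
// reverse sysmem mappings of the parent chunk are split first, then the chunk
// itself is split and the resulting subchunks are inserted into the block. On
// failure the original chunk is re-inserted and the already-split GPU
// mappings are merged back.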
static NV_STATUS block_split_cpu_chunk_to_size(uvm_va_block_t *block,
uvm_page_index_t page_index,
uvm_cpu_chunk_t *chunk,
uvm_chunk_size_t new_size)
{
size_t num_new_chunks = uvm_cpu_chunk_get_size(chunk) / new_size;
uvm_cpu_chunk_t **new_chunks = NULL;
uvm_gpu_t *gpu;
NvU64 gpu_mapping_addr;
uvm_processor_mask_t gpu_split_mask;
uvm_gpu_id_t id;
NV_STATUS status;
size_t i;
UVM_ASSERT(IS_ALIGNED(uvm_cpu_chunk_get_size(chunk), new_size));
uvm_processor_mask_zero(&gpu_split_mask);
for_each_gpu_id(id) {
if (!uvm_va_block_gpu_state_get(block, id))
continue;
// If the parent chunk has not been mapped, there is nothing to split.
gpu_mapping_addr = uvm_cpu_chunk_get_gpu_mapping_addr(block, page_index, chunk, id);
if (gpu_mapping_addr == 0)
continue;
gpu = block_get_gpu(block, id);
status = uvm_pmm_sysmem_mappings_split_gpu_mappings(&gpu->pmm_reverse_sysmem_mappings,
gpu_mapping_addr,
new_size);
if (status != NV_OK)
goto merge;
uvm_processor_mask_set(&gpu_split_mask, id);
}
uvm_cpu_chunk_remove_from_block(block, chunk, page_index);
new_chunks = uvm_kvmalloc(num_new_chunks * sizeof(*new_chunks));
if (new_chunks)
status = uvm_cpu_chunk_split(block, chunk, new_size, UVM_CPU_CHUNK_PAGE_INDEX(chunk, page_index), new_chunks);
else
status = NV_ERR_NO_MEMORY;
if (status != NV_OK) {
uvm_cpu_chunk_insert_in_block(block, chunk, UVM_CPU_CHUNK_PAGE_INDEX(chunk, page_index));
merge:
for_each_gpu_id_in_mask (id, &gpu_split_mask) {
gpu_mapping_addr = uvm_cpu_chunk_get_gpu_mapping_addr(block, page_index, chunk, id);
gpu = block_get_gpu(block, id);
uvm_pmm_sysmem_mappings_merge_gpu_mappings(&gpu->pmm_reverse_sysmem_mappings,
gpu_mapping_addr,
uvm_cpu_chunk_get_size(chunk));
}
} else {
for (i = 0; i < num_new_chunks; i++) {
status = uvm_cpu_chunk_insert_in_block(block,
new_chunks[i],
UVM_CPU_CHUNK_PAGE_INDEX(new_chunks[i],
(page_index +
(i * (new_size / PAGE_SIZE)))));
UVM_ASSERT(status == NV_OK);
}
}
uvm_kvfree(new_chunks);
return status;
}
// Perform any CPU chunk splitting that may be required for this block split.
// Just like block_presplit_gpu_chunks, no chunks are moved to the new block.
static NV_STATUS block_presplit_cpu_chunks(uvm_va_block_t *existing, uvm_va_block_t *new)
{
uvm_page_index_t page_index = uvm_va_block_cpu_page_index(existing, new->start);
uvm_cpu_chunk_t *splitting_chunk;
uvm_chunk_size_t split_sizes = uvm_cpu_chunk_get_allocation_sizes();
uvm_chunk_sizes_mask_t split_sizes = uvm_cpu_chunk_get_allocation_sizes();
uvm_chunk_size_t subchunk_size;
NV_STATUS status = NV_OK;
@ -7660,32 +7936,7 @@ static NV_STATUS block_presplit_cpu_chunks(uvm_va_block_t *existing, uvm_va_bloc
split_sizes &= ~(IS_ALIGNED(new->start, UVM_CHUNK_SIZE_64K) ? UVM_CHUNK_SIZE_64K - 1 : 0);
for_each_chunk_size_rev(subchunk_size, split_sizes) {
uvm_gpu_id_t id;
UVM_ASSERT(IS_ALIGNED(uvm_cpu_chunk_get_size(splitting_chunk), subchunk_size));
for_each_gpu_id(id) {
uvm_gpu_t *gpu;
if (!uvm_va_block_gpu_state_get(existing, id))
continue;
// If the parent chunk has not been mapped, there is nothing to split.
if (uvm_cpu_chunk_get_gpu_mapping_addr(existing, page_index, splitting_chunk, id) == 0)
continue;
gpu = block_get_gpu(existing, id);
status = uvm_pmm_sysmem_mappings_split_gpu_mappings(&gpu->pmm_reverse_sysmem_mappings,
uvm_cpu_chunk_get_gpu_mapping_addr(existing,
page_index,
splitting_chunk,
id),
subchunk_size);
if (status != NV_OK)
return status;
}
status = uvm_cpu_chunk_split(existing, splitting_chunk, subchunk_size);
status = block_split_cpu_chunk_to_size(existing, page_index, splitting_chunk, subchunk_size);
if (status != NV_OK)
return status;
@ -7695,36 +7946,114 @@ static NV_STATUS block_presplit_cpu_chunks(uvm_va_block_t *existing, uvm_va_bloc
return NV_OK;
}
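// Merge the run of equally sized CPU chunks covering 'size' bytes starting at
// page_index into a single chunk of 'size', then merge the corresponding GPU
// reverse sysmem mappings. If uvm_cpu_chunk_merge() reports there is nothing
// to do, the original chunks are re-inserted unchanged.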
static void block_merge_cpu_chunks(uvm_va_block_t *existing, uvm_va_block_t *new)
static NV_STATUS block_merge_cpu_chunks_to_size(uvm_va_block_t *block,
uvm_chunk_size_t size,
uvm_page_index_t page_index)
{
uvm_page_index_t page_index = uvm_va_block_cpu_page_index(existing, new->start);
uvm_cpu_chunk_t *chunk = uvm_cpu_chunk_get_chunk_for_page(existing, page_index);
uvm_va_space_t *va_space = existing->va_range->va_space;
uvm_cpu_chunk_t *chunk;
size_t num_merge_chunks;
uvm_chunk_size_t chunk_size;
uvm_cpu_chunk_t **merge_chunks;
uvm_gpu_id_t id;
size_t i;
NV_STATUS status;
if (!chunk)
return;
chunk = uvm_cpu_chunk_get_chunk_for_page(block, page_index);
chunk_size = uvm_cpu_chunk_get_size(chunk);
num_merge_chunks = size / chunk_size;
// Merge the CPU chunk. If a merge was not done, nothing else needs to be done.
chunk = uvm_cpu_chunk_merge(existing, chunk);
if (!chunk)
return;
// It's OK if we can't merge here. We know that the CPU chunk split
// operation completed successfully. Therefore, the CPU chunks are in a
// sane state.
merge_chunks = uvm_kvmalloc(num_merge_chunks * sizeof(*merge_chunks));
if (!merge_chunks)
return NV_ERR_NO_MEMORY;
for (i = 0; i < num_merge_chunks; i++) {
merge_chunks[i] = uvm_cpu_chunk_get_chunk_for_page(block, page_index + (i * (chunk_size / PAGE_SIZE)));
UVM_ASSERT(merge_chunks[i]);
UVM_ASSERT(uvm_cpu_chunk_get_size(merge_chunks[i]) == chunk_size);
uvm_cpu_chunk_remove_from_block(block, merge_chunks[i], page_index + (i * (chunk_size / PAGE_SIZE)));
}
// Merge the CPU chunk. If a merge was not done, re-insert the original chunks.
status = uvm_cpu_chunk_merge(block, merge_chunks, num_merge_chunks, size, &chunk);
if (status == NV_WARN_NOTHING_TO_DO) {
for (i = 0; i < num_merge_chunks; i++)
uvm_cpu_chunk_insert_in_block(block, merge_chunks[i], page_index + (i * (chunk_size / PAGE_SIZE)));
goto done;
}
UVM_ASSERT(status == NV_OK);
status = uvm_cpu_chunk_insert_in_block(block, chunk, page_index);
UVM_ASSERT(status == NV_OK);
for_each_gpu_id(id) {
NvU64 gpu_mapping_addr;
uvm_gpu_t *gpu;
if (!uvm_va_block_gpu_state_get(existing, id))
if (!uvm_va_block_gpu_state_get(block, id))
continue;
gpu_mapping_addr = uvm_cpu_chunk_get_gpu_mapping_addr(existing, page_index, chunk, id);
gpu_mapping_addr = uvm_cpu_chunk_get_gpu_mapping_addr(block, page_index, chunk, id);
if (gpu_mapping_addr == 0)
continue;
gpu = uvm_va_space_get_gpu(va_space, id);
uvm_pmm_sysmem_mappings_merge_gpu_mappings(&gpu->pmm_reverse_sysmem_mappings,
gpu_mapping_addr,
uvm_cpu_chunk_get_size(chunk));
gpu = block_get_gpu(block, id);
uvm_pmm_sysmem_mappings_merge_gpu_mappings(&gpu->pmm_reverse_sysmem_mappings, gpu_mapping_addr, size);
}
done:
uvm_kvfree(merge_chunks);
return status;
}
static void block_merge_cpu_chunks(uvm_va_block_t *existing, uvm_va_block_t *new)
{
uvm_page_index_t page_index = uvm_va_block_cpu_page_index(existing, new->start);
uvm_cpu_chunk_t *chunk = uvm_cpu_chunk_get_chunk_for_page(existing, page_index);
uvm_chunk_sizes_mask_t merge_sizes = uvm_cpu_chunk_get_allocation_sizes();
uvm_chunk_size_t largest_size;
uvm_chunk_size_t chunk_size;
uvm_chunk_size_t merge_size;
size_t block_size = uvm_va_block_size(existing);
NV_STATUS status;
if (!chunk)
return;
chunk_size = uvm_cpu_chunk_get_size(chunk);
// Remove all CPU chunk sizes above the size of the existing VA block.
// Since block sizes are not always powers of 2 and we can't merge to a size
// larger than the block, limit candidates to the largest power of 2 that is
// less than or equal to the block size.
largest_size = rounddown_pow_of_two(block_size);
merge_sizes &= (largest_size | (largest_size - 1));
// Remove all CPU chunk sizes smaller than the size of the chunk being merged up.
merge_sizes &= ~(chunk_size | (chunk_size - 1));
for_each_chunk_size(merge_size, merge_sizes) {
uvm_va_block_region_t chunk_region;
// The block has to fully contain the VA range after the merge.
if (!uvm_va_block_contains_address(existing, UVM_ALIGN_DOWN(new->start, merge_size)) ||
!uvm_va_block_contains_address(existing, UVM_ALIGN_DOWN(new->start, merge_size) + merge_size - 1))
break;
chunk_region = uvm_va_block_chunk_region(existing, merge_size, page_index);
// If not all pages in the region covered by the chunk are allocated,
// we can't merge.
if (!uvm_page_mask_region_full(&existing->cpu.allocated, chunk_region))
break;
status = block_merge_cpu_chunks_to_size(existing, merge_size, chunk_region.first);
if (status != NV_OK)
break;
}
}
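// The two mask filters in block_merge_cpu_chunks() above do all of the size
// selection. A minimal standalone sketch of the same arithmetic (the 4K/64K/2M
// chunk set and the 1.5MB block size are illustrative assumptions, not driver
// values):
//
//     #include <stdio.h>
//
//     int main(void)
//     {
//         unsigned long sizes      = (4UL << 10) | (64UL << 10) | (2UL << 20);
//         unsigned long block_size = 1536UL << 10; // not a power of two
//         unsigned long chunk_size = 4UL << 10;    // chunk being merged up
//         unsigned long largest    = 1;
//
//         while (largest * 2 <= block_size)        // rounddown_pow_of_two()
//             largest *= 2;                        // -> 1MB
//
//         sizes &= (largest | (largest - 1));        // keep sizes <= 1MB
//         sizes &= ~(chunk_size | (chunk_size - 1)); // drop sizes <= 4K
//
//         printf("candidate merge sizes: 0x%lx\n", sizes); // prints 0x10000 (64K)
//         return 0;
//     }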
@ -7737,7 +8066,7 @@ static NV_STATUS block_split_preallocate_no_retry(uvm_va_block_t *existing, uvm_
uvm_gpu_t *gpu;
uvm_gpu_id_t id;
uvm_page_index_t split_page_index;
uvm_va_range_t *existing_va_range = existing->va_range;
uvm_va_block_test_t *block_test;
status = block_presplit_cpu_chunks(existing, new);
if (status != NV_OK)
@ -7759,8 +8088,13 @@ static NV_STATUS block_split_preallocate_no_retry(uvm_va_block_t *existing, uvm_
}
}
if (existing_va_range && existing_va_range->inject_split_error) {
existing_va_range->inject_split_error = false;
block_test = uvm_va_block_get_test(existing);
if (block_test && block_test->inject_split_error) {
block_test->inject_split_error = false;
if (!uvm_va_block_is_hmm(existing)) {
UVM_ASSERT(existing->va_range->inject_split_error);
existing->va_range->inject_split_error = false;
}
status = NV_ERR_NO_MEMORY;
goto error;
}
@ -8330,8 +8664,10 @@ NV_STATUS uvm_va_block_split_locked(uvm_va_block_t *existing_va_block,
block_set_processor_masks(existing_va_block);
block_set_processor_masks(new_block);
if (uvm_va_block_is_hmm(existing_va_block))
if (uvm_va_block_is_hmm(existing_va_block)) {
uvm_hmm_va_block_split_tree(existing_va_block, new_block);
uvm_va_policy_node_split_move(existing_va_block, new_block);
}
out:
// Run checks on existing_va_block even on failure, since an error must
@ -8363,7 +8699,7 @@ static bool block_region_might_read_duplicate(uvm_va_block_t *va_block,
if (!uvm_va_space_can_read_duplicate(va_space, NULL))
return false;
// TODO: Bug 2046423: need to implement read duplication support in Linux.
// TODO: Bug 3660922: need to implement HMM read duplication support.
if (uvm_va_block_is_hmm(va_block) ||
uvm_va_range_get_policy(va_range)->read_duplication == UVM_READ_DUPLICATION_DISABLED)
return false;
@ -8382,22 +8718,20 @@ static bool block_region_might_read_duplicate(uvm_va_block_t *va_block,
// could be changed in the future to optimize multiple faults/counters on
// contiguous pages.
static uvm_prot_t compute_new_permission(uvm_va_block_t *va_block,
uvm_va_block_context_t *va_block_context,
uvm_page_index_t page_index,
uvm_processor_id_t fault_processor_id,
uvm_processor_id_t new_residency,
uvm_fault_access_type_t access_type)
{
uvm_va_range_t *va_range;
uvm_va_space_t *va_space = uvm_va_block_get_va_space(va_block);
uvm_prot_t logical_prot, new_prot;
// TODO: Bug 1766432: Refactor into policies. Current policy is
// query_promote: upgrade access privileges to avoid future faults IF
// they don't trigger further revocations.
va_range = va_block->va_range;
new_prot = uvm_fault_access_type_to_prot(access_type);
logical_prot = uvm_va_range_logical_prot(va_range);
logical_prot = compute_logical_prot(va_block, va_block_context, page_index);
UVM_ASSERT(logical_prot >= new_prot);
@ -8542,7 +8876,10 @@ NV_STATUS uvm_va_block_add_mappings_after_migration(uvm_va_block_t *va_block,
uvm_va_policy_t *policy = va_block_context->policy;
uvm_processor_id_t preferred_location;
// Read duplication takes precedence over SetAccesedBy.
uvm_assert_mutex_locked(&va_block->lock);
UVM_ASSERT(uvm_va_block_check_policy_is_valid(va_block, policy, region));
// Read duplication takes precedence over SetAccessedBy.
//
// Exclude ranges with read duplication set...
if (uvm_va_policy_is_read_duplicate(policy, va_space)) {
@ -8764,6 +9101,8 @@ NV_STATUS uvm_va_block_add_mappings(uvm_va_block_t *va_block,
uvm_range_group_range_iter_t iter;
uvm_prot_t prot_to_map;
UVM_ASSERT(uvm_va_block_check_policy_is_valid(va_block, va_block_context->policy, region));
if (UVM_ID_IS_CPU(processor_id) && !uvm_va_block_is_hmm(va_block)) {
if (!uvm_va_range_vma_check(va_range, va_block_context->mm))
return NV_OK;
@ -8778,7 +9117,7 @@ NV_STATUS uvm_va_block_add_mappings(uvm_va_block_t *va_block,
va_block_context->mask_by_prot[prot_to_map - 1].count = 0;
for_each_va_block_page_in_region_mask(page_index, page_mask, region) {
// Read duplication takes precedence over SetAccesedBy. Exclude pages
// Read duplication takes precedence over SetAccessedBy. Exclude pages
// read-duplicated by performance heuristics
if (uvm_page_mask_test(&va_block->read_duplicated_pages, page_index))
continue;
@ -8882,6 +9221,7 @@ static bool map_remote_on_atomic_fault(uvm_va_space_t *va_space,
// could be changed in the future to optimize multiple faults or access
// counter notifications on contiguous pages.
static uvm_processor_id_t block_select_residency(uvm_va_block_t *va_block,
uvm_va_block_context_t *va_block_context,
uvm_page_index_t page_index,
uvm_processor_id_t processor_id,
NvU32 access_type_mask,
@ -8895,7 +9235,9 @@ static uvm_processor_id_t block_select_residency(uvm_va_block_t *va_block,
bool may_read_duplicate;
uvm_processor_id_t preferred_location;
if (is_uvm_fault_force_sysmem_set()) {
// TODO: Bug 3660968: Remove uvm_hmm_force_sysmem_set() check as soon as
// HMM migration is implemented for VMAs other than anonymous memory.
if (is_uvm_fault_force_sysmem_set() || uvm_hmm_must_use_sysmem(va_block, va_block_context)) {
*read_duplicate = false;
return UVM_ID_CPU;
}
@ -8990,6 +9332,7 @@ static uvm_processor_id_t block_select_residency(uvm_va_block_t *va_block,
}
uvm_processor_id_t uvm_va_block_select_residency(uvm_va_block_t *va_block,
uvm_va_block_context_t *va_block_context,
uvm_page_index_t page_index,
uvm_processor_id_t processor_id,
NvU32 access_type_mask,
@ -8998,14 +9341,24 @@ uvm_processor_id_t uvm_va_block_select_residency(uvm_va_block_t *va_block,
uvm_service_operation_t operation,
bool *read_duplicate)
{
uvm_processor_id_t id = block_select_residency(va_block,
page_index,
processor_id,
access_type_mask,
policy,
thrashing_hint,
operation,
read_duplicate);
uvm_processor_id_t id;
UVM_ASSERT(uvm_va_block_check_policy_is_valid(va_block,
va_block_context->policy,
uvm_va_block_region_for_page(page_index)));
UVM_ASSERT(uvm_hmm_va_block_context_vma_is_valid(va_block,
va_block_context,
uvm_va_block_region_for_page(page_index)));
id = block_select_residency(va_block,
va_block_context,
page_index,
processor_id,
access_type_mask,
policy,
thrashing_hint,
operation,
read_duplicate);
// If the intended residency doesn't have memory, fall back to the CPU.
if (!block_processor_has_memory(va_block, id)) {
@ -9035,32 +9388,13 @@ static bool check_access_counters_dont_revoke(uvm_va_block_t *block,
return true;
}
NV_STATUS uvm_va_block_service_locked(uvm_processor_id_t processor_id,
uvm_va_block_t *va_block,
uvm_va_block_retry_t *block_retry,
uvm_service_block_context_t *service_context)
// Update service_context->prefetch_hint, service_context->per_processor_masks,
// and service_context->region.
static void uvm_va_block_get_prefetch_hint(uvm_va_block_t *va_block,
uvm_service_block_context_t *service_context)
{
NV_STATUS status = NV_OK;
uvm_processor_id_t new_residency;
uvm_prot_t new_prot;
uvm_va_range_t *va_range = va_block->va_range;
uvm_va_space_t *va_space = uvm_va_block_get_va_space(va_block);
uvm_perf_prefetch_hint_t prefetch_hint = UVM_PERF_PREFETCH_HINT_NONE();
uvm_processor_mask_t processors_involved_in_cpu_migration;
uvm_assert_mutex_locked(&va_block->lock);
UVM_ASSERT(va_range->type == UVM_VA_RANGE_TYPE_MANAGED);
// GPU fault servicing must be done under the VA space read lock. GPU fault
// servicing is required for RM to make forward progress, and we allow other
// threads to call into RM while holding the VA space lock in read mode. If
// we took the VA space lock in write mode on the GPU fault service path,
// we could deadlock because the thread in RM which holds the VA space lock
// for read wouldn't be able to complete until fault servicing completes.
if (service_context->operation != UVM_SERVICE_OPERATION_REPLAYABLE_FAULTS || UVM_ID_IS_CPU(processor_id))
uvm_assert_rwsem_locked(&va_space->lock);
else
uvm_assert_rwsem_locked_read(&va_space->lock);
// Performance heuristics policy: we only consider prefetching when there
// are migrations to a single processor, only.
@ -9074,20 +9408,20 @@ NV_STATUS uvm_va_block_service_locked(uvm_processor_id_t processor_id,
// Update prefetch tracking structure with the pages that will migrate
// due to faults
uvm_perf_prefetch_prenotify_fault_migrations(va_block,
&service_context->block_context,
new_residency,
new_residency_mask,
service_context->region);
prefetch_hint = uvm_perf_prefetch_get_hint(va_block, new_residency_mask);
uvm_perf_prefetch_get_hint(va_block,
&service_context->block_context,
new_residency,
new_residency_mask,
service_context->region,
&service_context->prefetch_bitmap_tree,
&service_context->prefetch_hint);
// Obtain the prefetch hint and give a fake fault access type to the
// prefetched pages
if (UVM_ID_IS_VALID(prefetch_hint.residency)) {
UVM_ASSERT(prefetch_hint.prefetch_pages_mask != NULL);
if (UVM_ID_IS_VALID(service_context->prefetch_hint.residency)) {
const uvm_page_mask_t *prefetch_pages_mask = &service_context->prefetch_hint.prefetch_pages_mask;
for_each_va_block_page_in_mask(page_index, prefetch_hint.prefetch_pages_mask, va_block) {
for_each_va_block_page_in_mask(page_index, prefetch_pages_mask, va_block) {
UVM_ASSERT(!uvm_page_mask_test(new_residency_mask, page_index));
service_context->access_type[page_index] = UVM_FAULT_ACCESS_TYPE_PREFETCH;
@ -9102,9 +9436,43 @@ NV_STATUS uvm_va_block_service_locked(uvm_processor_id_t processor_id,
}
}
service_context->region = uvm_va_block_region_from_block(va_block);
uvm_page_mask_or(new_residency_mask, new_residency_mask, prefetch_pages_mask);
service_context->region = uvm_va_block_region_from_mask(va_block, new_residency_mask);
}
}
else {
service_context->prefetch_hint.residency = UVM_ID_INVALID;
}
}
NV_STATUS uvm_va_block_service_locked(uvm_processor_id_t processor_id,
uvm_va_block_t *va_block,
uvm_va_block_retry_t *block_retry,
uvm_service_block_context_t *service_context)
{
NV_STATUS status = NV_OK;
uvm_processor_id_t new_residency;
uvm_prot_t new_prot;
uvm_va_space_t *va_space = uvm_va_block_get_va_space(va_block);
uvm_processor_mask_t processors_involved_in_cpu_migration;
uvm_assert_mutex_locked(&va_block->lock);
UVM_ASSERT(uvm_va_block_check_policy_is_valid(va_block,
service_context->block_context.policy,
service_context->region));
// GPU fault servicing must be done under the VA space read lock. GPU fault
// servicing is required for RM to make forward progress, and we allow other
// threads to call into RM while holding the VA space lock in read mode. If
// we took the VA space lock in write mode on the GPU fault service path,
// we could deadlock because the thread in RM which holds the VA space lock
// for read wouldn't be able to complete until fault servicing completes.
if (service_context->operation != UVM_SERVICE_OPERATION_REPLAYABLE_FAULTS || UVM_ID_IS_CPU(processor_id))
uvm_assert_rwsem_locked(&va_space->lock);
else
uvm_assert_rwsem_locked_read(&va_space->lock);
uvm_va_block_get_prefetch_hint(va_block, service_context);
for (new_prot = UVM_PROT_READ_ONLY; new_prot < UVM_PROT_MAX; ++new_prot)
service_context->mappings_by_prot[new_prot-1].count = 0;
@ -9137,11 +9505,10 @@ NV_STATUS uvm_va_block_service_locked(uvm_processor_id_t processor_id,
uvm_page_mask_zero(did_migrate_mask);
uvm_processor_mask_zero(all_involved_processors);
if (UVM_ID_IS_VALID(prefetch_hint.residency)) {
UVM_ASSERT(uvm_id_equal(prefetch_hint.residency, new_residency));
UVM_ASSERT(prefetch_hint.prefetch_pages_mask != NULL);
if (UVM_ID_IS_VALID(service_context->prefetch_hint.residency)) {
UVM_ASSERT(uvm_id_equal(service_context->prefetch_hint.residency, new_residency));
uvm_page_mask_or(new_residency_mask, new_residency_mask, prefetch_hint.prefetch_pages_mask);
uvm_page_mask_or(new_residency_mask, new_residency_mask, &service_context->prefetch_hint.prefetch_pages_mask);
}
if (service_context->read_duplicate_count == 0 ||
@ -9156,7 +9523,7 @@ NV_STATUS uvm_va_block_service_locked(uvm_processor_id_t processor_id,
service_context->read_duplicate_count == 0?
new_residency_mask:
&service_context->block_context.caller_page_mask,
prefetch_hint.prefetch_pages_mask,
&service_context->prefetch_hint.prefetch_pages_mask,
cause);
if (status != NV_OK)
return status;
@ -9172,7 +9539,7 @@ NV_STATUS uvm_va_block_service_locked(uvm_processor_id_t processor_id,
new_residency,
service_context->region,
&service_context->block_context.caller_page_mask,
prefetch_hint.prefetch_pages_mask,
&service_context->prefetch_hint.prefetch_pages_mask,
cause);
if (status != NV_OK)
return status;
@ -9193,6 +9560,7 @@ NV_STATUS uvm_va_block_service_locked(uvm_processor_id_t processor_id,
// the new residency
for_each_va_block_page_in_region_mask(page_index, new_residency_mask, service_context->region) {
new_prot = compute_new_permission(va_block,
&service_context->block_context,
page_index,
processor_id,
new_residency,
@ -9465,6 +9833,65 @@ NV_STATUS uvm_va_block_service_locked(uvm_processor_id_t processor_id,
return NV_OK;
}
NV_STATUS uvm_va_block_check_logical_permissions(uvm_va_block_t *va_block,
uvm_va_block_context_t *va_block_context,
uvm_processor_id_t processor_id,
uvm_page_index_t page_index,
uvm_fault_type_t access_type,
bool allow_migration)
{
uvm_va_range_t *va_range = va_block->va_range;
uvm_prot_t access_prot = uvm_fault_access_type_to_prot(access_type);
UVM_ASSERT(uvm_va_block_check_policy_is_valid(va_block,
va_block_context->policy,
uvm_va_block_region_for_page(page_index)));
UVM_ASSERT(uvm_hmm_va_block_context_vma_is_valid(va_block,
va_block_context,
uvm_va_block_region_for_page(page_index)));
// CPU permissions are checked later by block_map_cpu_page.
//
// TODO: Bug 1766124: permissions are checked by block_map_cpu_page because
// it can also be called from change_pte. Make change_pte call this
// function and only check CPU permissions here.
if (UVM_ID_IS_GPU(processor_id)) {
if (va_range && uvm_va_range_is_managed_zombie(va_range))
return NV_ERR_INVALID_ADDRESS;
// GPU faults only check vma permissions if a mm is registered with the
// VA space (ie. uvm_va_space_mm_retain_lock(va_space) != NULL) or if
// uvm_enable_builtin_tests is set, because the Linux kernel can change
// vm_flags at any moment (for example on mprotect) and here we are not
// guaranteed to have vma->vm_mm->mmap_lock. During tests we ensure that
// this scenario does not happen.
if ((va_block_context->mm || uvm_enable_builtin_tests) &&
(access_prot > compute_logical_prot(va_block, va_block_context, page_index)))
return NV_ERR_INVALID_ACCESS_TYPE;
}
// Non-migratable range:
// - CPU accesses are always fatal, regardless of the VA range residency
// - GPU accesses are fatal if the GPU can't map the preferred location
if (!allow_migration) {
UVM_ASSERT(!uvm_va_block_is_hmm(va_block));
if (UVM_ID_IS_CPU(processor_id)) {
return NV_ERR_INVALID_OPERATION;
}
else {
uvm_va_space_t *va_space = va_range->va_space;
return uvm_processor_mask_test(
&va_space->accessible_from[uvm_id_value(uvm_va_range_get_policy(va_range)->preferred_location)],
processor_id)?
NV_OK : NV_ERR_INVALID_ACCESS_TYPE;
}
}
return NV_OK;
}
// Check if we are faulting on a page with valid permissions to check if we can
// skip fault handling. See uvm_va_block_t::cpu::fault_authorized for more
// details
@ -9528,19 +9955,6 @@ static NV_STATUS block_cpu_fault_locked(uvm_va_block_t *va_block,
UVM_ASSERT(fault_addr >= va_block->start);
UVM_ASSERT(fault_addr <= va_block->end);
// There are up to three mm_structs to worry about, and they might all be
// different:
//
// 1) vma->vm_mm
// 2) current->mm
// 3) va_space->va_space_mm.mm (though note that if this is valid, then it
// must match vma->vm_mm).
//
// The kernel guarantees that vma->vm_mm has a reference taken with
// mmap_lock held on the CPU fault path, so tell the fault handler to use
// that one. current->mm might differ if we're on the access_process_vm
// (ptrace) path or if another driver is calling get_user_pages.
service_context->block_context.mm = uvm_va_range_vma(va_range)->vm_mm;
uvm_assert_mmap_lock_locked(service_context->block_context.mm);
service_context->block_context.policy = uvm_va_policy_get(va_block, fault_addr);
@ -9556,8 +9970,11 @@ static NV_STATUS block_cpu_fault_locked(uvm_va_block_t *va_block,
}
// Check logical permissions
status = uvm_va_range_check_logical_permissions(va_block->va_range,
page_index = uvm_va_block_cpu_page_index(va_block, fault_addr);
status = uvm_va_block_check_logical_permissions(va_block,
&service_context->block_context,
UVM_ID_CPU,
page_index,
fault_access_type,
uvm_range_group_address_migratable(va_space, fault_addr));
if (status != NV_OK)
@ -9565,7 +9982,6 @@ static NV_STATUS block_cpu_fault_locked(uvm_va_block_t *va_block,
uvm_processor_mask_zero(&service_context->cpu_fault.gpus_to_check_for_ecc);
page_index = uvm_va_block_cpu_page_index(va_block, fault_addr);
if (skip_cpu_fault_with_valid_permissions(va_block, page_index, fault_access_type))
return NV_OK;
@ -9588,6 +10004,7 @@ static NV_STATUS block_cpu_fault_locked(uvm_va_block_t *va_block,
// Compute new residency and update the masks
new_residency = uvm_va_block_select_residency(va_block,
&service_context->block_context,
page_index,
UVM_ID_CPU,
uvm_fault_access_type_mask_bit(fault_access_type),
@ -9689,7 +10106,6 @@ NV_STATUS uvm_va_block_find(uvm_va_space_t *va_space, NvU64 addr, uvm_va_block_t
}
NV_STATUS uvm_va_block_find_create(uvm_va_space_t *va_space,
struct mm_struct *mm,
NvU64 addr,
uvm_va_block_context_t *va_block_context,
uvm_va_block_t **out_block)
@ -9697,9 +10113,12 @@ NV_STATUS uvm_va_block_find_create(uvm_va_space_t *va_space,
uvm_va_range_t *va_range;
size_t index;
if (uvm_enable_builtin_tests && atomic_dec_if_positive(&va_space->test.va_block_allocation_fail_nth) == 0)
return NV_ERR_NO_MEMORY;
va_range = uvm_va_range_find(va_space, addr);
if (!va_range) {
if (!mm)
if (!va_block_context || !va_block_context->mm)
return NV_ERR_INVALID_ADDRESS;
return uvm_hmm_va_block_find_create(va_space, addr, va_block_context, out_block);
}
@ -9738,6 +10157,8 @@ NV_STATUS uvm_va_block_write_from_cpu(uvm_va_block_t *va_block,
if (UVM_ID_IS_INVALID(proc))
proc = UVM_ID_CPU;
block_context->policy = uvm_va_policy_get(va_block, dst);
// Use make_resident() in all cases to break read-duplication, but
// block_retry can be NULL as if the page is not resident yet we will make
// it resident on the CPU.
@ -10222,12 +10643,19 @@ NV_STATUS uvm_test_va_block_inject_error(UVM_TEST_VA_BLOCK_INJECT_ERROR_PARAMS *
struct mm_struct *mm;
uvm_va_block_t *va_block;
uvm_va_block_test_t *va_block_test;
uvm_va_block_context_t *block_context = NULL;
NV_STATUS status = NV_OK;
mm = uvm_va_space_mm_retain_lock(va_space);
mm = uvm_va_space_mm_or_current_retain_lock(va_space);
uvm_va_space_down_read(va_space);
status = uvm_va_block_find_create(va_space, mm, params->lookup_address, NULL, &va_block);
block_context = uvm_va_block_context_alloc(mm);
if (!block_context) {
status = NV_ERR_NO_MEMORY;
goto out;
}
status = uvm_va_block_find_create(va_space, params->lookup_address, block_context, &va_block);
if (status != NV_OK)
goto out;
@ -10248,8 +10676,8 @@ NV_STATUS uvm_test_va_block_inject_error(UVM_TEST_VA_BLOCK_INJECT_ERROR_PARAMS *
if (params->eviction_error)
va_block_test->inject_eviction_error = params->eviction_error;
if (params->cpu_pages_allocation_error)
va_block_test->inject_cpu_pages_allocation_error = params->cpu_pages_allocation_error;
if (params->cpu_pages_allocation_error_count)
va_block_test->inject_cpu_pages_allocation_error_count = params->cpu_pages_allocation_error_count;
if (params->populate_error)
va_block_test->inject_populate_error = params->populate_error;
@ -10258,7 +10686,8 @@ NV_STATUS uvm_test_va_block_inject_error(UVM_TEST_VA_BLOCK_INJECT_ERROR_PARAMS *
out:
uvm_va_space_up_read(va_space);
uvm_va_space_mm_release_unlock(va_space, mm);
uvm_va_space_mm_or_current_release_unlock(va_space, mm);
uvm_va_block_context_free(block_context);
return status;
}
@ -10329,7 +10758,7 @@ NV_STATUS uvm_test_change_pte_mapping(UVM_TEST_CHANGE_PTE_MAPPING_PARAMS *params
goto out;
}
status = uvm_va_block_find_create(va_space, mm, params->va, block_context, &block);
status = uvm_va_block_find_create(va_space, params->va, block_context, &block);
if (status != NV_OK)
goto out;

View File

@ -249,7 +249,7 @@ struct uvm_va_block_struct
// Lock protecting the block. See the comment at the top of uvm.c.
uvm_mutex_t lock;
// Parent VA range. UVM managed blocks have this set. HMM blocks will have
// Parent VA range. Managed blocks have this set. HMM blocks will have
// va_range set to NULL and hmm.va_space set instead. Dead blocks that are
// waiting for the last ref count to be removed have va_range and
// hmm.va_space set to NULL (could be either type of block).
@ -437,13 +437,22 @@ struct uvm_va_block_struct
uvm_perf_module_data_desc_t perf_modules_data[UVM_PERF_MODULE_TYPE_COUNT];
// Prefetch information that is updated while holding the va_block lock but
// records state while the lock is not held.
struct
{
uvm_processor_id_t last_migration_proc_id;
NvU16 fault_migrations_to_last_proc;
} prefetch_info;
#if UVM_IS_CONFIG_HMM()
struct
{
// The MMU notifier is registered per va_block.
struct mmu_interval_notifier notifier;
// Parent VA space pointer. It is NULL for UVM managed blocks or if
// Parent VA space pointer. It is NULL for managed blocks or if
// the HMM block is dead. This field can be read while holding the
// block lock and is only modified while holding the va_space write
// lock and va_block lock (same as the va_range pointer).
@ -488,21 +497,27 @@ struct uvm_va_block_wrapper_struct
// uvm_cpu_chunk_allocation_sizes module parameter.
NvU32 cpu_chunk_allocation_size_mask;
// Force the next eviction attempt on this block to fail. Used for
// testing only.
bool inject_eviction_error;
// Subsequent operations that need to allocate CPU pages will fail. As
// opposed to other error injection settings, this one is persistent.
// opposed to other error injection settings, this one fails N times
// and then succeeds instead of failing on the Nth try. A value of ~0u
// means fail indefinitely.
// This is because this error is supposed to be fatal and tests verify
// the state of the VA blocks after the failure. However, some tests
// use kernels to trigger migrations and a fault replay could trigger
// a successful migration if this error flag is cleared.
bool inject_cpu_pages_allocation_error;
NvU32 inject_cpu_pages_allocation_error_count;
// Force the next eviction attempt on this block to fail. Used for
// testing only.
bool inject_eviction_error;
// Force the next successful chunk allocation to then fail. Used for testing
// only to simulate driver metadata allocation failure.
bool inject_populate_error;
// Force the next split on this block to fail.
// Set by error injection ioctl for testing purposes only.
bool inject_split_error;
} test;
};
@ -639,8 +654,18 @@ static void uvm_va_block_context_init(uvm_va_block_context_t *va_block_context,
memset(va_block_context, 0xff, sizeof(*va_block_context));
va_block_context->mm = mm;
#if UVM_IS_CONFIG_HMM()
va_block_context->hmm.vma = NULL;
#endif
}
// Check that a single policy covers the given region for the given va_block.
// This always returns true and is intended to only be used with UVM_ASSERT().
// Locking: the va_block lock must be held.
bool uvm_va_block_check_policy_is_valid(uvm_va_block_t *va_block,
uvm_va_policy_t *policy,
uvm_va_block_region_t region);
// TODO: Bug 1766480: Using only page masks instead of a combination of regions
// and page masks could simplify the below APIs and their implementations
// at the cost of having to scan the whole mask for small regions.
@ -651,8 +676,10 @@ static void uvm_va_block_context_init(uvm_va_block_context_t *va_block_context,
// pages in the region which are present in the mask.
//
// prefetch_page_mask may be passed as a subset of page_mask when cause is
// UVM_MAKE_RESIDENT_CAUSE_FAULT to indicate pages that have been pulled due
// to automatic page prefetching heuristics. For pages in this mask,
// UVM_MAKE_RESIDENT_CAUSE_REPLAYABLE_FAULT,
// UVM_MAKE_RESIDENT_CAUSE_NON_REPLAYABLE_FAULT, or
// UVM_MAKE_RESIDENT_CAUSE_ACCESS_COUNTER to indicate pages that have been
// pulled due to automatic page prefetching heuristics. For pages in this mask,
// UVM_MAKE_RESIDENT_CAUSE_PREFETCH will be reported in migration events,
// instead.
//
@ -674,20 +701,24 @@ static void uvm_va_block_context_init(uvm_va_block_context_t *va_block_context,
// block's lock has been unlocked and relocked as part of the call and that the
// whole sequence of operations performed under the block's lock needs to be
// attempted again. To facilitate that, the caller needs to provide the same
// va_block_retry struct for each attempt that has been initialized before the first
// attempt and needs to be deinitialized after the last one. Most callers can
// just use UVM_VA_BLOCK_LOCK_RETRY() that takes care of that for the caller.
// va_block_retry struct for each attempt that has been initialized before the
// first attempt and needs to be deinitialized after the last one. Most callers
// can just use UVM_VA_BLOCK_LOCK_RETRY() that takes care of that for the
// caller.
//
// If dest_id is the CPU then va_block_retry can be NULL and allocation-retry of
// user memory is guaranteed not to happen. Allocation-retry of page tables can
// still occur though.
//
// va_block_context must be non-NULL. This function will set a bit in
// va_block_context must not be NULL. This function will set a bit in
// va_block_context->make_resident.pages_changed_residency for each page that
// changed residency (due to a migration or first population) as a result of the
// operation. This function only sets bits in that mask. It is the caller's
// responsibility to zero the mask or not first.
//
// va_block_context->policy must also be set by the caller for the given region.
// See the comments for uvm_va_block_check_policy_is_valid().
//
// Notably any status other than NV_OK indicates that the block's lock might
// have been unlocked and relocked.
//
@ -710,6 +741,8 @@ NV_STATUS uvm_va_block_make_resident(uvm_va_block_t *va_block,
// where they are unmapped
// - All remote mappings (due to either SetAccessedBy or performance heuristics)
// are broken
// - Only managed va_blocks are supported.
// TODO: Bug 3660922: need to implement HMM read duplication support.
// - LOCKING: If va_block_context->mm != NULL, va_block_context->mm->mmap_lock
// must be held in at least read mode.
NV_STATUS uvm_va_block_make_resident_read_duplicate(uvm_va_block_t *va_block,
@ -721,6 +754,34 @@ NV_STATUS uvm_va_block_make_resident_read_duplicate(uvm_va_block_t *va_block,
const uvm_page_mask_t *prefetch_page_mask,
uvm_make_resident_cause_t cause);
// Similar to uvm_va_block_make_resident() (read documentation there). The
// difference is that source pages are only copied to the destination and the
// residency is not updated until uvm_va_block_make_resident_post() is called.
// Otherwise, the combination of uvm_va_block_make_resident_pre() and
// uvm_va_block_make_resident_post() should be the same as just calling
// uvm_va_block_make_resident().
// This split is needed when using migrate_vma_setup() and migrate_vma_pages()
// so that when migrate_vma_pages() indicates a page is not migrating, the
// va_block state is not updated.
// LOCKING: The caller must hold the va_block lock.
NV_STATUS uvm_va_block_make_resident_pre(uvm_va_block_t *va_block,
uvm_va_block_retry_t *va_block_retry,
uvm_va_block_context_t *va_block_context,
uvm_processor_id_t dest_id,
uvm_va_block_region_t region,
const uvm_page_mask_t *page_mask,
const uvm_page_mask_t *prefetch_page_mask,
uvm_make_resident_cause_t cause);
// The page_mask must be the same or a subset of the page_mask passed to
// uvm_va_block_make_resident_pre(). This step updates the residency and breaks
// read duplication.
// LOCKING: The caller must hold the va_block lock.
void uvm_va_block_make_resident_post(uvm_va_block_t *va_block,
uvm_va_block_context_t *va_block_context,
uvm_va_block_region_t region,
const uvm_page_mask_t *page_mask);
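// Illustrative ordering of the pre/post split around the migrate_vma_*()
// calls (sketch only: 'args' is a caller-prepared struct migrate_vma, error
// handling and trimming page_mask to the pages that actually migrated are
// omitted, and passing NULL as the prefetch mask is an assumption):
//
//     if (migrate_vma_setup(args) != 0)       // isolates and unmaps CPU pages
//         return;
//
//     status = uvm_va_block_make_resident_pre(va_block, va_block_retry,
//                                             va_block_context, dest_id,
//                                             region, page_mask, NULL, cause);
//     if (status == NV_OK) {
//         migrate_vma_pages(args);            // installs destination pages
//
//         // Commit residency only for pages that actually migrated; real code
//         // would trim page_mask to that subset first.
//         uvm_va_block_make_resident_post(va_block, va_block_context,
//                                         region, page_mask);
//     }
//
//     migrate_vma_finalize(args);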
// Creates or upgrades a mapping from the input processor to the given virtual
// address region. Pages which already have new_prot permissions or higher are
// skipped, so this call ensures that the range is mapped with at least new_prot
@ -749,7 +810,8 @@ NV_STATUS uvm_va_block_make_resident_read_duplicate(uvm_va_block_t *va_block,
// pages because the earlier operation can cause a PTE split or merge which is
// assumed by the later operation.
//
// va_block_context must not be NULL.
// va_block_context must not be NULL and va_block_context->policy must be valid.
// See the comments for uvm_va_block_check_policy_is_valid().
//
// If allocation-retry was required as part of the operation and was successful,
// NV_ERR_MORE_PROCESSING_REQUIRED is returned. In this case, the entries in the
@ -805,7 +867,7 @@ NV_STATUS uvm_va_block_map_mask(uvm_va_block_t *va_block,
// pages because the earlier operation can cause a PTE split or merge which is
// assumed by the later operation.
//
// va_block_context must not be NULL.
// va_block_context must not be NULL. The va_block_context->policy is unused.
//
// If allocation-retry was required as part of the operation and was successful,
// NV_ERR_MORE_PROCESSING_REQUIRED is returned. In this case, the entries in the
@ -837,12 +899,20 @@ NV_STATUS uvm_va_block_unmap_mask(uvm_va_block_t *va_block,
// up-to-date data.
// - Unmap the preferred location's processor from any pages in this region
// which are not resident on the preferred location.
//
// va_block_context must not be NULL and va_block_context->policy must be valid.
// See the comments for uvm_va_block_check_policy_is_valid().
//
// LOCKING: The caller must hold the VA block lock.
NV_STATUS uvm_va_block_set_preferred_location_locked(uvm_va_block_t *va_block,
uvm_va_block_context_t *va_block_context);
// Maps the given processor to all resident pages in this block, as allowed by
// location and policy. Waits for the operation to complete before returning.
// This function should only be called with managed va_blocks.
//
// va_block_context must not be NULL and va_block_context->policy must be valid.
// See the comments for uvm_va_block_check_policy_is_valid().
//
// LOCKING: This takes and releases the VA block lock. If va_block_context->mm
// != NULL, va_block_context->mm->mmap_lock must be held in at least
@ -852,8 +922,10 @@ NV_STATUS uvm_va_block_set_accessed_by(uvm_va_block_t *va_block,
uvm_processor_id_t processor_id);
// Breaks SetAccessedBy and remote mappings
// This function should only be called with managed va_blocks.
//
// va_block_context must NOT be NULL
// va_block_context must not be NULL and va_block_context->policy must be valid.
// See the comments for uvm_va_block_check_policy_is_valid().
//
// LOCKING: This takes and releases the VA block lock. If va_block_context->mm
// != NULL, va_block_context->mm->mmap_lock must be held in at least
@ -862,8 +934,10 @@ NV_STATUS uvm_va_block_set_read_duplication(uvm_va_block_t *va_block,
uvm_va_block_context_t *va_block_context);
// Restores SetAccessedBy mappings
// This function should only be called with managed va_blocks.
//
// va_block_context must NOT be NULL
// va_block_context must not be NULL and va_block_context->policy must be valid.
// See the comments for uvm_va_block_check_policy_is_valid().
//
// LOCKING: This takes and releases the VA block lock. If va_block_context->mm
// != NULL, va_block_context->mm->mmap_lock must be held in at least
@ -871,6 +945,29 @@ NV_STATUS uvm_va_block_set_read_duplication(uvm_va_block_t *va_block,
NV_STATUS uvm_va_block_unset_read_duplication(uvm_va_block_t *va_block,
uvm_va_block_context_t *va_block_context);
// Check if processor_id is allowed to access the va_block with access_type
// permissions. Return values:
//
// NV_ERR_INVALID_ADDRESS The VA block is logically dead (zombie)
// NV_ERR_INVALID_ACCESS_TYPE The vma corresponding to the VA range does not
// allow access_type permissions, or migration is
// disallowed and processor_id cannot access the
// range remotely (UVM-Lite).
// NV_ERR_INVALID_OPERATION The access would violate the policies specified
// by UvmPreventMigrationRangeGroups.
//
// va_block_context must not be NULL, va_block_context->policy must be valid,
// and if the va_block is a HMM block, va_block_context->hmm.vma must be valid
// which also means the va_block_context->mm is not NULL, retained, and locked
// for at least read.
// Locking: the va_block lock must be held.
NV_STATUS uvm_va_block_check_logical_permissions(uvm_va_block_t *va_block,
uvm_va_block_context_t *va_block_context,
uvm_processor_id_t processor_id,
uvm_page_index_t page_index,
uvm_fault_type_t access_type,
bool allow_migration);
// API for access privilege revocation
//
// Revoke prot_to_revoke access permissions for the given processor.
@ -898,7 +995,7 @@ NV_STATUS uvm_va_block_unset_read_duplication(uvm_va_block_t *va_block,
// different pages because the earlier operation can cause a PTE split or merge
// which is assumed by the later operation.
//
// va_block_context must not be NULL.
// va_block_context must not be NULL. The va_block_context->policy is unused.
//
// If allocation-retry was required as part of the operation and was successful,
// NV_ERR_MORE_PROCESSING_REQUIRED is returned. In this case, the entries in the
@ -938,7 +1035,8 @@ NV_STATUS uvm_va_block_revoke_prot_mask(uvm_va_block_t *va_block,
// processor_id, which triggered the migration and should have already been
// mapped).
//
// va_block_context must not be NULL.
// va_block_context must not be NULL and va_block_context->policy must be valid.
// See the comments for uvm_va_block_check_policy_is_valid().
//
// This function acquires/waits for the va_block tracker and updates that
// tracker with any new work pushed.
@ -968,7 +1066,8 @@ NV_STATUS uvm_va_block_add_mappings_after_migration(uvm_va_block_t *va_block,
// Note that this can return NV_ERR_MORE_PROCESSING_REQUIRED just like
// uvm_va_block_map() indicating that the operation needs to be retried.
//
// va_block_context must not be NULL.
// va_block_context must not be NULL and va_block_context->policy must be valid.
// See the comments for uvm_va_block_check_policy_is_valid().
//
// LOCKING: The caller must hold the va block lock. If va_block_context->mm !=
// NULL, va_block_context->mm->mmap_lock must be held in at least read
@ -989,6 +1088,8 @@ NV_STATUS uvm_va_block_add_gpu_va_space(uvm_va_block_t *va_block, uvm_gpu_va_spa
// If mm != NULL, that mm is used for any CPU mappings which may be created as
// a result of this call. See uvm_va_block_context_t::mm for details.
//
// va_block_context must not be NULL. The va_block_context->policy is unused.
//
// LOCKING: The caller must hold the va_block lock. If block_context->mm is not
// NULL, the caller must hold mm->mmap_lock in at least read mode.
void uvm_va_block_remove_gpu_va_space(uvm_va_block_t *va_block,
@ -1057,10 +1158,7 @@ NV_STATUS uvm_va_block_split(uvm_va_block_t *existing_va_block,
// Exactly the same split semantics as uvm_va_block_split, including error
// handling except the existing_va_block block lock needs to be held and
// the new_va_block has to be preallocated.
//
// new_va_block's va_range is set to new_va_range before any reverse mapping is
// established to the new block, but the caller is responsible for inserting the
// new block into the range.
// Also note that the existing_va_block lock may be dropped and re-acquired.
NV_STATUS uvm_va_block_split_locked(uvm_va_block_t *existing_va_block,
NvU64 new_end,
uvm_va_block_t *new_va_block,
@ -1076,6 +1174,7 @@ NV_STATUS uvm_va_block_split_locked(uvm_va_block_t *existing_va_block,
// - va_space lock must be held in at least read mode
//
// service_context->block_context.mm is ignored and vma->vm_mm is used instead.
// service_context->block_context.policy is set by this function.
//
// Returns NV_ERR_INVALID_ACCESS_TYPE if a CPU mapping to fault_addr cannot be
// accessed, for example because it's within a range group which is non-
@ -1089,6 +1188,8 @@ NV_STATUS uvm_va_block_cpu_fault(uvm_va_block_t *va_block,
// (migrations, cache invalidates, etc.) in response to the given service block
// context
//
// service_context->block_context.policy is set by this function.
//
// Locking:
// - service_context->block_context.mm->mmap_lock must be held in at least
// read mode, if valid.
@ -1132,10 +1233,18 @@ static inline NvU64 uvm_va_block_cpu_page_address(uvm_va_block_t *block, uvm_pag
return block->start + PAGE_SIZE * page_index;
}
// Get the physical address on the given GPU for given residency
uvm_gpu_phys_address_t uvm_va_block_res_phys_page_address(uvm_va_block_t *va_block,
uvm_page_index_t page_index,
uvm_processor_id_t residency,
uvm_gpu_t *gpu);
// Get the page physical address on the given GPU
//
// This will assert that GPU state is indeed present.
uvm_gpu_phys_address_t uvm_va_block_gpu_phys_page_address(uvm_va_block_t *va_block, uvm_page_index_t page_index, uvm_gpu_t *gpu);
uvm_gpu_phys_address_t uvm_va_block_gpu_phys_page_address(uvm_va_block_t *va_block,
uvm_page_index_t page_index,
uvm_gpu_t *gpu);
static bool uvm_va_block_contains_address(uvm_va_block_t *block, NvU64 address)
{
@ -1191,26 +1300,28 @@ NV_STATUS uvm_va_block_find(uvm_va_space_t *va_space, NvU64 addr, uvm_va_block_t
// Same as uvm_va_block_find except that the block is created if not found.
// If addr is covered by a UVM_VA_RANGE_TYPE_MANAGED va_range, a managed block
// will be created. Otherwise, if addr is not covered by any va_range, mm is
// non-NULL, and HMM is enabled in the va_space, an HMM block will be created.
// In either case, if mm is non-NULL, it must be retained and locked in at
// least read mode. Return values:
// will be created. Otherwise, if addr is not covered by any va_range, HMM is
// enabled in the va_space, and va_block_context and va_block_context->mm are
// non-NULL, then a HMM block will be created and va_block_context->hmm.vma is
// set to the VMA covering 'addr'. The va_block_context->policy field is left
// unchanged.
// In either case, if va_block_context->mm is non-NULL, it must be retained and
// locked in at least read mode. Return values:
// NV_ERR_INVALID_ADDRESS addr is not a UVM_VA_RANGE_TYPE_MANAGED va_range nor
// a HMM enabled VMA.
// NV_ERR_NO_MEMORY memory could not be allocated.
NV_STATUS uvm_va_block_find_create(uvm_va_space_t *va_space,
struct mm_struct *mm,
NvU64 addr,
uvm_va_block_context_t *va_block_context,
uvm_va_block_t **out_block);
// Same as uvm_va_block_find_create except that only UVM managed va_blocks are
// Same as uvm_va_block_find_create except that only managed va_blocks are
// created if not already present in the VA range.
static NV_STATUS uvm_va_block_find_create_managed(uvm_va_space_t *va_space,
NvU64 addr,
uvm_va_block_t **out_block)
{
return uvm_va_block_find_create(va_space, NULL, addr, NULL, out_block);
return uvm_va_block_find_create(va_space, addr, NULL, out_block);
}
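// Typical lookup pattern, mirroring the test-ioctl path in uvm_va_block.c
// (sketch only; error unwinding is abbreviated and the local names are
// placeholders):
//
//     mm = uvm_va_space_mm_or_current_retain_lock(va_space);
//     uvm_va_space_down_read(va_space);
//
//     block_context = uvm_va_block_context_alloc(mm);
//     if (block_context)
//         status = uvm_va_block_find_create(va_space, addr, block_context, &va_block);
//     else
//         status = NV_ERR_NO_MEMORY;
//
//     uvm_va_space_up_read(va_space);
//     uvm_va_space_mm_or_current_release_unlock(va_space, mm);
//     uvm_va_block_context_free(block_context);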
// Look up a chunk backing a specific address within the VA block. Returns NULL if none.
@ -1232,7 +1343,8 @@ typedef enum
// The caller needs to handle allocation-retry. va_block_retry can be NULL if
// the destination is the CPU.
//
// va_block_context must not be NULL.
// va_block_context must not be NULL and va_block_context->policy must be valid.
// See the comments for uvm_va_block_check_policy_is_valid().
//
// LOCKING: The caller must hold the va_block lock. If va_block_context->mm !=
// NULL, va_block_context->mm->mmap_lock must be held in at least
@ -1249,6 +1361,9 @@ NV_STATUS uvm_va_block_migrate_locked(uvm_va_block_t *va_block,
//
// The [dst, dst + size) range has to fit within a single PAGE_SIZE page.
//
// va_block_context must not be NULL. The caller is not required to set
// va_block_context->policy.
//
// The caller needs to support allocation-retry of page tables.
//
// LOCKING: The caller must hold the va_block lock
@ -1317,6 +1432,8 @@ void uvm_va_block_mark_cpu_dirty(uvm_va_block_t *va_block);
// successful, NV_ERR_MORE_PROCESSING_REQUIRED is returned. In this case the
// block's lock was unlocked and relocked.
//
// va_block_context must not be NULL. The va_block_context->policy is unused.
//
// LOCKING: The caller must hold the va_block lock.
NV_STATUS uvm_va_block_set_cancel(uvm_va_block_t *va_block, uvm_va_block_context_t *block_context, uvm_gpu_t *gpu);
@ -1396,6 +1513,26 @@ static uvm_va_block_region_t uvm_va_block_region_from_block(uvm_va_block_t *va_b
return uvm_va_block_region(0, uvm_va_block_num_cpu_pages(va_block));
}
// Create a block region from a va block and page mask. Note that the region
// covers the first through the last set bit and may have unset bits in between.
static uvm_va_block_region_t uvm_va_block_region_from_mask(uvm_va_block_t *va_block, const uvm_page_mask_t *page_mask)
{
uvm_va_block_region_t region;
uvm_page_index_t outer = uvm_va_block_num_cpu_pages(va_block);
region.first = find_first_bit(page_mask->bitmap, outer);
if (region.first >= outer) {
region = uvm_va_block_region(0, 0);
}
else {
// At least one bit is set so find_last_bit() should not return 'outer'.
region.outer = find_last_bit(page_mask->bitmap, outer) + 1;
UVM_ASSERT(region.outer <= outer);
}
return region;
}
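// Example (names are placeholders): if only pages 3 and 10 are set in
// page_mask, the returned region has first == 3 and outer == 11, so the unset
// pages in between must still be skipped by iterating with the mask:
//
//     uvm_va_block_region_t region = uvm_va_block_region_from_mask(va_block, page_mask);
//
//     for_each_va_block_page_in_region_mask(page_index, page_mask, region)
//         process_page(page_index);   // process_page() is a hypothetical helper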
static bool uvm_page_mask_test(const uvm_page_mask_t *mask, uvm_page_index_t page_index)
{
UVM_ASSERT(page_index < PAGES_PER_UVM_VA_BLOCK);
@ -1715,61 +1852,6 @@ static NvU64 uvm_reverse_map_end(const uvm_reverse_map_t *reverse_map)
#define for_each_va_block_page(page_index, va_block) \
for_each_va_block_page_in_region((page_index), uvm_va_block_region_from_block(va_block))
static void uvm_va_block_bitmap_tree_init_from_page_count(uvm_va_block_bitmap_tree_t *bitmap_tree, size_t page_count)
{
bitmap_tree->leaf_count = page_count;
bitmap_tree->level_count = ilog2(roundup_pow_of_two(page_count)) + 1;
uvm_page_mask_zero(&bitmap_tree->pages);
}
static void uvm_va_block_bitmap_tree_init(uvm_va_block_bitmap_tree_t *bitmap_tree, uvm_va_block_t *va_block)
{
size_t num_pages = uvm_va_block_num_cpu_pages(va_block);
uvm_va_block_bitmap_tree_init_from_page_count(bitmap_tree, num_pages);
}
static void uvm_va_block_bitmap_tree_iter_init(const uvm_va_block_bitmap_tree_t *bitmap_tree,
uvm_page_index_t page_index,
uvm_va_block_bitmap_tree_iter_t *iter)
{
UVM_ASSERT(bitmap_tree->level_count > 0);
UVM_ASSERT_MSG(page_index < bitmap_tree->leaf_count,
"%zd vs %zd",
(size_t)page_index,
(size_t)bitmap_tree->leaf_count);
iter->level_idx = bitmap_tree->level_count - 1;
iter->node_idx = page_index;
}
static uvm_va_block_region_t uvm_va_block_bitmap_tree_iter_get_range(const uvm_va_block_bitmap_tree_t *bitmap_tree,
const uvm_va_block_bitmap_tree_iter_t *iter)
{
NvU16 range_leaves = uvm_perf_tree_iter_leaf_range(bitmap_tree, iter);
NvU16 range_start = uvm_perf_tree_iter_leaf_range_start(bitmap_tree, iter);
uvm_va_block_region_t subregion = uvm_va_block_region(range_start, range_start + range_leaves);
UVM_ASSERT(iter->level_idx >= 0);
UVM_ASSERT(iter->level_idx < bitmap_tree->level_count);
return subregion;
}
static NvU16 uvm_va_block_bitmap_tree_iter_get_count(const uvm_va_block_bitmap_tree_t *bitmap_tree,
const uvm_va_block_bitmap_tree_iter_t *iter)
{
uvm_va_block_region_t subregion = uvm_va_block_bitmap_tree_iter_get_range(bitmap_tree, iter);
return uvm_page_mask_region_weight(&bitmap_tree->pages, subregion);
}
#define uvm_va_block_bitmap_tree_traverse_counters(counter,tree,page,iter) \
for (uvm_va_block_bitmap_tree_iter_init((tree), (page), (iter)), \
(counter) = uvm_va_block_bitmap_tree_iter_get_count((tree), (iter)); \
(iter)->level_idx >= 0; \
(counter) = --(iter)->level_idx < 0? 0: \
uvm_va_block_bitmap_tree_iter_get_count((tree), (iter)))
// Return the block region covered by the given chunk size. page_index must be
// any page within the block known to be covered by the chunk.
static uvm_va_block_region_t uvm_va_block_chunk_region(uvm_va_block_t *block,
@ -1898,6 +1980,12 @@ uvm_va_block_region_t uvm_va_block_big_page_region(uvm_va_block_t *va_block,
// returned.
uvm_va_block_region_t uvm_va_block_big_page_region_all(uvm_va_block_t *va_block, NvU32 big_page_size);
// Returns the largest sub-region of 'region' which can fit big pages.
// If the region cannot fit any big pages, an invalid region (0, 0) is returned.
uvm_va_block_region_t uvm_va_block_big_page_region_subset(uvm_va_block_t *va_block,
uvm_va_block_region_t region,
NvU32 big_page_size);
// Returns the big page index (the bit index within
// uvm_va_block_gpu_state_t::big_ptes) corresponding to page_index. If
// page_index cannot be covered by a big PTE due to alignment or block size,
@ -1907,7 +1995,14 @@ size_t uvm_va_block_big_page_index(uvm_va_block_t *va_block, uvm_page_index_t pa
// Returns the new residency for a page that faulted or triggered access
// counter notifications. The read_duplicate output parameter indicates if the
// page meets the requirements to be read-duplicated
// va_block_context must not be NULL, va_block_context->policy must be valid,
// and if the va_block is a HMM block, va_block_context->hmm.vma must be valid
// which also means the va_block_context->mm is not NULL, retained, and locked
// for at least read. See the comments for uvm_va_block_check_policy_is_valid()
// and uvm_hmm_va_block_context_vma_is_valid() in uvm_hmm.h.
// Locking: the va_block lock must be held.
uvm_processor_id_t uvm_va_block_select_residency(uvm_va_block_t *va_block,
uvm_va_block_context_t *va_block_context,
uvm_page_index_t page_index,
uvm_processor_id_t processor_id,
NvU32 access_type_mask,

View File

@ -75,28 +75,6 @@ typedef struct
DECLARE_BITMAP(bitmap, PAGES_PER_UVM_VA_BLOCK);
} uvm_page_mask_t;
// Encapsulates a counter tree built on top of a page mask bitmap in
// which each leaf represents a page in the block. It contains
// leaf_count and level_count so that it can use some macros for
// perf trees
typedef struct
{
uvm_page_mask_t pages;
NvU16 leaf_count;
NvU8 level_count;
} uvm_va_block_bitmap_tree_t;
// Iterator for the bitmap tree. It contains level_idx and node_idx so
// that it can use some macros for perf trees
typedef struct
{
s8 level_idx;
uvm_page_index_t node_idx;
} uvm_va_block_bitmap_tree_iter_t;
// When updating GPU PTEs, this struct describes the new arrangement of PTE
// sizes. It is calculated before the operation is applied so we know which PTE
// sizes to allocate.
@ -127,11 +105,6 @@ typedef struct
// that region should be 4k, and that some of those 4k PTEs will be written
// by the operation.
DECLARE_BITMAP(big_ptes_covered, MAX_BIG_PAGES_PER_UVM_VA_BLOCK);
// These are the big PTE regions which will no longer have any valid
// mappings after the operation. Only the bits which are set in
// big_ptes_covered are valid.
DECLARE_BITMAP(big_ptes_fully_unmapped, MAX_BIG_PAGES_PER_UVM_VA_BLOCK);
} uvm_va_block_new_pte_state_t;
// Event that triggered the call to uvm_va_block_make_resident/
@ -269,7 +242,8 @@ typedef struct
typedef enum
{
UVM_VA_BLOCK_TRANSFER_MODE_MOVE = 1,
UVM_VA_BLOCK_TRANSFER_MODE_COPY = 2
UVM_VA_BLOCK_TRANSFER_MODE_COPY = 2,
UVM_VA_BLOCK_TRANSFER_MODE_COPY_ONLY = 3
} uvm_va_block_transfer_mode_t;
struct uvm_reverse_map_struct

View File

@ -49,8 +49,9 @@ uvm_va_policy_t *uvm_va_policy_get(uvm_va_block_t *va_block, NvU64 addr)
return node ? &node->policy : &uvm_va_policy_default;
}
else
else {
return uvm_va_range_get_policy(va_block->va_range);
}
}
#if UVM_IS_CONFIG_HMM()

View File

@ -50,7 +50,7 @@ typedef enum
//
// A policy covers one or more contiguous Linux VMAs or portion of a VMA and
// does not cover non-existent VMAs.
// The VA range is determined from either the uvm_va_range_t for UVM managed
// The VA range is determined from either the uvm_va_range_t for managed
// allocations or the uvm_va_policy_node_t for HMM allocations.
//
typedef struct uvm_va_policy_struct
@ -94,6 +94,12 @@ bool uvm_va_policy_is_read_duplicate(uvm_va_policy_t *policy, uvm_va_space_t *va
// Locking: The va_block lock must be held.
uvm_va_policy_t *uvm_va_policy_get(uvm_va_block_t *va_block, NvU64 addr);
// Return a uvm_va_policy_node_t given a uvm_va_policy_t pointer.
static uvm_va_policy_node_t *uvm_va_policy_node_from_policy(uvm_va_policy_t *policy)
{
return container_of(policy, uvm_va_policy_node_t, policy);
}
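// Illustrative round trip (sketch; 'node' is assumed to be a valid
// uvm_va_policy_node_t, e.g. one returned by uvm_va_policy_node_find()):
//
//     uvm_va_policy_t *policy = &node->policy;
//     UVM_ASSERT(uvm_va_policy_node_from_policy(policy) == node);
//
// This is only meaningful for policies embedded in a node; the default policy
// and managed-range policies are not embedded in a uvm_va_policy_node_t.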
#if UVM_IS_CONFIG_HMM()
// Module load/exit
@ -239,6 +245,11 @@ static NV_STATUS uvm_va_policy_set_range(uvm_va_block_t *va_block,
return NV_OK;
}
static uvm_va_policy_node_t *uvm_va_policy_node_iter_first(uvm_va_block_t *va_block, NvU64 start, NvU64 end)
{
return NULL;
}
#endif // UVM_IS_CONFIG_HMM()
#endif // __UVM_VA_POLICY_H__

Some files were not shown because too many files have changed in this diff.